def main(): cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists; file_name %s" %cfg_file) sendToMe(subject = "SiteBased_alexa ERROR", body = "config file not exists") sys.exit(0) logger.info(": extract data from siteBased start") socket.setdefaulttimeout(10.0) sendToMe(subject = "SiteBased_alexa start", body = "extract data from siteBased start") target_server_section = "staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') if True: f = open("id_dis_domain", "r") for line in f.readlines(): line = line.strip("\n") base_url = "http://www.alexa.com/siteinfo/" id, display_name, domain = line.split(",")[1], line.split(",")[2], line.split(",")[3] url = base_url + domain run_time = 0 alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) while True: run_time += 1 if alexaGlobalRank == 0 or alexaTopCountry == "unknown": alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) else: break if run_time == 3: break time.sleep(random.randint(5, 8)) time.sleep(random.randint(5, 8)) alexa_info_tuple = [("2016-05-31", id, display_name, alexaGlobalRank,alexaTopCountry,1)] if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"): insert_SiteBasedAlexa_SQL = """ insert into Website_Alexa_Info (CreateDate, WebsiteId, DisplayName, Rank, TopOneCountry, IsEndOfMonth) values (%s, %s, %s, %s, %s, %s) on duplicate key update Rank = values(Rank), TopOneCountry = values(TopOneCountry) """ try: target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL, alexa_info_tuple) print alexa_info_tuple target_mysql.commit() except MySQLdb.Error, e: logger.debug(e) sendToMe(subject = "update SiteBasedAlexa Error", body = re.sub(r'\'|"|!', "", str(e))) continue else: logger.info("has no data %s" %alexa_date_max) except Exception, e: logger.debug(": load data to SiteBasedAlexa %s" %e) sendToMe(subject = "SiteBasedAlexa ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def addAlexa(d): sendToMe(subject = "add alexa data start", body = "add alexa data start") logger.info(" addSiteBasedAlexa start") get_alexa_sql = "select '%s', trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, current_timestamp() from SiteBasedAlexa where reportDate = (select max(reportDate) from SiteBasedAlexa);" %d insert_alexa_sql = "insert into SiteBasedAlexa(reportDate,trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry,ETLDate) VALUES(%s,%s,%s,%s,%s,%s);" try: target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') alexa_result = target_mysql.queryCMD(get_alexa_sql) target_mysql.insertUpdateCMD(insert_alexa_sql, alexa_result) target_mysql.commit() except Exception, e: sendToMe(subject = "add alexa data ERROR", body = e) logger.DEBUG("add alexa data %s" %e)
def addAlexa(d): sendToMe(subject="add alexa data start", body="add alexa data start") logger.info(" addSiteBasedAlexa start") get_alexa_sql = "select '%s', trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, current_timestamp() from SiteBasedAlexa where reportDate = (select max(reportDate) from SiteBasedAlexa);" % d insert_alexa_sql = "insert into SiteBasedAlexa(reportDate,trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry,ETLDate) VALUES(%s,%s,%s,%s,%s,%s);" try: target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port, charset='utf8') alexa_result = target_mysql.queryCMD(get_alexa_sql) target_mysql.insertUpdateCMD(insert_alexa_sql, alexa_result) target_mysql.commit() except Exception, e: sendToMe(subject="add alexa data ERROR", body=e) logger.DEBUG("add alexa data %s" % e)
finally: vtweb_mysql.closeCur() vtweb_mysql.closeConn() logger.info(": extract data from tracker2 start") logger.info(":load data to TitleBasedMeta start") target_server_section = "target_server_staging" try: target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port, charset='utf8') insertUpdate_SQL = """ INSERT INTO TitleBasedMeta(trackingMeta_id, metaType, title, ETLDate) VALUES(%s, %s, %s, %s) on duplicate key update title = values(title), ETLDate = values(ETLDate), metaType = values(metaType) """ target_mysql.insertUpdateCMD(insertUpdate_SQL, result) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBasedMeta, %s" % e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info(":load data to TitleBasedMeta end") #################################################################################################################################
title, sum(ReportedViews) as reportedViews, current_timestamp as ETLDate from DM_VIACOM.SelfService_Aggregate_ByNoticedDate as a, TitleBasedTrackingWebsite as b where a.trackingWebsite_id = b.trackingWebsite_id and a.WebsiteType = 'ugc' and b.WebsiteType = 'ugc' and a.Date_ID >= '%(date_para_reportedViews_min)s' and a.Date_ID < '%(date_para_reportedViews_max)s' group by 1, 2, 3, 4, 5 """ %date_para_reportedViews_dict reportedViews_result = target_mysql.queryCMD(aggregate_reportedViews_SQL) insert_reportedViews_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, reportedViews, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE reportedViews = VALUES(reportedViews), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_reportedViews_SQL, reportedViews_result) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" %e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info(" aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate to TitleBased1 end")
vtweb_mysql.closeCur() vtweb_mysql.closeConn() logger.info(": extract data from tracker2 start") logger.info(":load data to TitleBasedCountry start") target_server_section = "target_server_staging" try: target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') insertUpdate_SQL = """ INSERT INTO TitleBasedCountry (country_id, region, countryName, nationalFlag, ETLDate) VALUES(%s, %s, %s, %s, %s) on duplicate key update region = values(region), ETLDate = values(ETLDate), countryName = values(countryName), nationalFlag = VALUES(nationalFlag) """ target_mysql.insertUpdateCMD(insertUpdate_SQL, [(t[0], t[1], t[2].title(), t[3], t[4]) for t in result]) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBasedCountry, %s" %e) sendToMe(subject = "titleBased_country ERROR", body = e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info(":load data to TitleBasedCountry end") sendToMe(subject = "TitleBasedCountry end", body = "load data to TitleBasedCountry end") #################################################################################################################################
and a.WebsiteType = 'ugc' and b.WebsiteType = 'ugc' and a.Date_ID >= '%(date_para_reportedViews_min)s' and a.Date_ID < '%(date_para_reportedViews_max)s' group by 1, 2, 3, 4, 5 """ % date_para_reportedViews_dict reportedViews_result = target_mysql.queryCMD(aggregate_reportedViews_SQL) insert_reportedViews_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, reportedViews, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE reportedViews = VALUES(reportedViews), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_reportedViews_SQL, reportedViews_result) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" % e) sendToMe(subject="titleBased_views ERROR", body=e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info( " aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate to TitleBased1 end" ) sendToMe( subject="titleBased_views End", body= "aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate(views) to TitleBased1 end"
matchedNumDurationNoZero, infringingNum, infringingNumDurationNoZero, infringingNumCMS, clipDurationSum, clipDurationInfringingSum, reportedViews, infringingViews, reportedViewsCMS, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE websiteName = VALUES(websiteName), websiteDomain = VALUES(websiteDomain), country_id = VALUES(country_id), hostCountry = VALUES(hostCountry), matchedNum = VALUES(matchedNum), matchedNumDurationNoZero = values(matchedNumDurationNoZero), infringingNum = VALUES(infringingNum), infringingNumDurationNoZero = values(infringingNumDurationNoZero), infringingNumCMS = VALUES(infringingNumCMS), clipDurationSum = VALUES(clipDurationSum),clipDurationInfringingSum = VALUES(clipDurationInfringingSum), reportedViews = VALUES(reportedViews), infringingViews = values(infringingViews), reportedViewsCMS = VALUES(reportedViewsCMS), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_SiteBased_SQL, aggregate_TitleBased1_result) target_mysql.commit() except Exception, e: logger.debug("load data to SiteBased, %s" %e) sendToMe(subject = "SiteBased Error", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info(" aggregate data from TitleBased1 to SiteBased end") ################################################################################################################################# #-------------------------------------------------------------------------------------------------------------------------------- logger.info(" aggregate data from TitleBasedRemoveNum to SiteBasedRemoveNum start") try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8')
time.sleep(random.randint(3, 5)) alexa_info_tuple = [(alexa_date_max, site[0], site[1], alexaGlobalRank, alexaTopCountry, time.strftime("%Y-%m-%d %H:%M:%S"))] if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"): insert_SiteBasedAlexa_SQL = """ insert into SiteBasedAlexa (reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) values (%s, %s, %s, %s, %s, %s) on duplicate key update alexaGlobalRank = values(alexaGlobalRank), alexaTopCountry = values(alexaTopCountry), ETLDate = values(ETLDate) """ target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL, alexa_info_tuple) target_mysql.commit() else: logger.info("has no data %s" % alexa_date_max) except Exception, e: logger.debug(": load data to SiteBasedAlexa %s" % e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info(":load data to SiteBasedAlexa end") ################################################################################################################################# #sed "/\/home\/vobile\/cwj\/ViacomProject\/dashboard\/job/\/Job\/VIACOM\/Dashboard\/TitleBased/g"
def main(): cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists; file_name %s" % cfg_file) sendToMe(subject="SiteBased_alexa ERROR", body="config file not exists") sys.exit(0) logger.info(": extract data from siteBased start") socket.setdefaulttimeout(10.0) sendToMe(subject="SiteBased_alexa start", body="extract data from siteBased start") target_server_section = "staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port, charset='utf8') if True: f = open("id_dis_domain", "r") for line in f.readlines(): line = line.strip("\n") base_url = "http://www.alexa.com/siteinfo/" id, display_name, domain = line.split(",")[1], line.split( ",")[2], line.split(",")[3] url = base_url + domain run_time = 0 alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) while True: run_time += 1 if alexaGlobalRank == 0 or alexaTopCountry == "unknown": alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) else: break if run_time == 3: break time.sleep(random.randint(5, 8)) time.sleep(random.randint(5, 8)) alexa_info_tuple = [("2016-05-31", id, display_name, alexaGlobalRank, alexaTopCountry, 1)] if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"): insert_SiteBasedAlexa_SQL = """ insert into Website_Alexa_Info (CreateDate, WebsiteId, DisplayName, Rank, TopOneCountry, IsEndOfMonth) values (%s, %s, %s, %s, %s, %s) on duplicate key update Rank = values(Rank), TopOneCountry = values(TopOneCountry) """ try: target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL, alexa_info_tuple) print alexa_info_tuple target_mysql.commit() except MySQLdb.Error, e: logger.debug(e) sendToMe(subject="update SiteBasedAlexa Error", body=re.sub(r'\'|"|!', "", str(e))) continue else: logger.info("has no data %s" % alexa_date_max) except Exception, e: logger.debug(": load data to SiteBasedAlexa %s" % e) sendToMe(subject="SiteBasedAlexa ERROR", body=re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
try: target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port, charset='utf8') insertUpdate_SQL = """ INSERT INTO TitleBasedCountry (country_id, region, countryName, nationalFlag, ETLDate) VALUES(%s, %s, %s, %s, %s) on duplicate key update region = values(region), ETLDate = values(ETLDate), countryName = values(countryName), nationalFlag = VALUES(nationalFlag) """ target_mysql.insertUpdateCMD(insertUpdate_SQL, [(t[0], t[1], t[2].title(), t[3], t[4]) for t in result]) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBasedCountry, %s" % e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info(":load data to TitleBasedCountry end") #################################################################################################################################
insert_SiteBased_SQL = """ INSERT INTO SiteBased (reportDate, trackingWebsite_id, websiteName, websiteType, websiteDomain, country_id, hostCountry, title, matchedNum, infringingNum, infringingNumCMS, clipDurationSum, reportedViews, reportedViewsCMS, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE websiteName = VALUES(websiteName), websiteDomain = VALUES(websiteDomain), country_id = VALUES(country_id), hostCountry = VALUES(hostCountry), matchedNum = VALUES(matchedNum), infringingNum = VALUES(infringingNum), infringingNumCMS = VALUES(infringingNumCMS), clipDurationSum = VALUES(clipDurationSum), reportedViews = VALUES(reportedViews), reportedViewsCMS = VALUES(reportedViewsCMS), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_SiteBased_SQL, aggregate_TitleBased1_result) target_mysql.commit() except Exception, e: logger.debug("load data to SiteBased, %s" %e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info(" aggregate data from TitleBased1 to SiteBased end") ################################################################################################################################# #-------------------------------------------------------------------------------------------------------------------------------- logger.info(" aggregate data from TitleBasedRemoveNum to SiteBasedRemoveNum start") try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') aggregate_TitleBasedRemoveNum_SQL = """
and a.Date_ID >= '%(date_para_CMS_min)s' and a.Date_ID < '%(date_para_CMS_max)s' group by 1, 2, 3, 4, 5 """ % date_para_CMS_dict CMS_result = target_mysql.queryCMD(aggregate_CMS_SQL) insert_CMS_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, infringingNumCMS, reportedViewsCMS, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE infringingNumCMS = VALUES(infringingNumCMS), reportedViewsCMS = VALUES(reportedViewsCMS), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_CMS_SQL, CMS_result) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" % e) sendToMe(subject="TitleBased_views ERROR", body=e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info( " aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate (CMS data) to TitleBased1 end" ) sendToMe( subject="titleBased_CMS End", body= "aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate (CMS data) to TitleBased1 end"
sum(clipDurationSum) as clipDurationSum, current_timestamp as ETLDate from TitleBased as a, TitleBasedMeta as b, TitleBasedTrackingWebsite as c where a.trackingWebsite_id = c.trackingWebsite_id and a.trackingMeta_id = b.trackingMeta_id group by 1, 2, 3, 4, 5) as a left join MetaTitleMapTitle as b on a. title = b.metaTitle group by 1, 2, 3, 4, 5, 6 """ #%date_para_TitleBased1_dict TitleBased1_result = target_mysql.queryCMD(aggregate_TitleBased1_SQL) insert_TitleBased1_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, tier, matchedNum, infringingNum, clipDurationSum, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE matchedNum = VALUES(matchedNum), clipDurationSum = VALUES(clipDurationSum), infringingNum = VALUES(infringingNum), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_TitleBased1_SQL, TitleBased1_result) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" %e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info(" aggregate data from TitleBased to TitleBased1 end")
group by 1, 2, 3, 4, 5, 6) as a left join MetaTitleMapTitle as b on a.title = b.metaTitle group by 1, 2, 3, 4, 5, 6 """ print "====================" aggregate_result = target_mysql.queryCMD(aggregate_SQL) print aggregate_result[1] insertUpdate_SQL = """ INSERT INTO TitleBasedRemoveNum1 (reportDate, takeoffDate, trackingWebsite_id, websiteName, websiteType, title, removedNum, complianceTime, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE removedNum = VALUES(removedNum), complianceTime = VALUES(complianceTime), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insertUpdate_SQL, aggregate_result) target_mysql.commit() except Exception, e: logger.debug(" load data to TitleBasedRemoveNum1, %s" %e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info(" load data to TitleBasedRemoveNum1 end") ################################################################################################################################# #sed "/\/home\/vobile\/cwj\/ViacomProject\/dashboard\/job/\/Job\/VIACOM\/Dashboard\/TitleBased/g"
def main(): cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists; file_name %s" % cfg_file) sendToMe(subject="SiteBased_alexa ERROR", body="config file not exists") sys.exit(0) logger.info(": extract data from siteBased start") socket.setdefaulttimeout(10.0) sendToMe(subject="SiteBased_alexa start", body="extract data from siteBased start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port, charset='utf8') alexa_date_min = getMinDatePara("SiteBasedAlexa", "reportDate") alexa_date_min = '2016-04-12' if alexa_date_min == None: alexa_date_min = time.strftime( "%Y-%m-%d", time.localtime(time.time() - 10 * 24 * 60 * 60)) alexa_date_max = time.strftime( "%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60)) delt = time.mktime(time.strptime( str(alexa_date_max), "%Y-%m-%d")) - time.mktime( time.strptime(str(alexa_date_min), "%Y-%m-%d")) if delt >= 10 * 24 * 60 * 60: alexa_date_dict = { "alexa_date_min": alexa_date_min, "alexa_date_max": alexa_date_max } site_SQL = """ select distinct trackingWebsite_id, websiteDomain from SiteBased where reportDate <= "%(alexa_date_max)s" and reportDate > "%(alexa_date_min)s" and alexaGlobalRank = 0 and alexaTopCountry = "unknown" """ % alexa_date_dict site_info = target_mysql.queryCMD(site_SQL) if site_info: base_url = "http://www.alexa.com/siteinfo/" for site in site_info: domain = site[1] url = base_url + domain run_time = 0 alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) while True: run_time += 1 if alexaGlobalRank == 0 or alexaTopCountry == "unknown": alexaGlobalRank, alexaTopCountry = getAlexaInfo( url) else: break if run_time == 3: break time.sleep(random.randint(5, 8)) time.sleep(random.randint(5, 8)) alexa_info_tuple = [(alexa_date_max, site[0], site[1], alexaGlobalRank, alexaTopCountry, time.strftime("%Y-%m-%d %H:%M:%S"))] if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"): insert_SiteBasedAlexa_SQL = """ insert into SiteBasedAlexa (reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) values (%s, %s, %s, %s, %s, %s) on duplicate key update alexaGlobalRank = values(alexaGlobalRank), alexaTopCountry = values(alexaTopCountry), ETLDate = values(ETLDate) """ try: target_mysql.insertUpdateCMD( insert_SiteBasedAlexa_SQL, alexa_info_tuple) print alexa_info_tuple target_mysql.commit() except MySQLdb.Error, e: logger.debug(e) sendToMe(subject="update SiteBasedAlexa Error", body=re.sub(r'\'|"|!', "", str(e))) continue else: logger.info("has no data %s" % alexa_date_max) except Exception, e: logger.debug(": load data to SiteBasedAlexa %s" % e) sendToMe(subject="SiteBasedAlexa ERROR", body=re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def main(): cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists; file_name %s" %cfg_file) sendToMe(subject = "SiteBased_alexa ERROR", body = "config file not exists") sys.exit(0) logger.info(": extract data from siteBased start") socket.setdefaulttimeout(10.0) sendToMe(subject = "SiteBased_alexa start", body = "extract data from siteBased start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') alexa_date_min = getMinDatePara("SiteBasedAlexa", "reportDate") alexa_date_min = '2016-04-12' if alexa_date_min == None: alexa_date_min = time.strftime("%Y-%m-%d", time.localtime(time.time() - 10 * 24 * 60 * 60)) alexa_date_max = time.strftime("%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60)) delt = time.mktime(time.strptime(str(alexa_date_max), "%Y-%m-%d")) - time.mktime(time.strptime(str(alexa_date_min), "%Y-%m-%d")) if delt >= 10*24*60*60: alexa_date_dict = {"alexa_date_min": alexa_date_min, "alexa_date_max": alexa_date_max} site_SQL = """ select distinct trackingWebsite_id, websiteDomain from SiteBased where reportDate <= "%(alexa_date_max)s" and reportDate > "%(alexa_date_min)s" and alexaGlobalRank = 0 and alexaTopCountry = "unknown" """ %alexa_date_dict site_info = target_mysql.queryCMD(site_SQL) if site_info: base_url = "http://www.alexa.com/siteinfo/" for site in site_info: domain = site[1] url = base_url + domain run_time = 0 alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) while True: run_time += 1 if alexaGlobalRank == 0 or alexaTopCountry == "unknown": alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) else: break if run_time == 3: break time.sleep(random.randint(5, 8)) time.sleep(random.randint(5, 8)) alexa_info_tuple = [(alexa_date_max, site[0], site[1], alexaGlobalRank, alexaTopCountry, time.strftime("%Y-%m-%d %H:%M:%S"))] if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"): insert_SiteBasedAlexa_SQL = """ insert into SiteBasedAlexa (reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) values (%s, %s, %s, %s, %s, %s) on duplicate key update alexaGlobalRank = values(alexaGlobalRank), alexaTopCountry = values(alexaTopCountry), ETLDate = values(ETLDate) """ try: target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL, alexa_info_tuple) print alexa_info_tuple target_mysql.commit() except MySQLdb.Error, e: logger.debug(e) sendToMe(subject = "update SiteBasedAlexa Error", body = re.sub(r'\'|"|!', "", str(e))) continue else: logger.info("has no data %s" %alexa_date_max) except Exception, e: logger.debug(": load data to SiteBasedAlexa %s" %e) sendToMe(subject = "SiteBasedAlexa ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
from DM_VIACOM.SelfService_Aggregate_ByNoticedDate as a, TitleBasedTrackingWebsite as b where a.trackingWebsite_id = b.trackingWebsite_id and a.WebsiteType = 'ugc' and b.WebsiteType = 'ugc' and a.trackingWebsite_id = 1 and a.Date_ID >= '%(date_para_CMS_min)s' and a.Date_ID < '%(date_para_CMS_max)s' group by 1, 2, 3, 4, 5 """ %date_para_CMS_dict CMS_result = target_mysql.queryCMD(aggregate_CMS_SQL) insert_CMS_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, infringingNumCMS, reportedViewsCMS, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE infringingNumCMS = VALUES(infringingNumCMS), reportedViewsCMS = VALUES(reportedViewsCMS), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_CMS_SQL, CMS_result) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" %e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info(" aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate (CMS data) to TitleBased1 end")