def main():
    """Backfill matchedVideoViewCountCompletionAll one day per iteration (HBO).

    Sends a start mail, checks the job configuration through judgeFileExist,
    then repeatedly calls getInsightViews for the window
    [start, start + 1 day) until the start date reaches yesterday.  After
    every load the start date is re-read through getMinDatePara; if it did
    not move, an error mail is sent and the loop stops.  An end mail is
    always sent last.
    """
    table_name = "matchedVideoViewCountCompletionAll"
    fallback_start = "2015-02-28"
    day_seconds = 1 * 24 * 60 * 60

    sendToMe(subject="matchedVideoViewCountCompletionAll start",
             body="matchedVideoViewCountCompletionAll start")
    judgeFileExist("/Job/HBO/Dashboard/TitleBased/conf/viacom_dashboard.cfg",
                   "titleBased_matchedVideoViewCountCompletionAll")

    insight_start_date = getMinDatePara(table_name, "report_at") or fallback_start

    while True:
        yesterday = time.strftime('%Y-%m-%d',
                                  time.localtime(time.time() - day_seconds))
        if str(insight_start_date) >= str(yesterday):
            break

        # End of the window: local midnight of the start day plus 24 hours.
        start_ts = time.mktime(time.strptime(str(insight_start_date), "%Y-%m-%d"))
        insight_end_date = time.strftime("%Y-%m-%d",
                                         time.localtime(start_ts + day_seconds))
        getInsightViews(start_date=insight_start_date, end_date=insight_end_date)

        refreshed_date = getMinDatePara(table_name, "report_at") or fallback_start
        if refreshed_date == insight_start_date:
            # The load did not advance the resume date: report and stop
            # instead of retrying the same day forever.
            sendToMe(subject="matchedVideoViewCountCompletionAll Error",
                     body="insight2.1 matchedVideoViewCountCompletionAll no data")
            break
        insight_start_date = refreshed_date

    sendToMe(subject="matchedVideoViewCountCompletionAll end",
             body="matchedVideoViewCountCompletionAll end")
def dataTo_matchedVideoTmp():
    """Copy matchedVideo rows into the matchedVideoTmp table of tracker2.

    The extraction bounds come from getMinDatePara on the target table's
    created_at / updated_at columns, with fixed fallbacks when it returns
    nothing.  Rows fetched by get_matchedVideo are written with an upsert
    statement through dataToTarget.  Start/end are both logged and mailed.
    """
    logger.info("load data target matchedVideoTmp start")
    sendToMe(subject="matchedVideoTmp start", body="matchedVideoTmp start")

    target_db = "tracker2"
    created_floor = getMinDatePara("matchedVideoTmp", "created_at", db=target_db)
    updated_floor = getMinDatePara("matchedVideoTmp", "updated_at", db=target_db)
    created_floor = created_floor or "2015-03-01"
    updated_floor = updated_floor or "0000-00-00 00:00:00"

    rows = get_matchedVideo(created_floor, updated_floor)

    # Upsert: every column except id and company_id is refreshed on conflict.
    insert_sql = (
        " insert into matchedVideoTmp (id, company_id, trackingMeta_id, trackingWebsite_id,"
        " view_count, count_send_notice, first_send_notice_date, takeoff_time, hide_flag,"
        " clip_duration, matchedFile_id, meta_title, created_at, updated_at)"
        " values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        " ON DUPLICATE KEY UPDATE"
        " trackingMeta_id = values(trackingMeta_id),"
        " trackingWebsite_id = values(trackingWebsite_id),"
        " view_count = values(view_count),"
        " count_send_notice = values(count_send_notice),"
        " first_send_notice_date = values(first_send_notice_date),"
        " takeoff_time = values(takeoff_time),"
        " hide_flag = values(hide_flag),"
        " clip_duration = values(clip_duration),"
        " matchedFile_id = values(matchedFile_id),"
        " meta_title = values(meta_title),"
        " created_at = values(created_at),"
        " updated_at = values(updated_at) "
    )
    dataToTarget(rows, target_db, insert_sql)

    sendToMe(subject="matchedVideoTmp end", body="matchedVideoTmp end")
    logger.info("load data target matchedVideoTmp end")
def main():
    """Backfill matchedVideoViewCountCompletion one day per iteration.

    Starting from the date reported by getMinDatePara (or 2015-02-28 when
    it returns nothing), loads one day at a time through getInsightViews
    until the start date reaches yesterday.  Each window is printed.

    Changes from the previous version:
      * If a load does not advance the resume date (or the date cannot be
        read back), an error mail is sent and the loop stops.  Previously
        an unchanged date made the loop retry the same day forever, and a
        missing date only terminated because the string "None" happens to
        sort after any date string.
      * The next day is computed with calendar arithmetic.  Adding 86400
        seconds to local midnight lands on the same calendar day when a
        DST change makes the day 25 hours long.
    """
    import datetime  # local import: this block cannot see the file's import header

    table_name = "matchedVideoViewCountCompletion"
    sendToMe(subject="matchedVideoViewCountCompletion start",
             body="matchedVideoViewCountCompletion start")

    insight_start_date = getMinDatePara(table_name, "report_at")
    if not insight_start_date:
        insight_start_date = "2015-02-28"

    while True:
        yesterday = time.strftime('%Y-%m-%d',
                                  time.localtime(time.time() - 1 * 24 * 60 * 60))
        if str(insight_start_date) >= yesterday:
            break

        start_day = datetime.datetime.strptime(str(insight_start_date),
                                               "%Y-%m-%d").date()
        insight_end_date = (start_day + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        # Same output as the former `print start, end` statement.
        print("%s %s" % (insight_start_date, insight_end_date))
        getInsightViews(start_date=insight_start_date, end_date=insight_end_date)

        next_start_date = getMinDatePara(table_name, "report_at")
        if not next_start_date or str(next_start_date) == str(insight_start_date):
            # No progress: stop instead of reloading the same day forever.
            sendToMe(subject="matchedVideoViewCountCompletion Error",
                     body="insight2.1 matchedVideoViewCountCompletion no data")
            break
        insight_start_date = next_start_date

    sendToMe(subject="matchedVideoViewCountCompletion end",
             body="matchedVideoViewCountCompletion end")
def main():
    """Backfill matchedVideoViewCountCompletionAll one day per iteration (FOX).

    Sends a start mail, checks the job configuration through judgeFileExist,
    then loads one day at a time through getInsightViews until the start
    date reaches yesterday.  After each load the start date is re-read; if
    it did not move, an error mail is sent and the loop stops.

    Changes from the previous version:
      * One fallback date is used everywhere.  The loop used to fall back
        to "2015-02-28" while the initial fallback was "2015-04-30", so an
        empty table rewound the start date and bypassed the stall check.
      * The stall check compares string forms, so a date object and an
        equal date string are recognised as the same day.
      * The next day is computed with calendar arithmetic.  Adding 86400
        seconds to local midnight lands on the same calendar day when a
        DST change makes the day 25 hours long.
    """
    import datetime  # local import: this block cannot see the file's import header

    table_name = "matchedVideoViewCountCompletionAll"
    default_start_date = "2015-04-30"

    sendToMe(subject="matchedVideoViewCountCompletionAll start",
             body="matchedVideoViewCountCompletionAll start")
    cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    job_name = "titleBased_matchedVideoViewCountCompletionAll"
    judgeFileExist(cfg_file, job_name)

    insight_start_date = getMinDatePara(table_name, "report_at")
    if not insight_start_date:
        insight_start_date = default_start_date

    while True:
        yesterday = time.strftime('%Y-%m-%d',
                                  time.localtime(time.time() - 1 * 24 * 60 * 60))
        if str(insight_start_date) >= yesterday:
            break

        start_day = datetime.datetime.strptime(str(insight_start_date),
                                               "%Y-%m-%d").date()
        insight_end_date = (start_day + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        getInsightViews(start_date=insight_start_date, end_date=insight_end_date)

        tmp_date = getMinDatePara(table_name, "report_at")
        if not tmp_date:
            tmp_date = default_start_date
        if str(tmp_date) == str(insight_start_date):
            # The load did not advance the resume date: report and stop.
            sendToMe(subject="matchedVideoViewCountCompletionAll Error",
                     body="insight2.1 matchedVideoViewCountCompletionAll no data")
            break
        insight_start_date = tmp_date

    sendToMe(subject="matchedVideoViewCountCompletionAll end",
             body="matchedVideoViewCountCompletionAll end")
def dataTo_matchedVideoTmp():
    """Copy matchedVideo rows into the matchedVideoTmp table of FOX_DASHBOARD.

    Lower bounds for the extraction are read with getMinDatePara from the
    target table (created_at and updated_at); fixed defaults apply when
    either is empty.  The rows returned by get_matchedVideo are upserted
    through dataToTarget.  Progress is logged and mailed at start and end.
    """
    target_db = "FOX_DASHBOARD"
    default_created = "2015-03-01"
    default_updated = "0000-00-00 00:00:00"

    logger.info("load data target matchedVideoTmp start")
    sendToMe(subject="matchedVideoTmp start", body="matchedVideoTmp start")

    since_created = getMinDatePara("matchedVideoTmp", "created_at", db=target_db)
    since_updated = getMinDatePara("matchedVideoTmp", "updated_at", db=target_db)
    if not since_created:
        since_created = default_created
    if not since_updated:
        since_updated = default_updated

    # On a duplicate key every column except id and company_id is overwritten.
    insert_sql = (
        " insert into matchedVideoTmp (id, company_id, trackingMeta_id, trackingWebsite_id,"
        " view_count, count_send_notice, first_send_notice_date, takeoff_time, hide_flag,"
        " clip_duration, matchedFile_id, meta_title, created_at, updated_at)"
        " values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        " ON DUPLICATE KEY UPDATE"
        " trackingMeta_id = values(trackingMeta_id),"
        " trackingWebsite_id = values(trackingWebsite_id),"
        " view_count = values(view_count),"
        " count_send_notice = values(count_send_notice),"
        " first_send_notice_date = values(first_send_notice_date),"
        " takeoff_time = values(takeoff_time),"
        " hide_flag = values(hide_flag),"
        " clip_duration = values(clip_duration),"
        " matchedFile_id = values(matchedFile_id),"
        " meta_title = values(meta_title),"
        " created_at = values(created_at),"
        " updated_at = values(updated_at) "
    )
    dataToTarget(get_matchedVideo(since_created, since_updated), target_db, insert_sql)

    sendToMe(subject="matchedVideoTmp end", body="matchedVideoTmp end")
    logger.info("load data target matchedVideoTmp end")
def main():
    """Run the titleBased_infringAllViews backfill in two phases.

    Phase 1 loads matchedVideo through getMatchedVideo, phase 2 loads
    matchedVideoViewCountCompletion through getInsightViews.  Each phase
    walks forward one day at a time from the date reported by
    getMinDatePara (or 2015-02-28 when empty) until that date reaches the
    day 365 days before today.

    Changes from the previous version:
      * The two copy-pasted loops share one nested helper.
      * If a load does not advance the resume date, an error mail is sent
        and that phase stops.  Previously an unchanged date looped forever
        and a missing date only terminated because the string "None"
        sorts after any date string.
      * The next day is computed with calendar arithmetic.  Adding 86400
        seconds to local midnight lands on the same calendar day when a
        DST change makes the day 25 hours long.
    """
    import datetime  # local import: this block cannot see the file's import header

    def _backfill_daily(table_name, date_column, load_window):
        # Load one-day windows for table_name until the cut-off is reached.
        start_date = getMinDatePara(table_name, date_column)
        if not start_date:
            start_date = "2015-02-28"
        while True:
            cutoff = time.strftime('%Y-%m-%d',
                                   time.localtime(time.time() - 24 * 60 * 60 * 365))
            if str(start_date) >= cutoff:
                break
            start_day = datetime.datetime.strptime(str(start_date), "%Y-%m-%d").date()
            end_date = (start_day + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
            load_window(start_date=start_date, end_date=end_date)

            next_start = getMinDatePara(table_name, date_column)
            if not next_start or str(next_start) == str(start_date):
                # No progress: stop instead of reloading the same day forever.
                sendToMe(subject="titleBased_infringAllViews Error",
                         body="%s no data" % table_name)
                break
            start_date = next_start

    sendToMe(subject="titleBased_infringAllViews start",
             body="titleBased_infringAllViews start")

    _backfill_daily("matchedVideo", "firstSendNoticeDate", getMatchedVideo)
    _backfill_daily("matchedVideoViewCountCompletion", "report_at", getInsightViews)

    sendToMe(subject="titleBased_infringAllViews end",
             body="titleBased_infringAllViews end")
def main():
    """Backfill matchedVideoViewCountCompletion, lagging two days behind today.

    Starting from the date reported by getMinDatePara (or 2015-02-28 when
    it returns nothing), loads one day at a time through getInsightViews
    until the start date reaches the day before yesterday.

    Changes from the previous version:
      * If a load does not advance the resume date (or the date cannot be
        read back), an error mail is sent and the loop stops.  Previously
        an unchanged date made the loop retry the same day forever.
      * The next day is computed with calendar arithmetic.  Adding 86400
        seconds to local midnight lands on the same calendar day when a
        DST change makes the day 25 hours long.
    """
    import datetime  # local import: this block cannot see the file's import header

    table_name = "matchedVideoViewCountCompletion"
    sendToMe(subject="matchedVideoViewCountCompletion start",
             body="matchedVideoViewCountCompletion start")

    insight_start_date = getMinDatePara(table_name, "report_at")
    if not insight_start_date:
        insight_start_date = "2015-02-28"

    while True:
        cutoff = time.strftime('%Y-%m-%d',
                               time.localtime(time.time() - 2 * 24 * 60 * 60))
        if str(insight_start_date) >= cutoff:
            break

        start_day = datetime.datetime.strptime(str(insight_start_date),
                                               "%Y-%m-%d").date()
        insight_end_date = (start_day + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        getInsightViews(start_date=insight_start_date, end_date=insight_end_date)

        next_start_date = getMinDatePara(table_name, "report_at")
        if not next_start_date or str(next_start_date) == str(insight_start_date):
            # No progress: stop instead of reloading the same day forever.
            sendToMe(subject="matchedVideoViewCountCompletion Error",
                     body="insight2.1 matchedVideoViewCountCompletion no data")
            break
        insight_start_date = next_start_date

    sendToMe(subject="matchedVideoViewCountCompletion end",
             body="matchedVideoViewCountCompletion end")
def main():
    """Load matchedVideo rows into tracker2 (FOX job).

    Checks the job configuration through judgeFileExist, reads the
    created_at / updated_at lower bounds from the target table with
    getMinDatePara (fixed defaults when empty), fetches rows with
    getMatchedVideo and hands them to dataToTarget.  A mail is sent at
    start and at end.
    """
    target_db = "tracker2"

    sendToMe(subject="matchedVideo start", body="matchedVideo start")
    judgeFileExist("/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg",
                   "matchedVideo")

    created_floor = getMinDatePara("matchedVideo", "created_at", db=target_db)
    updated_floor = getMinDatePara("matchedVideo", "updated_at", db=target_db)
    created_floor = created_floor or "2015-03-01"
    updated_floor = updated_floor or "0000-00-00 00:00:00"

    dataToTarget(getMatchedVideo(created_floor, updated_floor), target_db)
    sendToMe(subject="matchedVideo end", body="matchedVideo end")
def main():
    """Load matchedVideo rows into tracker2 (VIACOM job).

    Checks the job configuration through judgeFileExist, reads lower
    bounds with getMinDatePara, fetches rows with getMatchedVideo and hands
    them to dataToTarget.  A mail is sent at start and at end.

    NOTE(review): the bounds are read from a table named "Test" and the
    created_at bound is then replaced by the literal '2016-05-08'.  Both
    look like leftovers from a manual run -- confirm before relying on
    this job for incremental loads.
    """
    target_db = "tracker2"

    sendToMe(subject="matchedVideo start", body="matchedVideo start")
    judgeFileExist("/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg",
                   "matchedVideo")

    # Both lookups are still issued, in the original order, although the
    # first result is discarded by the override below.
    created_floor = getMinDatePara("Test", "created_at", db=target_db)
    updated_floor = getMinDatePara("Test", "updated_at", db=target_db)
    created_floor = created_floor or "2015-03-01"
    updated_floor = updated_floor or "0000-00-00 00:00:00"
    # Hard-coded override (assumed unconditional; the collapsed source does
    # not show whether it was nested under the preceding `if`).
    created_floor = '2016-05-08'

    rows = getMatchedVideo(created_floor, updated_floor)
    dataToTarget(rows, target_db)
    sendToMe(subject="matchedVideo end", body="matchedVideo end")
def updateSiteBasedAlexa():
    """Prepare the statement that copies Alexa figures into SiteBased.

    Logs "start", reads the SiteBasedAlexa reportDate through
    getMinDatePara, builds an UPDATE that copies alexaGlobalRank and
    alexaTopCountry from the newest SiteBasedAlexa rows onto later
    SiteBased rows, then logs "end".

    NOTE(review): in the code visible here neither the date nor the SQL
    text is used afterwards -- the statement is never executed.  Confirm
    whether an execute call was lost.
    """
    logger.info("start")

    # Kept for its lookup side effect; the value itself is not used below.
    alexa_report_date_para = getMinDatePara("SiteBasedAlexa", "reportDate")

    update_SQL = (
        " update SiteBased as a, SiteBasedAlexa as b"
        " set a.alexaGlobalRank = b.alexaGlobalRank,"
        " a.alexaTopCountry = b.alexaTopCountry"
        " where a.trackingWebsite_id = b.trackingWebsite_id"
        " and b.reportDate = (select max(reportDate) from SiteBasedAlexa)"
        " and a.reportDate > (select max(reportDate) from SiteBasedAlexa) "
    )

    logger.info("end")
def main():
    """Load matchedVideo one report day per iteration.

    On every pass the report-date and update-date lower bounds are re-read
    with getMinDatePara (falling back to 2015-02-28), the upper bound is
    set to the report date plus one day, and getMatchedVideo is called
    with the three values.  The loop ends once the upper bound reaches
    yesterday.  A mail is sent at start and at end.

    NOTE(review): nothing here stops the loop if getMatchedVideo fails to
    advance the report date -- verify that it always does.
    """
    fallback_date = "2015-02-28"
    day_seconds = 1 * 24 * 60 * 60

    sendToMe(subject="matchedVideo start", body="matchedVideo start")

    while True:
        min_reportDate = getMinDatePara("matchedVideo", "reportDate") or fallback_date
        report_ts = time.mktime(time.strptime(str(min_reportDate), "%Y-%m-%d"))
        max_reportDate = time.strftime("%Y-%m-%d",
                                       time.localtime(report_ts + day_seconds))
        min_updateDate = getMinDatePara("matchedVideo", "updateDate") or fallback_date

        yesterday = time.strftime('%Y-%m-%d',
                                  time.localtime(time.time() - day_seconds))
        if str(max_reportDate) >= str(yesterday):
            break
        getMatchedVideo(min_reportDate, max_reportDate, min_updateDate)

    sendToMe(subject="matchedVideo end", body="matchedVideo end")
def main():
    """Recompute infringing-view totals from 2015-02-28 up to the resume date.

    Reads the end date with getMinDatePara from
    matchedVideoViewCountCompletion and calls updateViews over
    ["2015-02-28", end_date].  A mail is sent at start and at end.

    Changes from the previous version:
      * The "end date minus one day" computation was removed.  Its result
        was immediately overwritten by the fixed start date, and it raised
        ValueError whenever the end date was missing.
      * When no end date is available, a "has no data" mail is sent and
        the function returns without calling updateViews.

    NOTE(review): the start date is a fixed literal, so every run
    recomputes the whole history -- confirm this is intended.
    """
    sendToMe(subject="titleBased_infringViews start",
             body="titleBased_infringViews start")

    end_date = getMinDatePara("matchedVideoViewCountCompletion", "report_at")
    if not end_date:
        sendToMe(subject="matchedVideoViewCountCompletion has no data",
                 body="matchedVideoViewCountCompletion has no data")
        return

    start_date = "2015-02-28"
    updateViews(start_date, end_date)

    sendToMe(subject="titleBased_infringViews end",
             body="titleBased_infringViews end")
def main():
    """Recompute infringing-view totals for the ten days before the resume date.

    Checks the job configuration through judgeFileExist, reads the end
    date with getMinDatePara from matchedVideoViewCountCompletionAll and
    calls updateViews over [end_date - 10 days, end_date].  A mail is sent
    at start and at end.

    Change from the previous version: when no end date is available the
    function now returns right after sending the "has no data" mail.  It
    used to fall through and raise ValueError while parsing the string
    "None" as a date.
    """
    sendToMe(subject="titleBased_infringAllViews start",
             body="titleBased_infringAllViews start")
    cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    judgeFileExist(cfg_file=cfg_file, job_name="titleBased_infringAllViews")

    end_date = getMinDatePara("matchedVideoViewCountCompletionAll", "report_at")
    if not end_date:
        sendToMe(subject="matchedVideoViewCountCompletionAll has no data",
                 body="matchedVideoViewCountCompletionAll has no data")
        return

    # Window start: ten days (in seconds) before local midnight of end_date.
    end_ts = time.mktime(time.strptime(str(end_date), "%Y-%m-%d"))
    start_date = time.strftime("%Y-%m-%d",
                               time.localtime(end_ts - 10 * 24 * 60 * 60))
    updateViews(start_date, end_date)

    sendToMe(subject="titleBased_infringAllViews end",
             body="titleBased_infringAllViews end")
def getDataFromVT():
    """Query removal counts and compliance times from the vtweb database.

    Builds one SQL statement made of UNION ALL branches over
    tracker2.matchedVideo joined to mddb.trackingWebsite for company_id 14
    and hide_flag 2:
      * website_type "ugc";
      * website_type "hybrid", split into matchedFile_id = 0 and
        matchedFile_id > 0 (the latter joined to tracker2.matchedFileItem),
        then re-aggregated;
      * website_type "cyberlocker", joined to tracker2.matchedFileItem.
    Each branch groups by report date, take-off date, website and meta id
    and yields the removed count plus the summed minutes between first
    notice and take-off.  Rows are limited to take-off dates strictly
    between the min and max parameters.

    On any exception the error is logged and mailed and the process exits
    with status 0.

    NOTE(review): in the code visible here `result` is neither returned
    nor used -- the rest of the function may be missing from this view.
    """
    logger.info(": extract data from tracker2 start")
    date_para_TitleBasedRemoveNum_min = getMinDatePara(table_name = "TitleBasedRemoveNum", date_para = "takeoffDate")
    if date_para_TitleBasedRemoveNum_min == None:
        date_para_TitleBasedRemoveNum_min = "2015-02-28"
    # NOTE(review): this assignment appears to override the looked-up value
    # unconditionally (its nesting is not recoverable from the collapsed
    # source) -- confirm whether it is a leftover from a manual full reload.
    date_para_TitleBasedRemoveNum_min = "2015-02-28"
    # Upper bound is today's local date ("- 0 *" leaves the timestamp unchanged).
    date_para_TitleBasedRemoveNum_max = time.strftime("%Y-%m-%d", time.localtime(time.time() - 0 * 24 * 60 * 60))
    date_para_TitleBasedRemoveNum_dict = {"date_para_TitleBasedRemoveNum_min":date_para_TitleBasedRemoveNum_min, \
            "date_para_TitleBasedRemoveNum_max":date_para_TitleBasedRemoveNum_max, "min_report_date": "2015-03-01"}
    # "%%" is a literal percent sign for date_format; "%(name)s" is filled
    # from the dict above by the % operator at the end of the literal.
    vt_TitleBasedRemoveNum_SQL = """ select date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate, a.trackingWebsite_id, a.trackingMeta_id, count(*) removedNum, sum(case when a.first_send_notice_date >0 and a.takeoff_time>0 then TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0 end) as complianceTime, CURRENT_TIMESTAMP as ETLDate from tracker2.matchedVideo as a, mddb.trackingWebsite as b where a.trackingWebsite_id = b.id and a.company_id = 14 and b.website_type = "ugc" and a.count_send_notice > 0 and hide_flag = 2 and a.first_send_notice_date < a.takeoff_time and a.first_send_notice_date > 0 and a.created_at >= "%(min_report_date)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s" group by 1, 2, 3, 4 UNION ALL select reportDate, takeoffDate, trackingWebsite_id, trackingMeta_id, sum(removedNum) removedNum, sum(complianceTime) complianceTime, CURRENT_TIMESTAMP as ETLDate from (select date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate, a.trackingWebsite_id, a.trackingMeta_id, count(*) removedNum, sum(case when a.first_send_notice_date >0 and a.takeoff_time>0 then 
TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0 end) as complianceTime from tracker2.matchedVideo as a, mddb.trackingWebsite as b where a.trackingWebsite_id = b.id and a.company_id = 14 and b.website_type = "hybrid" and a.count_send_notice > 0 and a.matchedFile_id = 0 and hide_flag = 2 and a.first_send_notice_date < a.takeoff_time and a.first_send_notice_date > 0 and a.created_at >= "%(min_report_date)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s" group by 1, 2, 3, 4 union all select date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate, a.trackingWebsite_id, a.trackingMeta_id, count(*) removedNum, sum(case when a.first_send_notice_date >0 and a.takeoff_time>0 then TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0 end) as complianceTime from tracker2.matchedVideo as a, mddb.trackingWebsite as b, tracker2.matchedFileItem d where a.trackingWebsite_id = b.id and d.matchedFile_id = a.matchedFile_id and a.company_id = 14 and b.website_type = "hybrid" and a.matchedFile_id > 0 and a.count_send_notice > 0 and hide_flag = 2 and a.first_send_notice_date < a.takeoff_time and a.first_send_notice_date > 0 and a.created_at >= "%(min_report_date)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s" group by 1, 2, 3, 4) as a group by 1,2 ,3 ,4 UNION ALL select date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate, a.trackingWebsite_id, a.trackingMeta_id, count(*) removedNum, sum(case when a.first_send_notice_date >0 and a.takeoff_time>0 then TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0 end) as complianceTime, CURRENT_TIMESTAMP as ETLDate 
from tracker2.matchedVideo as a, mddb.trackingWebsite as b, tracker2.matchedFileItem d where a.trackingWebsite_id = b.id and d.matchedFile_id = a.matchedFile_id and a.company_id = 14 and b.website_type = "cyberlocker" and a.count_send_notice > 0 and hide_flag = 2 and a.first_send_notice_date < a.takeoff_time and a.first_send_notice_date > 0 and a.created_at >= "%(min_report_date)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s" group by 1, 2, 3, 4 """ %date_para_TitleBasedRemoveNum_dict
    # Name of the config section holding the source database credentials.
    vtweb_tracker2_section = "vtweb_staging"
    try:
        vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section)
        vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db)
        # Fix the session time zone before running the date_format-based query.
        vtweb_mysql.queryCMD("set time_zone = '-8:00'")
        result = vtweb_mysql.queryCMD(vt_TitleBasedRemoveNum_SQL)
    except Exception, e:
        logger.debug(": extract data from vt for TitleBasedRemoveNum, %s" %e)
        # Quotes and exclamation marks are stripped from the message before mailing.
        sendToMe(subject = "TitleBasedRemove ERROR", body = re.sub(r'\'|"|!', "", str(e)))
        # Exits with status 0 even though the extraction failed.
        sys.exit(0)
def main():
    """Refresh Alexa rank/country for SiteBased rows lacking them (VIACOM).

    Exits (status 0) with an error mail when the config file is missing.
    Otherwise connects to the target database, and -- when at least ten
    days lie between the minimum date and yesterday -- selects the sites
    whose alexaGlobalRank is 0 and alexaTopCountry is "unknown", fetches
    each one through getAlexaInfo (up to three re-fetches with a random
    5-8 second pause) and upserts usable results into SiteBasedAlexa, one
    commit per site.  Any other exception is logged and mailed and the
    process exits with status 0.

    Changes from the previous version:
      * The missing-config message goes to the job's `logger` instead of
        the root `logging` module, where a DEBUG record is dropped by
        default.
      * `== None` replaced by `is None`; exception clauses and the print
        call use syntax valid on Python 2.6+ and Python 3.
      * The constant insert statement is built once, not once per site.

    NOTE(review): the minimum date read from the database is immediately
    replaced by the literal '2016-04-12', which makes the `is None`
    fallback unreachable.  It is kept because removing it changes which
    rows are processed -- confirm whether it is a debugging leftover.
    """
    cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    if not os.path.exists(cfg_file):
        logger.debug(": config file not exists; file_name %s" % cfg_file)
        sendToMe(subject="SiteBased_alexa ERROR", body="config file not exists")
        sys.exit(0)

    logger.info(": extract data from siteBased start")
    socket.setdefaulttimeout(10.0)
    sendToMe(subject="SiteBased_alexa start", body="extract data from siteBased start")

    target_server_section = "target_server_staging"
    target_host, target_user, target_passwd, target_port, target_db = \
        getConfMysqlInfo(target_server_section)

    insert_SiteBasedAlexa_SQL = (
        " insert into SiteBasedAlexa (reportDate, trackingWebsite_id, websiteDomain,"
        " alexaGlobalRank, alexaTopCountry, ETLDate)"
        " values (%s, %s, %s, %s, %s, %s)"
        " on duplicate key update alexaGlobalRank = values(alexaGlobalRank),"
        " alexaTopCountry = values(alexaTopCountry), ETLDate = values(ETLDate) "
    )

    try:
        target_mysql = MySQLHelper(host=target_host, user=target_user,
                                   passwd=target_passwd, db_name=target_db,
                                   port=target_port, charset='utf8')

        alexa_date_min = getMinDatePara("SiteBasedAlexa", "reportDate")
        alexa_date_min = '2016-04-12'  # see NOTE(review) in the docstring
        if alexa_date_min is None:
            alexa_date_min = time.strftime(
                "%Y-%m-%d", time.localtime(time.time() - 10 * 24 * 60 * 60))
        alexa_date_max = time.strftime(
            "%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60))

        # Seconds between the two local midnights.
        delt = (time.mktime(time.strptime(str(alexa_date_max), "%Y-%m-%d"))
                - time.mktime(time.strptime(str(alexa_date_min), "%Y-%m-%d")))
        if delt >= 10 * 24 * 60 * 60:
            alexa_date_dict = {"alexa_date_min": alexa_date_min,
                               "alexa_date_max": alexa_date_max}
            site_SQL = (
                ' select distinct trackingWebsite_id, websiteDomain from SiteBased'
                ' where reportDate <= "%(alexa_date_max)s"'
                ' and reportDate > "%(alexa_date_min)s"'
                ' and alexaGlobalRank = 0 and alexaTopCountry = "unknown" '
            ) % alexa_date_dict
            site_info = target_mysql.queryCMD(site_SQL)

            if site_info:
                base_url = "http://www.alexa.com/siteinfo/"
                for site in site_info:
                    url = base_url + site[1]
                    run_time = 0
                    alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
                    # Re-fetch while either value is still a placeholder,
                    # at most three times, pausing between attempts.
                    while True:
                        run_time += 1
                        if alexaGlobalRank == 0 or alexaTopCountry == "unknown":
                            alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
                        else:
                            break
                        if run_time == 3:
                            break
                        time.sleep(random.randint(5, 8))
                    # Pause before moving on to the next site.
                    time.sleep(random.randint(5, 8))

                    alexa_info_tuple = [(alexa_date_max, site[0], site[1],
                                         alexaGlobalRank, alexaTopCountry,
                                         time.strftime("%Y-%m-%d %H:%M:%S"))]
                    # Store the row unless both values are still placeholders.
                    if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"):
                        try:
                            target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL,
                                                         alexa_info_tuple)
                            print(alexa_info_tuple)
                            target_mysql.commit()
                        except MySQLdb.Error as e:
                            # One failed site must not abort the whole run.
                            logger.debug(e)
                            sendToMe(subject="update SiteBasedAlexa Error",
                                     body=re.sub(r'\'|"|!', "", str(e)))
                            continue
            else:
                logger.info("has no data %s" % alexa_date_max)
    except Exception as e:
        logger.debug(": load data to SiteBasedAlexa %s" % e)
        sendToMe(subject="SiteBasedAlexa ERROR", body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
logger = logging.getLogger("titleBased_remove") logger.setLevel(logging.DEBUG) log_file = '/Job/VIACOM/Dashboard/TitleBasedStaging/log/titleBased_remove.log' filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5*1024*1024, backupCount=10, mode='a') formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') filehandler.setFormatter(formatter) logger.addHandler(filehandler) cfg_file = "/Job/VIACOM/Dashboard/TitleBasedStaging/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists") sys.exit(0) ################################################################################################################################# logger.info(": extract data from tracker2 start") # extract dat from vtweb date_para_TitleBasedRemoveNum_min = getMinDatePara(table_name = "TitleBasedRemoveNum", date_para = "takeoffDate") if date_para_TitleBasedRemoveNum_min == None: date_para_TitleBasedRemoveNum_min = "2015-02-28" print date_para_TitleBasedRemoveNum_min date_para_TitleBasedRemoveNum_max = time.strftime("%Y-%m-%d", time.localtime(time.time() - 0 * 24 * 60 * 60)) date_para_TitleBasedRemoveNum_dict = {"date_para_TitleBasedRemoveNum_min":date_para_TitleBasedRemoveNum_min, \ "date_para_TitleBasedRemoveNum_max":date_para_TitleBasedRemoveNum_max, "min_report_date": "2015-03-01"} vt_TitleBasedRemoveNum_SQL = """ select date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate, a.trackingWebsite_id, a.trackingMeta_id, count(*) removedNum, sum(case when a.first_send_notice_date >0 and a.takeoff_time>0
return getGlobalRank(html), getTopOneCountry(html) target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port, charset='utf8') alexa_date_min = getMinDatePara("SiteBasedAlexa", "reportDate") if alexa_date_min == None: alexa_date_min = time.strftime( "%Y-%m-%d", time.localtime(time.time() - 10 * 24 * 60 * 60)) alexa_date_max = time.strftime( "%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60)) alexa_date_dict = { "alexa_date_min": alexa_date_min, "alexa_date_max": alexa_date_max } site_SQL = """ select distinct trackingWebsite_id, websiteDomain from SiteBased where reportDate <= "%(alexa_date_max)s" and reportDate > "%(alexa_date_min)s" and alexaGlobalRank = 0
def main():
    """Refresh Alexa rank/country for SiteBased rows lacking them (FOX).

    Exits (status 0) with an error mail when the config file is missing.
    Otherwise connects to the target database, and -- when at least ten
    days lie between the minimum date and yesterday -- selects the sites
    whose alexaGlobalRank is 0 and alexaTopCountry is "unknown", fetches
    each one through getAlexaInfo (up to three re-fetches with a random
    5-8 second pause) and upserts usable results into SiteBasedAlexa, one
    commit per site.  Any other exception is logged and mailed and the
    process exits with status 0.

    Changes from the previous version:
      * The missing-config message goes to the job's `logger` instead of
        the root `logging` module, where a DEBUG record is dropped by
        default.
      * `== None` replaced by `is None`; exception clauses and the print
        call use syntax valid on Python 2.6+ and Python 3.
      * The constant insert statement is built once, not once per site.

    NOTE(review): the minimum date read from the database is immediately
    replaced by the literal '2016-04-12', which makes the `is None`
    fallback unreachable.  It is kept because removing it changes which
    rows are processed -- confirm whether it is a debugging leftover.
    """
    cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    if not os.path.exists(cfg_file):
        logger.debug(": config file not exists; file_name %s" % cfg_file)
        sendToMe(subject="SiteBased_alexa ERROR", body="config file not exists")
        sys.exit(0)

    logger.info(": extract data from siteBased start")
    socket.setdefaulttimeout(10.0)
    sendToMe(subject="SiteBased_alexa start", body="extract data from siteBased start")

    target_server_section = "target_server_staging"
    target_host, target_user, target_passwd, target_port, target_db = \
        getConfMysqlInfo(target_server_section)

    insert_SiteBasedAlexa_SQL = (
        " insert into SiteBasedAlexa (reportDate, trackingWebsite_id, websiteDomain,"
        " alexaGlobalRank, alexaTopCountry, ETLDate)"
        " values (%s, %s, %s, %s, %s, %s)"
        " on duplicate key update alexaGlobalRank = values(alexaGlobalRank),"
        " alexaTopCountry = values(alexaTopCountry), ETLDate = values(ETLDate) "
    )

    try:
        target_mysql = MySQLHelper(host=target_host, user=target_user,
                                   passwd=target_passwd, db_name=target_db,
                                   port=target_port, charset='utf8')

        alexa_date_min = getMinDatePara("SiteBasedAlexa", "reportDate")
        alexa_date_min = '2016-04-12'  # see NOTE(review) in the docstring
        if alexa_date_min is None:
            alexa_date_min = time.strftime(
                "%Y-%m-%d", time.localtime(time.time() - 10 * 24 * 60 * 60))
        alexa_date_max = time.strftime(
            "%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60))

        # Seconds between the two local midnights.
        delt = (time.mktime(time.strptime(str(alexa_date_max), "%Y-%m-%d"))
                - time.mktime(time.strptime(str(alexa_date_min), "%Y-%m-%d")))
        if delt >= 10 * 24 * 60 * 60:
            alexa_date_dict = {"alexa_date_min": alexa_date_min,
                               "alexa_date_max": alexa_date_max}
            site_SQL = (
                ' select distinct trackingWebsite_id, websiteDomain from SiteBased'
                ' where reportDate <= "%(alexa_date_max)s"'
                ' and reportDate > "%(alexa_date_min)s"'
                ' and alexaGlobalRank = 0 and alexaTopCountry = "unknown" '
            ) % alexa_date_dict
            site_info = target_mysql.queryCMD(site_SQL)

            if site_info:
                base_url = "http://www.alexa.com/siteinfo/"
                for site in site_info:
                    url = base_url + site[1]
                    run_time = 0
                    alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
                    # Re-fetch while either value is still a placeholder,
                    # at most three times, pausing between attempts.
                    while True:
                        run_time += 1
                        if alexaGlobalRank == 0 or alexaTopCountry == "unknown":
                            alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
                        else:
                            break
                        if run_time == 3:
                            break
                        time.sleep(random.randint(5, 8))
                    # Pause before moving on to the next site.
                    time.sleep(random.randint(5, 8))

                    alexa_info_tuple = [(alexa_date_max, site[0], site[1],
                                         alexaGlobalRank, alexaTopCountry,
                                         time.strftime("%Y-%m-%d %H:%M:%S"))]
                    # Store the row unless both values are still placeholders.
                    if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"):
                        try:
                            target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL,
                                                         alexa_info_tuple)
                            print(alexa_info_tuple)
                            target_mysql.commit()
                        except MySQLdb.Error as e:
                            # One failed site must not abort the whole run.
                            logger.debug(e)
                            sendToMe(subject="update SiteBasedAlexa Error",
                                     body=re.sub(r'\'|"|!', "", str(e)))
                            continue
            else:
                logger.info("has no data %s" % alexa_date_max)
    except Exception as e:
        logger.debug(": load data to SiteBasedAlexa %s" % e)
        sendToMe(subject="SiteBasedAlexa ERROR", body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)