def updateSiteBasedAlexa(): sendToMe(subject = "update alexa data start", body = "update alexa data start") logger.info(" updateSiteBasedAlexa start") update_SQL1 = """ update SiteBased as a, SiteBasedAlexa as b set a.alexaGlobalRank = b.alexaGlobalRank, a.alexaTopCountry = b.alexaTopCountry where a.trackingWebsite_id = b.trackingWebsite_id and b.reportDate = (select max(reportDate) from SiteBasedAlexa) and a.reportDate > (select max(reportDate) from SiteBasedAlexa); """ update_SQL2 = """ update SiteBased as a, SiteBasedAlexa as b set a.alexaGlobalRank = b.alexaGlobalRank, a.alexaTopCountry = b.alexaTopCountry where a.trackingWebsite_id = b.trackingWebsite_id and b.reportDate = (select max(reportDate) from SiteBasedAlexa) and a.alexaTopCountry = "unknown" and a.alexaGlobalRank = 0; """ try: target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') target_mysql.queryCMD(update_SQL1) target_mysql.commit() target_mysql.queryCMD(update_SQL2) target_mysql.commit() except Exception, e: sendToMe(subject = "update alexa data ERROR", body = e) logger.DEBUG("update alexa data %s" %e)
def updateSiteBasedAlexa(): sendToMe(subject="update alexa data start", body="update alexa data start") logger.info(" updateSiteBasedAlexa start") update_SQL1 = """ update SiteBased as a, SiteBasedAlexa as b set a.alexaGlobalRank = b.alexaGlobalRank, a.alexaTopCountry = b.alexaTopCountry where a.trackingWebsite_id = b.trackingWebsite_id and b.reportDate = (select max(reportDate) from SiteBasedAlexa) and a.reportDate > (select max(reportDate) from SiteBasedAlexa); """ update_SQL2 = """ update SiteBased as a, SiteBasedAlexa as b set a.alexaGlobalRank = b.alexaGlobalRank, a.alexaTopCountry = b.alexaTopCountry where a.trackingWebsite_id = b.trackingWebsite_id and b.reportDate = (select max(reportDate) from SiteBasedAlexa) and a.alexaTopCountry = "unknown" and a.alexaGlobalRank = 0; """ try: target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port, charset='utf8') target_mysql.queryCMD(update_SQL1) target_mysql.commit() target_mysql.queryCMD(update_SQL2) target_mysql.commit() except Exception, e: sendToMe(subject="update alexa data ERROR", body=e) logger.DEBUG("update alexa data %s" % e)
def getDataFromVT(): logger.info(": extract data from tracker2 start") # extract dat from vtweb vt_TitleBasedTrackingWebsite_SQL = """ select a.trackingWebsite_id, b.website_type as websiteType, a.display_name as websiteName, b.website_domain as websiteDomain, b.country_id, CURRENT_TIMESTAMP as ETLDate from tracker2.trackingWebsiteExtraInfo as a, mddb.trackingWebsite as b where a.trackingWebsite_id = b.id """ #vtweb_tracker2_section = "vtweb_tracker2" vtweb_tracker2_section = "vtweb_staging" try: vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo( vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user, passwd=vt_passwd, port=vt_port, db_name=vt_db) result = vtweb_mysql.queryCMD(vt_TitleBasedTrackingWebsite_SQL) except Exception, e: logger.debug( ": extract data from vt for dimension trackingWebsite, %s" % e) sendToMe(subject="TitleBasedTrackingWebsite ERROR", body=re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def getMatchedVideo(min_reportDate, min_updateDate): logger.info(" extract data from tracker2.matchedVideo start") date_dict = { "min_reportDate": min_reportDate, "min_updateDate": min_updateDate } try: get_data_sql = """ select id, company_id, trackingMeta_id, trackingWebsite_id, view_count, count_send_notice, first_send_notice_date, takeoff_time, hide_flag, clip_duration, matchedFile_id, meta_title, created_at, updated_at from tracker2.matchedVideo where company_id = 10 and created_at >= "2015-03-01" and (created_at > "%(min_reportDate)s" or (created_at <= "%(min_reportDate)s" and updated_at > "%(min_updateDate)s")) """ % date_dict vtweb_tracker2_section = "vtweb" vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo( vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user, passwd=vt_passwd, port=vt_port, db_name=vt_db) data = vtweb_mysql.queryCMD(get_data_sql) except Exception, e: logger.debug("extract data from tracker2.matchedVideo, %s" % e) sendToMe(subject="matchedVideo ERROR", body=re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def getDataFromVT(): logger.info(": extract data from tracker2 start") # extract dat from vtweb vt_TitleBasedTrackingWebsite_SQL = """ select a.trackingWebsite_id, b.website_type as websiteType, a.display_name as websiteName, b.website_domain as websiteDomain, b.country_id, CURRENT_TIMESTAMP as ETLDate from trackingWebsiteExtraInfo as a, mddb.trackingWebsite as b where a.trackingWebsite_id = b.id """ #vtweb_tracker2_section = "vtweb_tracker2" vtweb_tracker2_section = "vtweb_staging" try: vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) result = vtweb_mysql.queryCMD(vt_TitleBasedTrackingWebsite_SQL) except Exception, e: logger.debug(": extract data from vt for dimension trackingWebsite, %s" %e) sendToMe(subject = "TitleBasedTrackingWebsite ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def monitor(): logger.info("monitor complaince rate start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') monitor_SQL = """ select a.reportDate, a.trackingWebsite_id, a.title, sum(b.removedNum)/sum(a.infringingNum) as removedRate from TitleBased1 a, (select reportDate, trackingWebsite_id, Title, sum(removedNum) as removedNum from TitleBasedRemoveNum group by 1, 2, 3) b where a.reportDate=b.reportDate and a.trackingWebsite_id=b.trackingWebsite_id and a.title = b.title group by 1, 2, 3 having removedRate > 1; """ result = target_mysql.queryCMD(monitor_SQL) sj = "compliance rate " + str(time.strftime("%Y-%m-%d")) if result: bd = "compliance rate > 1\n" + str(result) sendToMe(subject = sj + " ERROR", body = bd) else: bd = "compliance rate has no data greater than 1" sendToMe(subject = sj + " GOOD", body = bd) except Exception, e: logger.debug(" error, %s" %e) sendToMe(subject = "monitor complaince rate ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def getInsightViews(start_date, end_date): logger.info(" extral data from Dashboard.matchedVideoViewCountCompletion start") date_dict ={"start_date": start_date, "end_date": end_date} try: get_data_sql = """ select matchedVideo_id, trackingWebsite_id, trackingMeta_id, company_id, report_at, view_count, current_timestamp as ETLDate from matchedVideoViewCountCompletion where report_at > "%(start_date)s" and report_at <= "%(end_date)s" """ %date_dict insight_mysql = MySQLHelper(host=insight_host, user=insight_user,passwd=insight_passwd, port = insight_port, db_name = insight_db) data = insight_mysql.queryCMD(get_data_sql) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, \ db_name = target_db, port = target_port, charset = 'utf8') insert_sql = """insert into matchedVideoViewCountCompletion (matchedVideo_id, trackingWebsite_id, trackingMeta_id, company_id, report_at, view_count, ETLDate) values(%s, %s, %s, %s, %s, %s, %s) """ target_mysql.executeManyCMD(insert_sql, data) target_mysql.commit() except Exception, e: logger.debug("extral data from Dashboard.matchedVideoViewCountCompletion, %s" %e) sendToMe(subject = "titleBased_matchedVideoViewCountCompletio ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def getInsightViews(start_date, end_date): logger.info(" extral data from Dashboard.matchedVideoViewCountCompletion start") date_dict ={"start_date": start_date, "end_date": end_date} try: get_data_sql = """ select matchedVideo_id, trackingWebsite_id, trackingMeta_id, company_id, report_at, view_count, current_timestamp as ETLDate from matchedVideoViewCountCompletion where report_at > "%(start_date)s" and report_at <= "%(end_date)s" """ %date_dict insight_mysql = MySQLHelper(host=insight_host, user=insight_user,passwd=insight_passwd, port = insight_port, db_name = insight_db) data = insight_mysql.queryCMD(get_data_sql) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, \ db_name = target_db, port = target_port, charset = 'utf8') insert_sql = """insert into matchedVideoViewCountCompletion (matchedVideo_id, trackingWebsite_id, trackingMeta_id, company_id, report_at, view_count, ETLDate) values(%s, %s, %s, %s, %s, %s, %s) """ target_mysql.executeManyCMD(insert_sql, data) target_mysql.commit() except Exception, e: logger.debug("extral data from Dashboard.matchedVideoViewCountCompletion, %s" %e) sendToMe(subject = "titleBased_infringAllViews ERROR", body = str(e).replace("\"", "").replace("'", "").replace("!", "")) sys.exit(0)
def getInsightViews(start_date, end_date): logger.info(" extral data from Dashboard.matchedVideoViewCountCompletionAll start") date_dict ={"start_date": start_date, "end_date": end_date} target_server_section = "target_server_staging" insight_server_section = "insight_master" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(target_server_section) insight_host, insight_user, insight_passwd, insight_port, insight_db = getConfMysqlInfo(insight_server_section) try: get_data_sql = """ select matchedVideo_id, trackingWebsite_id, trackingMeta_id, company_id, report_at, view_count, hide_flag, current_timestamp as ETLDate from matchedVideoViewCountCompletionAll_fox137 where report_at > "%(start_date)s" and report_at <= "%(end_date)s" """ %date_dict insight_mysql = MySQLHelper(host=insight_host, user=insight_user,passwd=insight_passwd, port = insight_port, db_name = insight_db) data = insight_mysql.queryCMD(get_data_sql) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, \ db_name = target_db, port = target_port, charset = 'utf8') insert_sql = """ insert into matchedVideoViewCountCompletionAll (matchedVideo_id, trackingWebsite_id, trackingMeta_id, company_id, report_at, view_count, hide_flag, ETLDate) values(%s, %s, %s, %s, %s, %s, %s, %s) """ commitInTurn(commit_num=100000, data=data, executeFun=target_mysql.executeManyCMD, \ commitFun=target_mysql.commit, executeSQL=insert_sql) #target_mysql.executeManyCMD(insert_sql, data) #target_mysql.commit() except Exception, e: logger.debug("extral data from Dashboard.matchedVideoViewCountCompletionAll, %s" %e) sendToMe(subject = "titleBased_matchedVideoViewCountCompletionAll ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def getDataFromVT(): logger.info(": extract data from tracker2 start") # extract dat from vtweb vt_TitleBasedMeta_SQL = """ select a.id as trackingMeta_id, a.meta_type, a.meta_title as title, CURRENT_TIMESTAMP as ETLDate from mddb.meta as a, tracker2.metaExtraInfo as b where a.company_id = 14 and b.company_id = 14 and a.id = b.meta_id """ #vtweb_tracker2_section = "vtweb_tracker2" vtweb_tracker2_section = "vtweb_staging" try: vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo( vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user, passwd=vt_passwd, port=vt_port, db_name=vt_db) result = vtweb_mysql.queryCMD(vt_TitleBasedMeta_SQL) except Exception, e: logger.debug(": extract data from vt for dimension meta, %s" % e) sendToMe(subject="titleBased_meta ERROR", body=e) sys.exit(0)
def getInsightViews(start_date, end_date): logger.info(" extral data from Dashboard.matchedVideoViewCountCompletionAll start") date_dict ={"start_date": start_date, "end_date": end_date} target_server_section = "target_server_staging" insight_server_section = "insight" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(target_server_section) insight_host, insight_user, insight_passwd, insight_port, insight_db = getConfMysqlInfo(insight_server_section) try: get_data_sql = """ select matchedVideo_id, trackingWebsite_id, trackingMeta_id, company_id, report_at, view_count, hide_flag, current_timestamp as ETLDate from matchedVideoViewCountCompletionAll where report_at > "%(start_date)s" and report_at <= "%(end_date)s" """ %date_dict insight_mysql = MySQLHelper(host=insight_host, user=insight_user,passwd=insight_passwd, port = insight_port, db_name = insight_db) data = insight_mysql.queryCMD(get_data_sql) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, \ db_name = target_db, port = target_port, charset = 'utf8') insert_sql = """ insert into matchedVideoViewCountCompletionAll (matchedVideo_id, trackingWebsite_id, trackingMeta_id, company_id, report_at, view_count, hide_flag, ETLDate) values(%s, %s, %s, %s, %s, %s, %s, %s) """ commitInTurn(commit_num=100000, data=data, executeFun=target_mysql.executeManyCMD, \ commitFun=target_mysql.commit, executeSQL=insert_sql) #target_mysql.executeManyCMD(insert_sql, data) #target_mysql.commit() except Exception, e: logger.debug("extral data from Dashboard.matchedVideoViewCountCompletionAll, %s" %e) sendToMe(subject = "titleBased_matchedVideoViewCountCompletionAll ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def updateCountry(): sendToMe(subject = "update Country start", body = "update Country table: SiteBased") logger.info("update Country for table SiteBased start") update_SiteBased_SQL = """ update SiteBased as a, TitleBasedCountry as b set a.hostCountry = b.countryName where a.country_id = b.country_id """ try: target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') target_mysql.queryCMD(update_SiteBased_SQL) target_mysql.commit() except Exception, e: sendToMe(subject = "update TrackingWebsite ERROR", body = e) logger.DEBUG("update TrackingWebsite data %s" %e)
def loadDataToTitleBasedRemoveNum1(): logger.info(":extract data from TitleBasedRemoveNum start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') aggregate_SQL = """ select a.reportDate, a.takeoffDate, a.trackingWebsite_id, ifnull(c.displayName, a.websiteName) as websiteName, a.websiteType, ifnull(b.mapTitle, a.title) as title, sum(removedNum) as removedNum, sum(complianceTime) as complianceTime, current_timestamp as ETLDate from (select a.reportDate, a.takeoffDate, a.trackingWebsite_id, c.websiteName, c.websiteType, b.title, sum(removedNum) as removedNum, sum(complianceTime) as complianceTime from TitleBasedRemoveNumTmp as a, TitleBasedMeta as b, TitleBasedTrackingWebsite as c where a.trackingWebsite_id = c.trackingWebsite_id and a.trackingMeta_id = b.trackingMeta_id group by 1, 2, 3, 4, 5, 6) as a left join MetaTitleMapTitle as b on a.title = b.metaTitle left join SiteMap as c on a.trackingWebsite_id = c.trackingWebsite_id group by 1, 2, 3, 4, 5, 6 """ target_mysql.queryNoData("delete from TitleBasedRemoveNum") aggregate_result = target_mysql.queryCMD(aggregate_SQL) insertUpdate_SQL = """ INSERT INTO TitleBasedRemoveNum (reportDate, takeoffDate, trackingWebsite_id, websiteName, websiteType, title, removedNum, complianceTime, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE removedNum = VALUES(removedNum), complianceTime = VALUES(complianceTime), ETLDate = VALUES(ETLDate) """ target_mysql.queryNoData("delete from TitleBasedRemoveNum") commitInTurn(commit_num = 50000, data = aggregate_result, executeFun = target_mysql.insertUpdateCMD, \ commitFun = target_mysql.commit, executeSQL = insertUpdate_SQL) #target_mysql.insertUpdateCMD(insertUpdate_SQL, aggregate_result) #target_mysql.commit() except Exception, e: logger.debug(" load data to TitleBasedRemoveNum, %s" %e) sendToMe(subject = "TitleBasedRemove ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def updateViews(start_date, end_date): logger.info(" aggregate data from matchedVideoViewCountCompletion start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(target_server_section) date_dict = {"start_date": start_date, "end_date": end_date} try: get_data_sql = """ select a.reportDate, a.trackingWebsite_id, ifnull(c.displayName, a.websiteName) as websiteName, a.websiteType, ifnull(b.mapTitle, a.title) title, sum(a.infringingViews) as infringingViews, sum(a.reportedViews) as reportedViews, current_timestamp as ETLDate from (select a.report_at as reportDate, a.trackingWebsite_id, c.websiteName, c.websiteType, b.title, sum(if(a.hide_flag = 2, a.view_count, 0)) as infringingViews, sum(a.view_count) as reportedViews from matchedVideoViewCountCompletionAll as a, TitleBasedMeta as b, TitleBasedTrackingWebsite as c where a.trackingWebsite_id = c.trackingWebsite_id and a.trackingMeta_id = b.trackingMeta_id and c.websiteType = 'ugc' and a.report_at > "%(start_date)s" and a.report_at <= "%(end_date)s" group by 1, 2, 3, 4, 5) as a left join MetaTitleMapTitle as b on a.title = b.metaTitle left join SiteMap as c on a.trackingWebsite_id = c.trackingWebsite_id group by 1, 2, 3, 4, 5 """ % date_dict target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, \ db_name=target_db, port=target_port, charset='utf8') data = target_mysql.queryCMD(get_data_sql) insert_sql = """insert into TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, infringingViews, reportedViews, ETLDate) values(%s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE infringingViews = values(infringingViews), reportedViews = values(reportedViews), ETLDate = values(ETLDate), websiteName = VALUES(websiteName) """ commitInTurn(commit_num=100000, data=data, executeFun=target_mysql.executeManyCMD, \ commitFun=target_mysql.commit, executeSQL=insert_sql) except Exception, e: logger.debug("aggregate data to TitleBased1 ERROR , %s" % e) sendToMe(subject="titleBased_infringAllViews ERROR", body=re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def getData(sql): logger.info(" extract data from tracker2.metaExtraInfo start") try: vtweb_tracker2_section = "vtweb" vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) data = vtweb_mysql.queryCMD(sql) except Exception, e: logger.debug("extract data from tracker2.metaExtraInfo, %s" %e) sendToMe(subject = "metaExtraInfo ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def getData(sql, section = "vtweb"): logger.info(" extract data from tracker2 start") try: vtweb_tracker2_section = section vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) data = vtweb_mysql.queryCMD(sql) except Exception, e: logger.debug("extract data from tracker2, %s" %e) sendToMe(subject = "extract data from tracker2 ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def updateTrackingWebsite(): sendToMe(subject = "update TrackingWebsite start", body = "update TrackingWebsite table: SiteBased TitleBased1 TitleBasedRemoveNum1") logger.info("update TrackingWebsite start") update_SiteBased_SQL = """ update SiteBased as a, TitleBasedTrackingWebsite as b set a.websiteName = b.websiteName, a.websiteDomain = b.websiteDomain, a.websiteType = b.websiteType where a.trackingWebsite_id = b.trackingWebsite_id """ update_TitleBased1_SQL = """ update TitleBased1 as a, TitleBasedTrackingWebsite as b set a.websiteName = b.websiteName, a.websiteType = b.websiteType where a.trackingWebsite_id = b.trackingWebsite_id """ update_TitleBasedRemoveNum1_SQL = """ update TitleBasedRemoveNum1 as a, TitleBasedTrackingWebsite as b set a.websiteName = b.websiteName, a.websiteType = b.websiteType where a.trackingWebsite_id = b.trackingWebsite_id """ try: target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') target_mysql.queryCMD(update_SiteBased_SQL) target_mysql.commit() target_mysql.queryCMD(update_TitleBased1_SQL) target_mysql.commit() target_mysql.queryCMD(update_TitleBasedRemoveNum1_SQL) target_mysql.commit() except Exception, e: sendToMe(subject = "update TrackingWebsite ERROR", body = e) logger.DEBUG("update TrackingWebsite data %s" %e)
def getMatchedVideo(start_date, end_date): logger.info(" extral data from tracker2.matchedVideo start") date_dict ={"start_date": start_date, "end_date": end_date} try: get_data_sql = """ select a.id as matchedVideo_id, trackingMeta_id, trackingWebsite_id, date_format(first_send_notice_date, "%%Y-%%m-%%d") as firstSendNoticeDate, date_format(a.created_at, "%%Y-%%m-%%d") as created_at, CURRENT_TIMESTAMP as ETLDate from matchedVideo as a, tracker2.metaExtraInfo as b , mddb.trackingWebsite as c where a.company_id =14 and b.company_id = 14 and c.website_type = "ugc" and a.trackingMeta_id = b.meta_id and a.trackingWebsite_id = c.id and a.hide_flag = 2 and a.created_at >= "2015-03-01" and count_send_notice > 0 and date_format(a.first_send_notice_date, "%%Y-%%m-%%d") > "%(start_date)s" and date_format(a.first_send_notice_date, "%%Y-%%m-%%d") <= "%(end_date)s" """ %date_dict vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) vtweb_mysql.queryCMD("set time_zone = '-8:00'") data = vtweb_mysql.queryCMD(get_data_sql) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, \ db_name = target_db, port = target_port, charset = 'utf8') insert_sql = """insert into matchedVideo (matchedVideo_id, trackingMeta_id, trackingWebsite_id, firstSendNoticeDate, reportDate, ETLDate) values(%s, %s, %s, %s, %s, %s) """ target_mysql.executeManyCMD(insert_sql, data) target_mysql.commit() except Exception, e: logger.debug("extral data from tracker2.matchedVideo, %s" %e) sendToMe(subject = "titleBased_infringAllViews ERROR", body = str(e).replace("\"", "").replace("'", "").replace("!", "")) sys.exit(0)
def getMinDatePara(table_name, date_para, db = "FOX_DASHBOARD"): #get max report date from table TitleBased target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user,passwd=target_passwd, port = target_port, db_name = db) sql = "select max(%s) from %s" %(date_para, table_name) min_date = target_mysql.queryCMD(sql)[0][0] #if min_date == None: # min_date = time.strftime('%Y-%m-%d',time.localtime(time.time() - 24 * 60 * 60 * 365)) except Exception , e: logger.debug(": get last max report date from %s, %s" %(table_name, e)) sendToMe(subject = table_name, body = re.sub(r'\'|"|!', "", str(e)))
def addAlexa(d): sendToMe(subject = "add alexa data start", body = "add alexa data start") logger.info(" addSiteBasedAlexa start") get_alexa_sql = "select '%s', trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, current_timestamp() from SiteBasedAlexa where reportDate = (select max(reportDate) from SiteBasedAlexa);" %d insert_alexa_sql = "insert into SiteBasedAlexa(reportDate,trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry,ETLDate) VALUES(%s,%s,%s,%s,%s,%s);" try: target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') alexa_result = target_mysql.queryCMD(get_alexa_sql) target_mysql.insertUpdateCMD(insert_alexa_sql, alexa_result) target_mysql.commit() except Exception, e: sendToMe(subject = "add alexa data ERROR", body = e) logger.DEBUG("add alexa data %s" %e)
def updateCountry(): sendToMe(subject="update Country start", body="update Country table: SiteBased") logger.info("update Country for table SiteBased start") update_SiteBased_SQL = """ update SiteBased as a, TitleBasedCountry as b set a.hostCountry = b.countryName where a.country_id = b.country_id """ try: target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port, charset='utf8') target_mysql.queryCMD(update_SiteBased_SQL) target_mysql.commit() except Exception, e: sendToMe(subject="update TrackingWebsite ERROR", body=e) logger.DEBUG("update TrackingWebsite data %s" % e)
def getData(): logger.info(" extract data from tracker2.metaExtraInfo start") try: get_data_sql = """ select meta_id, company_id, display_name, priority_type, created_at, updated_at from tracker2.metaExtraInfo where company_id = 14 """ vtweb_tracker2_section = "vtweb" vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) data = vtweb_mysql.queryCMD(get_data_sql) except Exception, e: logger.debug("extract data from tracker2.metaExtraInfo, %s" %e) sendToMe(subject = "metaExtraInfo ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def getMinDatePara(table_name, date_para): #get max report date from table TitleBased target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, port=target_port, db_name=target_db) sql = "select max(%s) from %s" % (date_para, table_name) min_date = target_mysql.queryCMD(sql)[0][0] #if min_date == None: # min_date = time.strftime('%Y-%m-%d',time.localtime(time.time() - 24 * 60 * 60 * 365)) except Exception, e: logger.debug(": get last max report date from %s, %s" % (table_name, e))
def getDataFromVT(): logger.info(": extract data from tracker2 start") # extract dat from vtweb vt_TitleBasedMeta_SQL = """ select a.id as trackingMeta_id, a.meta_type, a.meta_title as title, CURRENT_TIMESTAMP as ETLDate from mddb.meta as a, tracker2.metaExtraInfo as b where a.company_id = 14 and b.company_id = 14 and a.id = b.meta_id """ #vtweb_tracker2_section = "vtweb_tracker2" vtweb_tracker2_section = "vtweb_staging" try: vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) result = vtweb_mysql.queryCMD(vt_TitleBasedMeta_SQL) except Exception, e: logger.debug(": extract data from vt for dimension meta, %s" %e) sendToMe(subject = "titleBased_meta ERROR", body = e) sys.exit(0)
def addAlexa(d): sendToMe(subject="add alexa data start", body="add alexa data start") logger.info(" addSiteBasedAlexa start") get_alexa_sql = "select '%s', trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, current_timestamp() from SiteBasedAlexa where reportDate = (select max(reportDate) from SiteBasedAlexa);" % d insert_alexa_sql = "insert into SiteBasedAlexa(reportDate,trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry,ETLDate) VALUES(%s,%s,%s,%s,%s,%s);" try: target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port, charset='utf8') alexa_result = target_mysql.queryCMD(get_alexa_sql) target_mysql.insertUpdateCMD(insert_alexa_sql, alexa_result) target_mysql.commit() except Exception, e: sendToMe(subject="add alexa data ERROR", body=e) logger.DEBUG("add alexa data %s" % e)
def get_matchedVideo(min_reportDate, min_updateDate): logger.info(" extract data from tracker2.matchedVideo start") date_dict = {"min_reportDate": min_reportDate, "min_updateDate": min_updateDate} try: get_data_sql = """ select id, company_id, trackingMeta_id, trackingWebsite_id, view_count, count_send_notice, first_send_notice_date, takeoff_time, hide_flag, clip_duration, matchedFile_id, meta_title, created_at, updated_at from tracker2.matchedVideo where company_id = 10 and created_at >= "2015-03-01" and (created_at > "%(min_reportDate)s" or (created_at <= "%(min_reportDate)s" and updated_at > "%(min_updateDate)s")) """ %date_dict vtweb_tracker2_section = "vtweb" vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) data = vtweb_mysql.queryCMD(get_data_sql) except Exception, e: logger.debug("extract data from tracker2.matchedVideo, %s" %e) sendToMe(subject = "matchedVideo ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def getData(): logger.info(" extract data from tracker2.metaExtraInfo start") try: get_data_sql = """ select meta_id, company_id, display_name, priority_type, created_at, updated_at from tracker2.metaExtraInfo where company_id = 14 """ vtweb_tracker2_section = "vtweb" vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo( vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user, passwd=vt_passwd, port=vt_port, db_name=vt_db) data = vtweb_mysql.queryCMD(get_data_sql) except Exception, e: logger.debug("extract data from tracker2.metaExtraInfo, %s" % e) sendToMe(subject="metaExtraInfo ERROR", body=re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def updateTrackingWebsite(): sendToMe( subject="update TrackingWebsite start", body= "update TrackingWebsite table: SiteBased TitleBased1 TitleBasedRemoveNum1" ) logger.info("update TrackingWebsite start") update_SiteBased_SQL = """ update SiteBased as a, TitleBasedTrackingWebsite as b set a.websiteName = b.websiteName, a.websiteDomain = b.websiteDomain, a.websiteType = b.websiteType where a.trackingWebsite_id = b.trackingWebsite_id """ update_TitleBased1_SQL = """ update TitleBased1 as a, TitleBasedTrackingWebsite as b set a.websiteName = b.websiteName, a.websiteType = b.websiteType where a.trackingWebsite_id = b.trackingWebsite_id """ update_TitleBasedRemoveNum1_SQL = """ update TitleBasedRemoveNum1 as a, TitleBasedTrackingWebsite as b set a.websiteName = b.websiteName, a.websiteType = b.websiteType where a.trackingWebsite_id = b.trackingWebsite_id """ try: target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port, charset='utf8') target_mysql.queryCMD(update_SiteBased_SQL) target_mysql.commit() target_mysql.queryCMD(update_TitleBased1_SQL) target_mysql.commit() target_mysql.queryCMD(update_TitleBasedRemoveNum1_SQL) target_mysql.commit() except Exception, e: sendToMe(subject="update TrackingWebsite ERROR", body=e) logger.DEBUG("update TrackingWebsite data %s" % e)
and a.count_send_notice > 0 and hide_flag = 2 and a.first_send_notice_date < a.takeoff_time and a.first_send_notice_date > 0 and a.created_at >= "%(min_report_date)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s" group by 1, 2, 3, 4 """ %date_para_TitleBasedRemoveNum_dict #vtweb_tracker2_section = "vtweb_tracker2" vtweb_tracker2_section = "vtweb_staging" try: vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) vtweb_mysql.queryCMD("set time_zone = '-5:00'") result = vtweb_mysql.queryCMD(vt_TitleBasedRemoveNum_SQL) except Exception, e: logger.debug(": extract data from vt for TitleBasedRemoveNum, %s" %e) sys.exit(0) finally: vtweb_mysql.closeCur() vtweb_mysql.closeConn() logger.info(": extract data from tracker2 end") logger.info(":load data to TitleBasedRemoveNum start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8')
def main(): cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists; file_name %s" %cfg_file) sendToMe(subject = "SiteBased_alexa ERROR", body = "config file not exists") sys.exit(0) logger.info(": extract data from siteBased start") socket.setdefaulttimeout(10.0) sendToMe(subject = "SiteBased_alexa start", body = "extract data from siteBased start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') alexa_date_min = getMinDatePara("SiteBasedAlexa", "reportDate") alexa_date_min = '2016-04-12' if alexa_date_min == None: alexa_date_min = time.strftime("%Y-%m-%d", time.localtime(time.time() - 10 * 24 * 60 * 60)) alexa_date_max = time.strftime("%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60)) delt = time.mktime(time.strptime(str(alexa_date_max), "%Y-%m-%d")) - time.mktime(time.strptime(str(alexa_date_min), "%Y-%m-%d")) if delt >= 10*24*60*60: alexa_date_dict = {"alexa_date_min": alexa_date_min, "alexa_date_max": alexa_date_max} site_SQL = """ select distinct trackingWebsite_id, websiteDomain from SiteBased where reportDate <= "%(alexa_date_max)s" and reportDate > "%(alexa_date_min)s" and alexaGlobalRank = 0 and alexaTopCountry = "unknown" """ %alexa_date_dict site_info = target_mysql.queryCMD(site_SQL) if site_info: base_url = "http://www.alexa.com/siteinfo/" for site in site_info: domain = site[1] url = base_url + domain run_time = 0 alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) while True: run_time += 1 if alexaGlobalRank == 0 or alexaTopCountry == "unknown": alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) else: break if run_time == 3: break time.sleep(random.randint(5, 8)) time.sleep(random.randint(5, 8)) alexa_info_tuple = [(alexa_date_max, site[0], site[1], alexaGlobalRank, alexaTopCountry, time.strftime("%Y-%m-%d %H:%M:%S"))] if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"): insert_SiteBasedAlexa_SQL = """ insert into SiteBasedAlexa (reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) values (%s, %s, %s, %s, %s, %s) on duplicate key update alexaGlobalRank = values(alexaGlobalRank), alexaTopCountry = values(alexaTopCountry), ETLDate = values(ETLDate) """ try: target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL, alexa_info_tuple) print alexa_info_tuple target_mysql.commit() except MySQLdb.Error, e: logger.debug(e) sendToMe(subject = "update SiteBasedAlexa Error", body = re.sub(r'\'|"|!', "", str(e))) continue else: logger.info("has no data %s" %alexa_date_max) except Exception, e: logger.debug(": load data to SiteBasedAlexa %s" %e) sendToMe(subject = "SiteBasedAlexa ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
from mddb.meta as a, tracker2.metaExtraInfo as b where a.company_id = 14 and b.company_id = 14 and a.id = b.meta_id """ #vtweb_tracker2_section = "vtweb_tracker2" vtweb_tracker2_section = "vtweb_staging" try: vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo( vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user, passwd=vt_passwd, port=vt_port, db_name=vt_db) result = vtweb_mysql.queryCMD(vt_TitleBasedMeta_SQL) except Exception, e: logger.debug(": extract data from vt for dimension meta, %s" % e) sys.exit(0) finally: vtweb_mysql.closeCur() vtweb_mysql.closeConn() logger.info(": extract data from tracker2 start") logger.info(":load data to TitleBasedMeta start") target_server_section = "target_server_staging" try: target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user,
def aggregateDataToTitleBased1(): logger.info(" aggregate data from TitleBased to TitleBased1 start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') aggregate_TitleBased1_SQL = """ select a.reportDate, a.trackingWebsite_id, a.websiteName, a.websiteType, ifnull(b.mapTitle, a.title) title, min(a.tier) as tier, sum(matchedNum) as matchedNum, sum(matchedNumDurationNoZero) as matchedNumDurationNoZero, sum(infringingNum) as infringingNum, sum(infringingNumDurationNoZero) as infringingNumDurationNoZero, sum(clipDurationSum) as clipDurationSum, sum(clipDurationInfringingSum) as clipDurationInfringingSum, current_timestamp as ETLDate from (select a.reportDate, a.trackingWebsite_id, c.websiteName, c.websiteType, b.title, min(a.tier) tier, sum(matchedNum) as matchedNum, sum(matchedNumDurationNoZero) as matchedNumDurationNoZero, sum(infringingNum) as infringingNum, sum(infringingNumDurationNoZero) as infringingNumDurationNoZero, sum(clipDurationSum) as clipDurationSum, sum(clipDurationInfringingSum) as clipDurationInfringingSum, current_timestamp as ETLDate from TitleBasedTmp as a, TitleBasedMeta as b, TitleBasedTrackingWebsite as c where a.trackingWebsite_id = c.trackingWebsite_id and a.trackingMeta_id = b.trackingMeta_id group by 1, 2, 3, 4, 5) as a left join MetaTitleMapTitle as b on a. title = b.metaTitle group by 1, 2, 3, 4, 5 """ #%date_para_TitleBased1_dict TitleBased1_result = target_mysql.queryCMD(aggregate_TitleBased1_SQL) insert_TitleBased1_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, tier, matchedNum, matchedNumDurationNoZero, infringingNum, infringingNumDurationNoZero, clipDurationSum, clipDurationInfringingSum, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE matchedNum = VALUES(matchedNum), clipDurationSum = VALUES(clipDurationSum), infringingNum = VALUES(infringingNum), ETLDate = VALUES(ETLDate), matchedNumDurationNoZero = VALUES(matchedNumDurationNoZero), matchedNumDurationNoZero = VALUES(matchedNumDurationNoZero), infringingNumDurationNoZero = VALUES(infringingNumDurationNoZero), clipDurationInfringingSum = VALUES(clipDurationInfringingSum) """ commitInTurn(commit_num = 50000, data = TitleBased1_result, executeFun = target_mysql.insertUpdateCMD, \ commitFun = target_mysql.commit, executeSQL = insert_TitleBased1_SQL) #target_mysql.insertUpdateCMD(insert_TitleBased1_SQL, TitleBased1_result) #target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" %e) sendToMe(subject = "titleBased_titleBased ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
b.websiteType, title, sum(CMSInfringingNum) as infringingNumCMS, sum(CMSReportViews) as reportedViewsCMS, current_timestamp as ETLDate from DM_VIACOM.SelfService_Aggregate_ByNoticedDate as a, TitleBasedTrackingWebsite as b where a.trackingWebsite_id = b.trackingWebsite_id and a.WebsiteType = 'ugc' and b.WebsiteType = 'ugc' and a.trackingWebsite_id = 1 and a.Date_ID >= '%(date_para_CMS_min)s' and a.Date_ID < '%(date_para_CMS_max)s' group by 1, 2, 3, 4, 5 """ % date_para_CMS_dict CMS_result = target_mysql.queryCMD(aggregate_CMS_SQL) insert_CMS_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, infringingNumCMS, reportedViewsCMS, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE infringingNumCMS = VALUES(infringingNumCMS), reportedViewsCMS = VALUES(reportedViewsCMS), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_CMS_SQL, CMS_result) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" % e) sendToMe(subject="TitleBased_views ERROR", body=e) sys.exit(0)
b.title, min(a.tier) tier, sum(matchedNum) as matchedNum, sum(infringingNum) as infringingNum, sum(clipDurationSum) as clipDurationSum, current_timestamp as ETLDate from TitleBased as a, TitleBasedMeta as b, TitleBasedTrackingWebsite as c where a.trackingWebsite_id = c.trackingWebsite_id and a.trackingMeta_id = b.trackingMeta_id group by 1, 2, 3, 4, 5) as a left join MetaTitleMapTitle as b on a. title = b.metaTitle group by 1, 2, 3, 4, 5, 6 """ #%date_para_TitleBased1_dict TitleBased1_result = target_mysql.queryCMD(aggregate_TitleBased1_SQL) insert_TitleBased1_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, tier, matchedNum, infringingNum, clipDurationSum, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE matchedNum = VALUES(matchedNum), clipDurationSum = VALUES(clipDurationSum), infringingNum = VALUES(infringingNum), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_TitleBased1_SQL, TitleBased1_result) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" %e) sys.exit(0) finally:
def getMatchedVideo(min_reportDate, max_reportDate, min_updateDate): logger.info(" extral data from tracker2.matchedVideo start") date_dict ={"min_reportDate": min_reportDate, "max_reportDate": max_reportDate, "min_updateDate": min_updateDate} try: get_data_sql = """ select a.id as matchedVideo_id, a.trackingMeta_id, a.trackingWebsite_id, date_format(a.first_send_notice_date, "%%Y-%%m-%%d") as firstSendNoticeDate, a.hide_flag as hideFlag, count_send_notice as countSendNotice, date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.updated_at, "%%Y-%%m-%%d") as updateDate, CURRENT_TIMESTAMP as ETLDate from matchedVideo as a, mddb.trackingWebsite as b where date_format(a.created_at, "%%Y-%%m-%%d") > "%(min_reportDate)s" and date_format(a.created_at, "%%Y-%%m-%%d") <= "%(max_reportDate)s" and date_format(a.updated_at, "%%Y-%%m-%%d") <= "%(max_reportDate)s" and a.company_id =14 and b.website_type = "ugc" and a.created_at >= "2015-03-01" union all select a.id as matchedVideo_id, a.trackingMeta_id, a.trackingWebsite_id, date_format(a.first_send_notice_date, "%%Y-%%m-%%d") as firstSendNoticeDate, a.hide_flag as hideFlag, count_send_notice as countSendNotice, date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.updated_at, "%%Y-%%m-%%d") as updateDate, CURRENT_TIMESTAMP as ETLDate from matchedVideo as a, mddb.trackingWebsite as b where date_format(a.created_at, "%%Y-%%m-%%d") <= "%(min_reportDate)s" and date_format(a.updated_at, "%%Y-%%m-%%d") > "%(min_updateDate)s" and date_format(a.updated_at, "%%Y-%%m-%%d") <= "%(max_reportDate)s" and a.company_id =14 and b.website_type = "ugc" and a.created_at >= "2015-03-01" """ %date_dict vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) vtweb_mysql.queryCMD("set time_zone = '-8:00'") data = vtweb_mysql.queryCMD(get_data_sql) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, \ db_name = target_db, port = target_port, charset = 'utf8') insert_sql = """ insert into matchedVideo (matchedVideo_id, trackingMeta_id, trackingWebsite_id, firstSendNoticeDate, hideFlag, countSendNotice, reportDate, updateDate, ETLDate) values(%s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE firstSendNoticeDate = values(firstSendNoticeDate), hideFlag = values(hideFlag), countSendNotice = values(countSendNotice), reportDate = values(reportDate), updateDate = values(updateDate), ETLDate =values(ETLDate) """ target_mysql.executeManyCMD(insert_sql, data) target_mysql.commit() except Exception, e: logger.debug("extral data from tracker2.matchedVideo, %s" %e) sendToMe(subject = "titleBased_infringAllViews ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
b.trackingWebsite_id, b.websiteName, b.websiteType, title, sum(ReportedViews) as reportedViews, current_timestamp as ETLDate from DM_VIACOM.SelfService_Aggregate_ByNoticedDate as a, TitleBasedTrackingWebsite as b where a.trackingWebsite_id = b.trackingWebsite_id and a.WebsiteType = 'ugc' and b.WebsiteType = 'ugc' and a.Date_ID >= '%(date_para_reportedViews_min)s' and a.Date_ID < '%(date_para_reportedViews_max)s' group by 1, 2, 3, 4, 5 """ %date_para_reportedViews_dict reportedViews_result = target_mysql.queryCMD(aggregate_reportedViews_SQL) insert_reportedViews_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, reportedViews, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE reportedViews = VALUES(reportedViews), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_reportedViews_SQL, reportedViews_result) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" %e) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn()
sendToMe(subject = "titleBased_country ERROR", body = "config file not exists") sys.exit(0) vt_TitleBasedTrackingWebsite_SQL = """ select id as country_id, -- region, country_name as countryName -- country_name as countryName, -- national_flag as nationalFlag, -- CURRENT_TIMESTAMP as ETLDate from mddb.country where id = 2 """ #vtweb_tracker2_section = "vtweb_tracker2" vtweb_tracker2_section = "vtweb_staging" try: vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) # vtweb_mysql.execute("set names utf8") result = vtweb_mysql.queryCMD(vt_TitleBasedTrackingWebsite_SQL) print result, repr(result[0][2]), result[0][2] target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') executeManyCMD("insert into test.a values ("%s", "%s")") except Exception, e: sys.exit(0) finally: vtweb_mysql.closeCur() vtweb_mysql.closeConn()
# extract dat from vtweb vt_TitleBasedTrackingWebsite_SQL = """ select id as country_id, region, country_name as countryName, national_flag as nationalFlag, CURRENT_TIMESTAMP as ETLDate from mddb.country """ #vtweb_tracker2_section = "vtweb_tracker2" vtweb_tracker2_section = "vtweb_staging" try: vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) result = vtweb_mysql.queryCMD(vt_TitleBasedTrackingWebsite_SQL) except Exception, e: logger.debug(": extract data from vt for dimension country, %s" %e) sendToMe(subject = "titleBased_country ERROR", body = e) sys.exit(0) finally: vtweb_mysql.closeCur() vtweb_mysql.closeConn() logger.info(": extract data from tracker2 start") logger.info(":load data to TitleBasedCountry start") target_server_section = "target_server_staging" try: target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') insertUpdate_SQL = """
def getMatchedVideo(min_reportDate, max_reportDate, min_updateDate): logger.info(" extral data from tracker2.matchedVideo start") date_dict = { "min_reportDate": min_reportDate, "max_reportDate": max_reportDate, "min_updateDate": min_updateDate } try: get_data_sql = """ select a.id as matchedVideo_id, a.trackingMeta_id, a.trackingWebsite_id, date_format(a.first_send_notice_date, "%%Y-%%m-%%d") as firstSendNoticeDate, a.hide_flag as hideFlag, count_send_notice as countSendNotice, date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.updated_at, "%%Y-%%m-%%d") as updateDate, CURRENT_TIMESTAMP as ETLDate from matchedVideo as a, mddb.trackingWebsite as b where date_format(a.created_at, "%%Y-%%m-%%d") > "%(min_reportDate)s" and date_format(a.created_at, "%%Y-%%m-%%d") <= "%(max_reportDate)s" and date_format(a.updated_at, "%%Y-%%m-%%d") <= "%(max_reportDate)s" and a.company_id =14 and b.website_type = "ugc" and a.created_at >= "2015-03-01" and a.trackingWebsite_id = b.id union all select a.id as matchedVideo_id, a.trackingMeta_id, a.trackingWebsite_id, date_format(a.first_send_notice_date, "%%Y-%%m-%%d") as firstSendNoticeDate, a.hide_flag as hideFlag, count_send_notice as countSendNotice, date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.updated_at, "%%Y-%%m-%%d") as updateDate, CURRENT_TIMESTAMP as ETLDate from matchedVideo as a, mddb.trackingWebsite as b where date_format(a.created_at, "%%Y-%%m-%%d") <= "%(min_reportDate)s" and date_format(a.updated_at, "%%Y-%%m-%%d") > "%(min_updateDate)s" and date_format(a.updated_at, "%%Y-%%m-%%d") <= "%(max_reportDate)s" and a.company_id =14 and b.website_type = "ugc" and a.created_at >= "2015-03-01" and a.trackingWebsite_id = b.id """ % date_dict vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user, passwd=vt_passwd, port=vt_port, db_name=vt_db) vtweb_mysql.queryCMD("set time_zone = '-8:00'") data = vtweb_mysql.queryCMD(get_data_sql) target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, \ db_name = target_db, port = target_port, charset = 'utf8') insert_sql = """ insert into matchedVideo (matchedVideo_id, trackingMeta_id, trackingWebsite_id, firstSendNoticeDate, hideFlag, countSendNotice, reportDate, updateDate, ETLDate) values(%s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE firstSendNoticeDate = values(firstSendNoticeDate), hideFlag = values(hideFlag), countSendNotice = values(countSendNotice), reportDate = values(reportDate), updateDate = values(updateDate), ETLDate =values(ETLDate) """ target_mysql.executeManyCMD(insert_sql, data) target_mysql.commit() except Exception, e: logger.debug("extral data from tracker2.matchedVideo, %s" % e) sendToMe(subject="titleBased_infringAllViews ERROR", body=re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def aggregateDataToTitleBased1(): logger.info(" aggregate data from TitleBased to TitleBased1 start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') aggregate_TitleBased1_SQL = """ select a.reportDate, a.trackingWebsite_id, ifnull(c.displayName, a.websiteName) as websiteName, a.websiteType, ifnull(b.mapTitle, a.title) title, min(a.tier) as tier, sum(matchedNum) as matchedNum, sum(matchedNumDurationNoZero) as matchedNumDurationNoZero, sum(infringingNum) as infringingNum, sum(infringingNumDurationNoZero) as infringingNumDurationNoZero, sum(clipDurationSum) as clipDurationSum, sum(clipDurationInfringingSum) as clipDurationInfringingSum, current_timestamp as ETLDate from (select a.reportDate, a.trackingWebsite_id, c.websiteName, c.websiteType, b.title, min(a.tier) tier, sum(matchedNum) as matchedNum, sum(matchedNumDurationNoZero) as matchedNumDurationNoZero, sum(infringingNum) as infringingNum, sum(infringingNumDurationNoZero) as infringingNumDurationNoZero, sum(clipDurationSum) as clipDurationSum, sum(clipDurationInfringingSum) as clipDurationInfringingSum, current_timestamp as ETLDate from TitleBasedTmp as a, TitleBasedMeta as b, TitleBasedTrackingWebsite as c where a.trackingWebsite_id = c.trackingWebsite_id and a.trackingMeta_id = b.trackingMeta_id group by 1, 2, 3, 4, 5) as a left join MetaTitleMapTitle as b on a.title = b.metaTitle left join SiteMap as c on a.trackingWebsite_id = c.trackingWebsite_id group by 1, 2, 3, 4, 5 """ #%date_para_TitleBased1_dict TitleBased1_result = target_mysql.queryCMD(aggregate_TitleBased1_SQL) target_mysql.queryNoData("delete from TitleBased1;") insert_TitleBased1_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, tier, matchedNum, matchedNumDurationNoZero, infringingNum, infringingNumDurationNoZero, clipDurationSum, clipDurationInfringingSum, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE matchedNum = VALUES(matchedNum), clipDurationSum = VALUES(clipDurationSum), infringingNum = VALUES(infringingNum), ETLDate = VALUES(ETLDate), matchedNumDurationNoZero = VALUES(matchedNumDurationNoZero), matchedNumDurationNoZero = VALUES(matchedNumDurationNoZero), infringingNumDurationNoZero = VALUES(infringingNumDurationNoZero), clipDurationInfringingSum = VALUES(clipDurationInfringingSum), websiteName = VALUES(websiteName), tier = VALUES(tier) """ commitInTurn(commit_num = 50000, data = TitleBased1_result, executeFun = target_mysql.insertUpdateCMD, \ commitFun = target_mysql.commit, executeSQL = insert_TitleBased1_SQL) #target_mysql.insertUpdateCMD(insert_TitleBased1_SQL, TitleBased1_result) #target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" %e) sendToMe(subject = "titleBased_titleBased ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
def getDataFromVT(): logger.info(": extract data from tracker2 start") date_para_TitleBasedRemoveNum_min = getMinDatePara(table_name = "TitleBasedRemoveNum", date_para = "takeoffDate") if date_para_TitleBasedRemoveNum_min == None: date_para_TitleBasedRemoveNum_min = "2015-02-28" date_para_TitleBasedRemoveNum_min = "2015-02-28" date_para_TitleBasedRemoveNum_max = time.strftime("%Y-%m-%d", time.localtime(time.time() - 0 * 24 * 60 * 60)) date_para_TitleBasedRemoveNum_dict = {"date_para_TitleBasedRemoveNum_min":date_para_TitleBasedRemoveNum_min, \ "date_para_TitleBasedRemoveNum_max":date_para_TitleBasedRemoveNum_max, "min_report_date": "2015-03-01"} vt_TitleBasedRemoveNum_SQL = """ select date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate, a.trackingWebsite_id, a.trackingMeta_id, count(*) removedNum, sum(case when a.first_send_notice_date >0 and a.takeoff_time>0 then TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0 end) as complianceTime, CURRENT_TIMESTAMP as ETLDate from tracker2.matchedVideo as a, mddb.trackingWebsite as b where a.trackingWebsite_id = b.id and a.company_id = 14 and b.website_type = "ugc" and a.count_send_notice > 0 and hide_flag = 2 and a.first_send_notice_date < a.takeoff_time and a.first_send_notice_date > 0 and a.created_at >= "%(min_report_date)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s" group by 1, 2, 3, 4 UNION ALL select reportDate, takeoffDate, trackingWebsite_id, trackingMeta_id, sum(removedNum) removedNum, sum(complianceTime) complianceTime, CURRENT_TIMESTAMP as ETLDate from (select date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate, a.trackingWebsite_id, a.trackingMeta_id, count(*) removedNum, sum(case when a.first_send_notice_date >0 and a.takeoff_time>0 then TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0 end) as complianceTime from tracker2.matchedVideo as a, mddb.trackingWebsite as b where a.trackingWebsite_id = b.id and a.company_id = 14 and b.website_type = "hybrid" and a.count_send_notice > 0 and a.matchedFile_id = 0 and hide_flag = 2 and a.first_send_notice_date < a.takeoff_time and a.first_send_notice_date > 0 and a.created_at >= "%(min_report_date)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s" group by 1, 2, 3, 4 union all select date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate, a.trackingWebsite_id, a.trackingMeta_id, count(*) removedNum, sum(case when a.first_send_notice_date >0 and a.takeoff_time>0 then TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0 end) as complianceTime from tracker2.matchedVideo as a, mddb.trackingWebsite as b, tracker2.matchedFileItem d where a.trackingWebsite_id = b.id and d.matchedFile_id = a.matchedFile_id and a.company_id = 14 and b.website_type = "hybrid" and a.matchedFile_id > 0 and a.count_send_notice > 0 and hide_flag = 2 and a.first_send_notice_date < a.takeoff_time and a.first_send_notice_date > 0 and a.created_at >= "%(min_report_date)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s" group by 1, 2, 3, 4) as a group by 1,2 ,3 ,4 UNION ALL select date_format(a.created_at, "%%Y-%%m-%%d") as reportDate, date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate, a.trackingWebsite_id, a.trackingMeta_id, count(*) removedNum, sum(case when a.first_send_notice_date >0 and a.takeoff_time>0 then TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0 end) as complianceTime, CURRENT_TIMESTAMP as ETLDate from tracker2.matchedVideo as a, mddb.trackingWebsite as b, tracker2.matchedFileItem d where a.trackingWebsite_id = b.id and d.matchedFile_id = a.matchedFile_id and a.company_id = 14 and b.website_type = "cyberlocker" and a.count_send_notice > 0 and hide_flag = 2 and a.first_send_notice_date < a.takeoff_time and a.first_send_notice_date > 0 and a.created_at >= "%(min_report_date)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s" and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s" group by 1, 2, 3, 4 """ %date_para_TitleBasedRemoveNum_dict vtweb_tracker2_section = "vtweb_staging" try: vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) vtweb_mysql.queryCMD("set time_zone = '-8:00'") result = vtweb_mysql.queryCMD(vt_TitleBasedRemoveNum_SQL) except Exception, e: logger.debug(": extract data from vt for TitleBasedRemoveNum, %s" %e) sendToMe(subject = "TitleBasedRemove ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
sum(infringingNumCMS) as infringingNumCMS, sum(clipDurationSum) as clipDurationSum, sum(clipDurationInfringingSum) as clipDurationInfringingSum, sum(reportedViews) as reportedViews, sum(infringingViews) as infringingViews, sum(reportedViewsCMS) as reportedViewsCMS, CURRENT_TIMESTAMP as ETLDate from TitleBased1 as a, TitleBasedTrackingWebsite as b, TitleBasedCountry as c where a.trackingWebsite_id = b.trackingWebsite_id and b.country_id = c.country_id and(a.websiteType = "ugc" or a.websiteType = 'cyberlocker' or a.websiteType = 'hybrid') group by 1, 2, 3, 4, 5, 6, 7, 8) as a left join SiteMap as b on a.trackingWebsite_id = b.trackingWebsite_id group by 1, 2, 3, 4, 5, 6, 7, 8 """ aggregate_TitleBased1_result = target_mysql.queryCMD(aggregate_TitleBased1_SQL) target_mysql.queryNoData("delete from SiteBased") insert_SiteBased_SQL = """ INSERT INTO SiteBased (reportDate, trackingWebsite_id, websiteName, websiteType, websiteDomain, country_id, hostCountry, title, matchedNum, matchedNumDurationNoZero, infringingNum, infringingNumDurationNoZero, infringingNumCMS, clipDurationSum, clipDurationInfringingSum, reportedViews, infringingViews, reportedViewsCMS, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE websiteName = VALUES(websiteName), websiteDomain = VALUES(websiteDomain), country_id = VALUES(country_id), hostCountry = VALUES(hostCountry), matchedNum = VALUES(matchedNum), matchedNumDurationNoZero = values(matchedNumDurationNoZero),
AND date_format(a.created_at, "%%Y-%%m-%%d") < "%(date_para_TitleBased_reportDate_max)s" group by 1, 2, 3, 4 """ % date_para_TitleBased_report_dict try: #vtweb_tracker2_section = "vtweb_tracker2" vtweb_tracker2_section = "vtweb_staging" vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo( vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user, passwd=vt_passwd, port=vt_port, db_name=vt_db) # get data from vtweb vtweb_mysql.queryCMD("set time_zone = '-7:00'") result_report = vtweb_mysql.queryCMD(vt_TitleBased_SQL) except Exception, e: logger.debug(": mysql config file section or option not exists, %s" % e) sendToMe(subject="TitleBased ERROR", body=re.sub(r'\'|"|!', "", str(e))) sys.exit(0) finally: vtweb_mysql.closeCur() vtweb_mysql.closeConn() logger.info(": extract data from tracker2 reportDate end") # put data to TitleBased logger.info(":load data to TitleBasedTmp reportDate start") try:
alexa_date_max = time.strftime( "%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60)) alexa_date_dict = { "alexa_date_min": alexa_date_min, "alexa_date_max": alexa_date_max } site_SQL = """ select distinct trackingWebsite_id, websiteDomain from SiteBased where reportDate <= "%(alexa_date_max)s" and reportDate > "%(alexa_date_min)s" and alexaGlobalRank = 0 and alexaTopCountry = "unknown" """ % alexa_date_dict site_info = target_mysql.queryCMD(site_SQL) print site_SQL print "site num", len(site_info) if site_info: base_url = "http://www.alexa.com/siteinfo/" for site in site_info: domain = site[1] url = base_url + domain run_time = 0 alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) while True: run_time += 1 if alexaGlobalRank == 0 or alexaTopCountry == "unknown": alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) else: break
AND a.trackingWebsite_id = b.id AND a.trackingMeta_id = c.meta_id AND a.company_id = 10 AND b.website_type = 'cyberlocker' AND date_format(a.created_at, "%%Y-%%m-%%d") > "%(date_para_TitleBased_reportDate_min)s" AND date_format(a.created_at, "%%Y-%%m-%%d") < "%(date_para_TitleBased_reportDate_max)s" group by 1, 2, 3, 4 """ %date_para_TitleBased_report_dict try: #vtweb_tracker2_section = "vtweb_tracker2" vtweb_tracker2_section = "vtweb_staging" vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) # get data from vtweb vtweb_mysql.queryCMD("set time_zone = '-7:00'") result_report = vtweb_mysql.queryCMD(vt_TitleBased_SQL) except Exception, e: logger.debug(": mysql config file section or option not exists, %s" %e) sendToMe(subject = "TitleBased ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0) finally: vtweb_mysql.closeCur() vtweb_mysql.closeConn() logger.info(": extract data from tracker2 reportDate end") # put data to TitleBased logger.info(":load data to TitleBasedTmp reportDate start") try: target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section)
c.countryName as hostCountry, a.title, sum(matchedNum) as matchedNum, sum(infringingNum) as infringingNum, sum(infringingNumCMS) as infringingNumCMS, sum(clipDurationSum) as clipDurationSum, sum(reportedViews) as reportedViews, sum(reportedViewsCMS) as reportedViewsCMS, CURRENT_TIMESTAMP as ETLDate from VIACOM_DASHBOARD.TitleBased1 as a, VIACOM_DASHBOARD.TitleBasedTrackingWebsite as b, VIACOM_DASHBOARD.TitleBasedCountry as c where a.trackingWebsite_id = b.trackingWebsite_id and b.country_id = c.country_id and(a.websiteType = "ugc" or a.websiteType = 'cyberlocker' or a.websiteType = 'hybrid') group by 1, 2, 3, 4, 5, 6, 7, 8 """ aggregate_TitleBased1_result = target_mysql.queryCMD(aggregate_TitleBased1_SQL) insert_SiteBased_SQL = """ INSERT INTO SiteBased (reportDate, trackingWebsite_id, websiteName, websiteType, websiteDomain, country_id, hostCountry, title, matchedNum, infringingNum, infringingNumCMS, clipDurationSum, reportedViews, reportedViewsCMS, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE websiteName = VALUES(websiteName), websiteDomain = VALUES(websiteDomain), country_id = VALUES(country_id), hostCountry = VALUES(hostCountry), matchedNum = VALUES(matchedNum), infringingNum = VALUES(infringingNum), infringingNumCMS = VALUES(infringingNumCMS), clipDurationSum = VALUES(clipDurationSum), reportedViews = VALUES(reportedViews), reportedViewsCMS = VALUES(reportedViewsCMS), ETLDate = VALUES(ETLDate) """
b.trackingWebsite_id, b.websiteName, b.websiteType, title, sum(ReportedViews) as reportedViews, current_timestamp as ETLDate from DM_VIACOM.SelfService_Aggregate_ByNoticedDate as a, TitleBasedTrackingWebsite as b where a.trackingWebsite_id = b.trackingWebsite_id and a.WebsiteType = 'ugc' and b.WebsiteType = 'ugc' and a.Date_ID >= '%(date_para_reportedViews_min)s' and a.Date_ID < '%(date_para_reportedViews_max)s' group by 1, 2, 3, 4, 5 """ % date_para_reportedViews_dict reportedViews_result = target_mysql.queryCMD(aggregate_reportedViews_SQL) insert_reportedViews_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, reportedViews, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE reportedViews = VALUES(reportedViews), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_reportedViews_SQL, reportedViews_result) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" % e) sendToMe(subject="titleBased_views ERROR", body=e) sys.exit(0) finally:
b.websiteType, title, sum(CMSInfringingNum) as infringingNumCMS, sum(CMSReportViews) as reportedViewsCMS, current_timestamp as ETLDate from DM_VIACOM.SelfService_Aggregate_ByNoticedDate as a, TitleBasedTrackingWebsite as b where a.trackingWebsite_id = b.trackingWebsite_id and a.WebsiteType = 'ugc' and b.WebsiteType = 'ugc' and a.trackingWebsite_id = 1 and a.Date_ID >= '%(date_para_CMS_min)s' and a.Date_ID < '%(date_para_CMS_max)s' group by 1, 2, 3, 4, 5 """ %date_para_CMS_dict CMS_result = target_mysql.queryCMD(aggregate_CMS_SQL) insert_CMS_SQL = """ INSERT INTO TitleBased1 (reportDate, trackingWebsite_id, websiteName, websiteType, title, infringingNumCMS, reportedViewsCMS, ETLDate) VALUES (%s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE infringingNumCMS = VALUES(infringingNumCMS), reportedViewsCMS = VALUES(reportedViewsCMS), ETLDate = VALUES(ETLDate) """ target_mysql.insertUpdateCMD(insert_CMS_SQL, CMS_result) target_mysql.commit() except Exception, e: logger.debug(": load data to TitleBased1, %s" %e) sys.exit(0) finally:
def main(): cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists; file_name %s" % cfg_file) sendToMe(subject="SiteBased_alexa ERROR", body="config file not exists") sys.exit(0) logger.info(": extract data from siteBased start") socket.setdefaulttimeout(10.0) sendToMe(subject="SiteBased_alexa start", body="extract data from siteBased start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port, charset='utf8') alexa_date_min = getMinDatePara("SiteBasedAlexa", "reportDate") alexa_date_min = '2016-04-12' if alexa_date_min == None: alexa_date_min = time.strftime( "%Y-%m-%d", time.localtime(time.time() - 10 * 24 * 60 * 60)) alexa_date_max = time.strftime( "%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60)) delt = time.mktime(time.strptime( str(alexa_date_max), "%Y-%m-%d")) - time.mktime( time.strptime(str(alexa_date_min), "%Y-%m-%d")) if delt >= 10 * 24 * 60 * 60: alexa_date_dict = { "alexa_date_min": alexa_date_min, "alexa_date_max": alexa_date_max } site_SQL = """ select distinct trackingWebsite_id, websiteDomain from SiteBased where reportDate <= "%(alexa_date_max)s" and reportDate > "%(alexa_date_min)s" and alexaGlobalRank = 0 and alexaTopCountry = "unknown" """ % alexa_date_dict site_info = target_mysql.queryCMD(site_SQL) if site_info: base_url = "http://www.alexa.com/siteinfo/" for site in site_info: domain = site[1] url = base_url + domain run_time = 0 alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) while True: run_time += 1 if alexaGlobalRank == 0 or alexaTopCountry == "unknown": alexaGlobalRank, alexaTopCountry = getAlexaInfo( url) else: break if run_time == 3: break time.sleep(random.randint(5, 8)) time.sleep(random.randint(5, 8)) alexa_info_tuple = [(alexa_date_max, site[0], site[1], alexaGlobalRank, alexaTopCountry, time.strftime("%Y-%m-%d %H:%M:%S"))] if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"): insert_SiteBasedAlexa_SQL = """ insert into SiteBasedAlexa (reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) values (%s, %s, %s, %s, %s, %s) on duplicate key update alexaGlobalRank = values(alexaGlobalRank), alexaTopCountry = values(alexaTopCountry), ETLDate = values(ETLDate) """ try: target_mysql.insertUpdateCMD( insert_SiteBasedAlexa_SQL, alexa_info_tuple) print alexa_info_tuple target_mysql.commit() except MySQLdb.Error, e: logger.debug(e) sendToMe(subject="update SiteBasedAlexa Error", body=re.sub(r'\'|"|!', "", str(e))) continue else: logger.info("has no data %s" % alexa_date_max) except Exception, e: logger.debug(": load data to SiteBasedAlexa %s" % e) sendToMe(subject="SiteBasedAlexa ERROR", body=re.sub(r'\'|"|!', "", str(e))) sys.exit(0)