def main():
    """Load matchedVideoViewCountCompletionAll one day at a time (HBO job).

    Starting from the date reported by ``getMinDatePara`` (or "2015-02-28"
    when it returns nothing), each pass hands a one-day window to
    ``getInsightViews`` and then re-reads the date. The loop ends when the
    date reaches yesterday, or when a pass leaves the date unchanged, in
    which case an error mail is sent. Start and end mails bracket the run.
    """
    date_fmt = "%Y-%m-%d"
    day_seconds = 1 * 24 * 60 * 60
    table_name = "matchedVideoViewCountCompletionAll"

    sendToMe(subject = "matchedVideoViewCountCompletionAll start", body = "matchedVideoViewCountCompletionAll start")
    judgeFileExist("/Job/HBO/Dashboard/TitleBased/conf/viacom_dashboard.cfg",
                   "titleBased_matchedVideoViewCountCompletionAll")

    current_day = getMinDatePara(table_name, "report_at") or "2015-02-28"

    # "Yesterday" is re-evaluated on every pass, as a long run may cross midnight.
    while str(current_day) < time.strftime(date_fmt, time.localtime(time.time() - day_seconds)):
        window_start_ts = time.mktime(time.strptime(str(current_day), date_fmt))
        next_day = time.strftime(date_fmt, time.localtime(window_start_ts + day_seconds))

        getInsightViews(start_date = current_day, end_date = next_day)

        resumed_day = getMinDatePara(table_name, "report_at") or "2015-02-28"
        if resumed_day == current_day:
            # The load did not move the date forward: report it and stop.
            sendToMe(subject = "matchedVideoViewCountCompletionAll Error", body = "insight2.1 matchedVideoViewCountCompletionAll no data")
            break
        current_day = resumed_day

    sendToMe(subject = "matchedVideoViewCountCompletionAll end", body = "matchedVideoViewCountCompletionAll end")
コード例 #2
0
def dataTo_matchedVideoTmp():
    """Copy matched-video rows into the ``matchedVideoTmp`` table of tracker2.

    Reads the created_at / updated_at dates for ``matchedVideoTmp`` via
    ``getMinDatePara`` (with fixed fallbacks when nothing is returned),
    fetches rows with ``get_matchedVideo`` and passes them, together with an
    upsert statement, to ``dataToTarget``. Start and end are logged and mailed.
    """
    logger.info("load data target matchedVideoTmp start")
    sendToMe(subject = "matchedVideoTmp start", body = "matchedVideoTmp start")

    target_db = "tracker2"
    # Upsert: every non-key column is overwritten when the row already exists.
    upsert_sql = """
        insert into matchedVideoTmp
          (id, company_id, trackingMeta_id, trackingWebsite_id, view_count, 
            count_send_notice, first_send_notice_date, takeoff_time, hide_flag,
            clip_duration, matchedFile_id, meta_title, created_at, updated_at)
         values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
         ON DUPLICATE KEY UPDATE    
            trackingMeta_id = values(trackingMeta_id), trackingWebsite_id = values(trackingWebsite_id),
            view_count = values(view_count), count_send_notice = values(count_send_notice),
            first_send_notice_date = values(first_send_notice_date), takeoff_time = values(takeoff_time),
            hide_flag = values(hide_flag), clip_duration = values(clip_duration),
            matchedFile_id = values(matchedFile_id), meta_title = values(meta_title),
            created_at = values(created_at), updated_at = values(updated_at)
    """

    created_from = getMinDatePara("matchedVideoTmp", "created_at", db = target_db)
    updated_from = getMinDatePara("matchedVideoTmp", "updated_at", db = target_db)
    created_from = created_from or "2015-03-01"
    updated_from = updated_from or "0000-00-00 00:00:00"

    rows = get_matchedVideo(created_from, updated_from)
    dataToTarget(rows, target_db, upsert_sql)

    sendToMe(subject = "matchedVideoTmp end", body = "matchedVideoTmp end")
    logger.info("load data target matchedVideoTmp end")
def main():
    """Load matchedVideoViewCountCompletion one day at a time up to yesterday.

    Starting from the date reported by ``getMinDatePara`` (or "2015-02-28"
    when it returns nothing), each pass prints the one-day window, hands it
    to ``getInsightViews`` and re-reads the date. The loop stops when the
    date reaches yesterday. It also stops, with an error mail, when a pass
    leaves the date unchanged; without that guard the same window would be
    requested forever.
    """
    date_fmt = "%Y-%m-%d"
    day_seconds = 1 * 24 * 60 * 60
    table_name = "matchedVideoViewCountCompletion"

    sendToMe(subject="matchedVideoViewCountCompletion start",
             body="matchedVideoViewCountCompletion start")
    insight_start_date = getMinDatePara(table_name, "report_at")
    if not insight_start_date:
        insight_start_date = "2015-02-28"

    while True:
        yesterday = time.strftime(date_fmt,
                                  time.localtime(time.time() - day_seconds))
        # A None date stringifies to "None", which sorts after any
        # "YYYY-MM-DD" string, so it also ends the loop here.
        if str(insight_start_date) >= str(yesterday):
            break
        window_start_ts = time.mktime(
            time.strptime(str(insight_start_date), date_fmt))
        insight_end_date = time.strftime(
            date_fmt, time.localtime(window_start_ts + day_seconds))

        # Same output as the Python 2 statement `print a, b`.
        print("%s %s" % (insight_start_date, insight_end_date))
        getInsightViews(start_date=insight_start_date,
                        end_date=insight_end_date)

        resumed_date = getMinDatePara(table_name, "report_at")
        if str(resumed_date) == str(insight_start_date):
            # No progress: stop instead of looping on the same day forever.
            sendToMe(subject="matchedVideoViewCountCompletion Error",
                     body="insight2.1 matchedVideoViewCountCompletion no data")
            break
        insight_start_date = resumed_date

    sendToMe(subject="matchedVideoViewCountCompletion end",
             body="matchedVideoViewCountCompletion end")
def main():
    """Load matchedVideoViewCountCompletionAll one day at a time (FOX job).

    Starting from the date reported by ``getMinDatePara`` (or the fallback
    start date when it returns nothing), each pass hands a one-day window to
    ``getInsightViews`` and then re-reads the date. The loop ends when the
    date reaches yesterday, or when a pass leaves the date unchanged, in
    which case an error mail is sent. Start and end mails bracket the run.
    """
    date_fmt = "%Y-%m-%d"
    day_seconds = 1 * 24 * 60 * 60
    table_name = "matchedVideoViewCountCompletionAll"
    # One fallback for both the initial read and the re-read inside the loop.
    # The in-loop fallback used to be "2015-02-28", which made an empty table
    # rewind the job to a day before this job's own start date instead of
    # being reported as "no data" straight away.
    fallback_start_date = "2015-04-30"

    sendToMe(subject = "matchedVideoViewCountCompletionAll start", body = "matchedVideoViewCountCompletionAll start")
    cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    job_name = "titleBased_matchedVideoViewCountCompletionAll"
    judgeFileExist(cfg_file, job_name)

    insight_start_date = getMinDatePara(table_name, "report_at")
    if not insight_start_date:
        insight_start_date = fallback_start_date

    while True:
        yesterday = time.strftime(date_fmt, time.localtime(time.time() - day_seconds))
        if str(insight_start_date) >= str(yesterday):
            break
        window_start_ts = time.mktime(time.strptime(str(insight_start_date), date_fmt))
        insight_end_date = time.strftime(date_fmt, time.localtime(window_start_ts + day_seconds))

        getInsightViews(start_date = insight_start_date, end_date = insight_end_date)
        tmp_date = getMinDatePara(table_name, "report_at")
        if not tmp_date:
            tmp_date = fallback_start_date
        if tmp_date == insight_start_date:
            # The load did not move the date forward: report it and stop.
            sendToMe(subject = "matchedVideoViewCountCompletionAll Error", body = "insight2.1 matchedVideoViewCountCompletionAll no data")
            break
        insight_start_date = tmp_date

    sendToMe(subject = "matchedVideoViewCountCompletionAll end", body = "matchedVideoViewCountCompletionAll end")
コード例 #5
0
def dataTo_matchedVideoTmp():
    """Copy matched-video rows into ``matchedVideoTmp`` in FOX_DASHBOARD.

    The created_at / updated_at dates for ``matchedVideoTmp`` come from
    ``getMinDatePara`` (fixed fallbacks apply when it returns nothing). Rows
    fetched by ``get_matchedVideo`` are handed to ``dataToTarget`` with an
    upsert statement. Start and end are logged and mailed.
    """
    destination_db = "FOX_DASHBOARD"

    logger.info("load data target matchedVideoTmp start")
    sendToMe(subject = "matchedVideoTmp start", body = "matchedVideoTmp start")

    report_floor = getMinDatePara("matchedVideoTmp", "created_at", db = destination_db)
    update_floor = getMinDatePara("matchedVideoTmp", "updated_at", db = destination_db)
    if not report_floor:
        report_floor = "2015-03-01"
    if not update_floor:
        update_floor = "0000-00-00 00:00:00"

    matched_rows = get_matchedVideo(report_floor, update_floor)

    # Insert new rows; refresh every non-key column of rows that already exist.
    statement = """
        insert into matchedVideoTmp
          (id, company_id, trackingMeta_id, trackingWebsite_id, view_count, 
            count_send_notice, first_send_notice_date, takeoff_time, hide_flag,
            clip_duration, matchedFile_id, meta_title, created_at, updated_at)
         values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
         ON DUPLICATE KEY UPDATE    
            trackingMeta_id = values(trackingMeta_id), trackingWebsite_id = values(trackingWebsite_id),
            view_count = values(view_count), count_send_notice = values(count_send_notice),
            first_send_notice_date = values(first_send_notice_date), takeoff_time = values(takeoff_time),
            hide_flag = values(hide_flag), clip_duration = values(clip_duration),
            matchedFile_id = values(matchedFile_id), meta_title = values(meta_title),
            created_at = values(created_at), updated_at = values(updated_at)
    """
    dataToTarget(matched_rows, destination_db, statement)

    sendToMe(subject = "matchedVideoTmp end", body = "matchedVideoTmp end")
    logger.info("load data target matchedVideoTmp end")
コード例 #6
0
def main():
    """Load matchedVideo, then insight views, day by day up to a year ago.

    Two phases run one after the other. Each starts from the date reported
    by ``getMinDatePara`` (or "2015-02-28" when it returns nothing), hands
    one-day windows to its loader (``getMatchedVideo``, then
    ``getInsightViews``) and re-reads the date after every load. A phase
    ends when its date reaches the day 365 days before now. It also ends,
    with an error mail, when a load leaves the date unchanged; without that
    guard the same window would be requested forever.
    """
    date_fmt = "%Y-%m-%d"
    day_seconds = 24 * 60 * 60

    sendToMe(subject = "titleBased_infringAllViews start", body = "titleBased_infringAllViews start")

    # Phase 1: matchedVideo, keyed on firstSendNoticeDate.
    matchedVideo_start_date = getMinDatePara("matchedVideo", "firstSendNoticeDate")
    if not matchedVideo_start_date:
        matchedVideo_start_date = "2015-02-28"

    while True:
        cutoff = time.strftime(date_fmt, time.localtime(time.time() - day_seconds * 365))
        if str(matchedVideo_start_date) >= str(cutoff):
            break
        window_start_ts = time.mktime(time.strptime(str(matchedVideo_start_date), date_fmt))
        matchedVideo_end_date = time.strftime(date_fmt, time.localtime(window_start_ts + day_seconds))

        getMatchedVideo(start_date = matchedVideo_start_date, end_date = matchedVideo_end_date)
        resumed_date = getMinDatePara("matchedVideo", "firstSendNoticeDate")
        if str(resumed_date) == str(matchedVideo_start_date):
            # No progress: stop this phase instead of looping forever.
            sendToMe(subject = "titleBased_infringAllViews Error", body = "matchedVideo no data")
            break
        matchedVideo_start_date = resumed_date

    # Phase 2: matchedVideoViewCountCompletion, keyed on report_at.
    insight_start_date = getMinDatePara("matchedVideoViewCountCompletion", "report_at")
    if not insight_start_date:
        insight_start_date = "2015-02-28"

    while True:
        cutoff = time.strftime(date_fmt, time.localtime(time.time() - day_seconds * 365))
        if str(insight_start_date) >= str(cutoff):
            break
        window_start_ts = time.mktime(time.strptime(str(insight_start_date), date_fmt))
        insight_end_date = time.strftime(date_fmt, time.localtime(window_start_ts + day_seconds))

        getInsightViews(start_date = insight_start_date, end_date = insight_end_date)
        resumed_date = getMinDatePara("matchedVideoViewCountCompletion", "report_at")
        if str(resumed_date) == str(insight_start_date):
            # No progress: stop this phase instead of looping forever.
            sendToMe(subject = "titleBased_infringAllViews Error", body = "matchedVideoViewCountCompletion no data")
            break
        insight_start_date = resumed_date

    sendToMe(subject = "titleBased_infringAllViews end", body = "titleBased_infringAllViews end")
def main():
    """Load matchedVideoViewCountCompletion day by day up to two days ago.

    Starting from the date reported by ``getMinDatePara`` (or "2015-02-28"
    when it returns nothing), each pass hands a one-day window to
    ``getInsightViews`` and re-reads the date. The loop stops when the date
    reaches the day two days before now. It also stops, with an error mail,
    when a pass leaves the date unchanged; without that guard the same
    window would be requested forever.
    """
    date_fmt = "%Y-%m-%d"
    day_seconds = 24 * 60 * 60
    table_name = "matchedVideoViewCountCompletion"

    sendToMe(subject = "matchedVideoViewCountCompletion start", body = "matchedVideoViewCountCompletion start")
    insight_start_date = getMinDatePara(table_name, "report_at")
    if not insight_start_date:
        insight_start_date = "2015-02-28"

    while True:
        cutoff = time.strftime(date_fmt, time.localtime(time.time() - 2 * day_seconds))
        if str(insight_start_date) >= str(cutoff):
            break
        window_start_ts = time.mktime(time.strptime(str(insight_start_date), date_fmt))
        insight_end_date = time.strftime(date_fmt, time.localtime(window_start_ts + 1 * day_seconds))

        getInsightViews(start_date = insight_start_date, end_date = insight_end_date)

        resumed_date = getMinDatePara(table_name, "report_at")
        if str(resumed_date) == str(insight_start_date):
            # No progress: stop instead of looping on the same day forever.
            sendToMe(subject = "matchedVideoViewCountCompletion Error", body = "insight2.1 matchedVideoViewCountCompletion no data")
            break
        insight_start_date = resumed_date

    sendToMe(subject = "matchedVideoViewCountCompletion end", body = "matchedVideoViewCountCompletion end")
コード例 #8
0
def main():
    """Run the matchedVideo load into tracker2 (FOX configuration).

    Calls ``judgeFileExist`` with the config path and job name, reads the
    created_at / updated_at dates for ``matchedVideo`` via
    ``getMinDatePara`` (fixed fallbacks apply when nothing is returned),
    fetches rows with ``getMatchedVideo`` and hands them to ``dataToTarget``.
    Start and end mails bracket the run.
    """
    destination_db = "tracker2"

    sendToMe(subject="matchedVideo start", body="matchedVideo start")
    judgeFileExist("/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg",
                   "matchedVideo")

    created_from = getMinDatePara("matchedVideo", "created_at", db=destination_db)
    updated_from = getMinDatePara("matchedVideo", "updated_at", db=destination_db)
    created_from = created_from or "2015-03-01"
    updated_from = updated_from or "0000-00-00 00:00:00"

    dataToTarget(getMatchedVideo(created_from, updated_from), destination_db)
    sendToMe(subject="matchedVideo end", body="matchedVideo end")
コード例 #9
0
ファイル: test.py プロジェクト: smartbrandnew/vobileETLCode
def main():
    """Run the matchedVideo load into tracker2 from a fixed created-at date.

    Calls ``judgeFileExist`` with the config path and job name, queries
    ``getMinDatePara`` for the ``Test`` table, then fetches rows with
    ``getMatchedVideo`` and hands them to ``dataToTarget``. The created-at
    date actually used is always the hard-coded '2016-05-08'. Start and end
    mails bracket the run.
    """
    destination_db = "tracker2"

    sendToMe(subject = "matchedVideo start", body = "matchedVideo start")
    judgeFileExist("/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg",
                   "matchedVideo")

    # The created_at lookup is still performed, but its result is discarded:
    # the start date is pinned below.
    getMinDatePara("Test", "created_at", db = destination_db)
    updated_from = getMinDatePara("Test", "updated_at", db = destination_db)
    if not updated_from:
        updated_from = "0000-00-00 00:00:00"

    # NOTE(review): hard-coded start date; looks like a test override - confirm.
    created_from = '2016-05-08'

    rows = getMatchedVideo(created_from, updated_from)
    dataToTarget(rows, destination_db)
    sendToMe(subject = "matchedVideo end", body = "matchedVideo end")
コード例 #10
0
def updateSiteBasedAlexa():
    """Prepare the statement that copies Alexa data onto newer SiteBased rows.

    Logs "start", looks up the ``SiteBasedAlexa`` report date via
    ``getMinDatePara``, builds the update statement and logs "end".
    NOTE(review): the visible code neither uses the looked-up date nor
    executes the statement - confirm whether this snippet is complete.
    """
    logger.info("start")

    # Looked up but not referenced again in the visible code.
    latest_alexa_date = getMinDatePara("SiteBasedAlexa", "reportDate")

    # Copies rank and top country from the most recent SiteBasedAlexa rows
    # onto SiteBased rows dated after that most recent Alexa report.
    refresh_sql = """
      update SiteBased as a, SiteBasedAlexa as b
      set a.alexaGlobalRank = b.alexaGlobalRank, a.alexaTopCountry = b.alexaTopCountry
      where a.trackingWebsite_id = b.trackingWebsite_id
        and b.reportDate = (select max(reportDate) from SiteBasedAlexa)
        and a.reportDate > (select max(reportDate) from SiteBasedAlexa)
    """

    logger.info("end")
コード例 #11
0
def main():
    """Load matchedVideo in one-day report windows until the window hits yesterday.

    Every pass re-reads the reportDate and updateDate values for
    ``matchedVideo`` via ``getMinDatePara`` (falling back to "2015-02-28"),
    derives the window end as the report date plus one day, and calls
    ``getMatchedVideo`` unless that window end has reached yesterday.
    Start and end mails bracket the run.
    """
    date_fmt = "%Y-%m-%d"
    day_seconds = 1 * 24 * 60 * 60

    sendToMe(subject = "matchedVideo start", body = "matchedVideo start")

    while True:
        window_start = getMinDatePara("matchedVideo", "reportDate") or "2015-02-28"
        window_start_ts = time.mktime(time.strptime(str(window_start), date_fmt))
        window_end = time.strftime(date_fmt, time.localtime(window_start_ts + day_seconds))

        updated_from = getMinDatePara("matchedVideo", "updateDate") or "2015-02-28"

        yesterday = time.strftime(date_fmt, time.localtime(time.time() - day_seconds))
        if str(window_end) >= str(yesterday):
            break

        getMatchedVideo(window_start, window_end, updated_from)

    sendToMe(subject = "matchedVideo end", body = "matchedVideo end")
コード例 #12
0
def updateSiteBasedAlexa():
    """Build the SiteBased <- SiteBasedAlexa refresh statement.

    Between the "start" and "end" log lines the function queries
    ``getMinDatePara`` for the ``SiteBasedAlexa`` report date and assembles
    an UPDATE statement.
    NOTE(review): neither value is used afterwards in the visible code and
    the statement is never executed - confirm whether the snippet is complete.
    """
    logger.info("start")

    # Result is not referenced again in the visible code.
    alexa_watermark = getMinDatePara("SiteBasedAlexa", "reportDate")

    # Targets SiteBased rows newer than the latest Alexa report and fills
    # them from the rows of that latest report.
    statement = """
      update SiteBased as a, SiteBasedAlexa as b
      set a.alexaGlobalRank = b.alexaGlobalRank, a.alexaTopCountry = b.alexaTopCountry
      where a.trackingWebsite_id = b.trackingWebsite_id
        and b.reportDate = (select max(reportDate) from SiteBasedAlexa)
        and a.reportDate > (select max(reportDate) from SiteBasedAlexa)
    """

    logger.info("end")
コード例 #13
0
def main():
    """Run ``updateViews`` from "2015-02-28" up to the current report date.

    The end date comes from ``getMinDatePara`` for
    ``matchedVideoViewCountCompletion``. The day before it is computed but
    not used: the start date passed on is always "2015-02-28". Start and end
    mails bracket the run.
    """
    date_fmt = "%Y-%m-%d"

    sendToMe(subject="titleBased_infringViews start",
             body="titleBased_infringViews start")

    end_date = getMinDatePara("matchedVideoViewCountCompletion", "report_at")

    # Parsing end_date is kept even though the result is discarded: it
    # raises if end_date is not a "YYYY-MM-DD" value.
    end_ts = time.mktime(time.strptime(str(end_date), date_fmt))
    previous_day = time.strftime(date_fmt,
                                 time.localtime(end_ts - 1 * 24 * 60 * 60))

    # NOTE(review): fixed start date replaces previous_day - confirm intent.
    updateViews("2015-02-28", end_date)

    sendToMe(subject="titleBased_infringViews end",
             body="titleBased_infringViews end")
コード例 #14
0
def main():
    """Run ``updateViews`` over the ten days before the current report date.

    Calls ``judgeFileExist`` with the config path and job name, then reads
    the end date for ``matchedVideoViewCountCompletionAll`` via
    ``getMinDatePara``. When no date is returned a "no data" mail is sent
    and the job stops. Otherwise ``updateViews`` runs from ten days before
    the end date up to the end date, and an end mail is sent.
    """
    date_fmt = "%Y-%m-%d"
    lookback_seconds = 10 * 24 * 60 * 60

    sendToMe(subject="titleBased_infringAllViews start", body="titleBased_infringAllViews start")

    cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    judgeFileExist(cfg_file=cfg_file, job_name="titleBased_infringAllViews")

    end_date = getMinDatePara("matchedVideoViewCountCompletionAll", "report_at")
    if not end_date:
        sendToMe(subject="matchedVideoViewCountCompletionAll has no data", body="matchedVideoViewCountCompletionAll has no data")
        # Nothing to process. Falling through would call strptime on the
        # string "None" and crash with ValueError right after this mail.
        return

    end_ts = time.mktime(time.strptime(str(end_date), date_fmt))
    start_date = time.strftime(date_fmt, time.localtime(end_ts - lookback_seconds))
    updateViews(start_date, end_date)

    sendToMe(subject="titleBased_infringAllViews end", body="titleBased_infringAllViews end")
コード例 #15
0
def getDataFromVT():
	"""Query removal counts and compliance time for company 14 from tracker2.

	Builds one SQL statement, formatted with a takeoff-date window and a
	minimum created_at date, that counts rows of tracker2.matchedVideo and
	sums the minutes between first_send_notice_date and takeoff_time,
	grouped by created date, takeoff date, trackingWebsite_id and
	trackingMeta_id. The statement is a UNION ALL of three parts selected by
	mddb.trackingWebsite.website_type: "ugc", "hybrid" (itself the sum of
	rows with matchedFile_id = 0 and rows with matchedFile_id > 0 joined to
	tracker2.matchedFileItem) and "cyberlocker" (joined to matchedFileItem).

	The query runs on the connection described by the "vtweb_staging"
	config section. On any exception the error is logged and mailed and the
	process exits with status 0.

	NOTE(review): the visible code stores the rows in ``result`` but does
	not return or use them - confirm whether this snippet is complete.
	"""
	logger.info(": extract data from tracker2 start")
	date_para_TitleBasedRemoveNum_min = getMinDatePara(table_name = "TitleBasedRemoveNum", date_para = "takeoffDate")
	if date_para_TitleBasedRemoveNum_min == None:
		date_para_TitleBasedRemoveNum_min = "2015-02-28"
	# NOTE(review): this unconditional assignment discards the value looked
	# up above, so the window always starts at 2015-02-28 - confirm intent.
	date_para_TitleBasedRemoveNum_min = "2015-02-28"
	# Upper bound is today's date (the offset is 0 days).
	date_para_TitleBasedRemoveNum_max = time.strftime("%Y-%m-%d", time.localtime(time.time() - 0 * 24 * 60 * 60))

	# Values substituted into the %(name)s placeholders of the SQL below.
	date_para_TitleBasedRemoveNum_dict = {"date_para_TitleBasedRemoveNum_min":date_para_TitleBasedRemoveNum_min, \
		"date_para_TitleBasedRemoveNum_max":date_para_TitleBasedRemoveNum_max, "min_report_date": "2015-03-01"}
	# "%%" is a literal "%" after Python formatting, so date_format receives
	# "%Y-%m-%d". Both takeoff-date bounds are exclusive (> min, < max).
	vt_TitleBasedRemoveNum_SQL = """
		select
		  date_format(a.created_at, "%%Y-%%m-%%d") as reportDate,
		  date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate,
		  a.trackingWebsite_id,
		  a.trackingMeta_id,
		  count(*) removedNum,
		  sum(case when a.first_send_notice_date >0 and a.takeoff_time>0  
		  	then TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0  end) as complianceTime,
		  CURRENT_TIMESTAMP as ETLDate
		from tracker2.matchedVideo as a, mddb.trackingWebsite as b
		where a.trackingWebsite_id = b.id
		  and a.company_id = 14
		  and b.website_type = "ugc"
		  and a.count_send_notice > 0
		  and hide_flag = 2
		  and a.first_send_notice_date < a.takeoff_time
		  and a.first_send_notice_date > 0
		  and a.created_at >= "%(min_report_date)s"
		  and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s"
		  and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s"
		group by 1, 2, 3, 4
		UNION ALL
		select 
		  reportDate,
		  takeoffDate,
		  trackingWebsite_id,
		  trackingMeta_id,
		  sum(removedNum) removedNum,
		  sum(complianceTime) complianceTime,
		  CURRENT_TIMESTAMP as ETLDate
		from  (select
		    date_format(a.created_at, "%%Y-%%m-%%d") as reportDate,
		    date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate,
		    a.trackingWebsite_id,
		    a.trackingMeta_id,
		    count(*) removedNum,
		    sum(case when a.first_send_notice_date >0 and a.takeoff_time>0  
		      then TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0  end) as complianceTime
		  from tracker2.matchedVideo as a, mddb.trackingWebsite as b
		  where a.trackingWebsite_id = b.id
		    and a.company_id = 14
		    and b.website_type = "hybrid"
		    and a.count_send_notice > 0
		    and a.matchedFile_id = 0
		    and hide_flag = 2
		    and a.first_send_notice_date < a.takeoff_time
		    and a.first_send_notice_date > 0
		    and a.created_at >= "%(min_report_date)s"
		    and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s"
		    and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s"
		  group by 1, 2, 3, 4
		  union all
		  select
		    date_format(a.created_at, "%%Y-%%m-%%d") as reportDate,
		    date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate,
		    a.trackingWebsite_id,
		    a.trackingMeta_id,
		    count(*) removedNum,
		    sum(case when a.first_send_notice_date >0 and a.takeoff_time>0  
		      then TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0  end) as complianceTime
		  from tracker2.matchedVideo as a, mddb.trackingWebsite as b, tracker2.matchedFileItem d
		  where a.trackingWebsite_id = b.id
		    and d.matchedFile_id =  a.matchedFile_id
		    and a.company_id = 14
		    and b.website_type = "hybrid"
		    and a.matchedFile_id > 0
		    and a.count_send_notice > 0
		    and hide_flag = 2
		    and a.first_send_notice_date < a.takeoff_time
		    and a.first_send_notice_date > 0
		    and a.created_at >= "%(min_report_date)s"
		    and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s"
		    and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s"
		  group by 1, 2, 3, 4) as a
		group by 1,2 ,3 ,4
		UNION ALL
		select
		  date_format(a.created_at, "%%Y-%%m-%%d") as reportDate,
		  date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate,
		  a.trackingWebsite_id,
		  a.trackingMeta_id,
		  count(*) removedNum,
		  sum(case when a.first_send_notice_date >0 and a.takeoff_time>0  
		  	then TIMESTAMPDIFF(MINUTE, a.first_send_notice_date, a.takeoff_time) else 0  end) as complianceTime,
		  CURRENT_TIMESTAMP as ETLDate
		from tracker2.matchedVideo as a, mddb.trackingWebsite as b, tracker2.matchedFileItem d
		where a.trackingWebsite_id = b.id
		  and d.matchedFile_id =  a.matchedFile_id
		  and a.company_id = 14
		  and b.website_type = "cyberlocker"
		  and a.count_send_notice > 0
		  and hide_flag = 2
		  and a.first_send_notice_date < a.takeoff_time
		  and a.first_send_notice_date > 0
		  and a.created_at >= "%(min_report_date)s"
		  and date_format(a.takeoff_time, "%%Y-%%m-%%d") > "%(date_para_TitleBasedRemoveNum_min)s"
		  and date_format(a.takeoff_time, "%%Y-%%m-%%d") < "%(date_para_TitleBasedRemoveNum_max)s"
		group by 1, 2, 3, 4
	""" %date_para_TitleBasedRemoveNum_dict

	# Name of the config section the connection parameters are read from.
	vtweb_tracker2_section = "vtweb_staging"
	try:
		vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section)
		vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db)
		# Session time zone is set to UTC-8 before the query is run.
		vtweb_mysql.queryCMD("set time_zone = '-8:00'")
		result = vtweb_mysql.queryCMD(vt_TitleBasedRemoveNum_SQL)
	except Exception, e:
		logger.debug(": extract data from vt for TitleBasedRemoveNum, %s" %e)
		# Quotes and "!" are stripped from the error text before mailing it.
		sendToMe(subject = "TitleBasedRemove ERROR", body = re.sub(r'\'|"|!', "", str(e)))
		# NOTE(review): exits with status 0 even though this is a failure path.
		sys.exit(0)
コード例 #16
0
def main():
    """Fetch Alexa rank and top country for unranked SiteBased domains.

    Steps, as far as the visible code shows:

    1. Exit (status 0) with a log entry and a mail if the config file is
       missing.
    2. Connect to the "target_server_staging" database.
    3. Take the date window (alexa_date_min, yesterday]; when it spans at
       least ten days, select the distinct SiteBased domains in that window
       whose rank is 0 and top country is "unknown".
    4. For each domain call ``getAlexaInfo`` once, plus up to three retries
       while either value is still missing, pausing 5-8 s between requests.
    5. Upsert the result into SiteBasedAlexa unless both values are still
       missing. A MySQL error on one row is logged and mailed, and the loop
       moves on to the next domain.

    Any other exception is logged and mailed and the process exits with
    status 0.
    """
    cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    if not os.path.exists(cfg_file):
        # Log through the configured module logger. The root-level
        # logging.debug() used before is dropped by default because the
        # root logger's level is WARNING.
        logger.debug(": config file not exists; file_name %s" % cfg_file)
        sendToMe(subject = "SiteBased_alexa ERROR", body = "config file not exists")
        sys.exit(0)

    logger.info(": extract data from siteBased start")
    socket.setdefaulttimeout(10.0)

    sendToMe(subject = "SiteBased_alexa start", body = "extract data from siteBased start")
    target_server_section = "target_server_staging"
    target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(target_server_section)
    try:
        target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8')
        alexa_date_min = getMinDatePara("SiteBasedAlexa", "reportDate")
        # NOTE(review): hard-coded override of the value read above; it makes
        # the None fallback below unreachable - confirm it is still wanted.
        alexa_date_min = '2016-04-12'
        if alexa_date_min is None:
            alexa_date_min = time.strftime("%Y-%m-%d", time.localtime(time.time() - 10 * 24 * 60 * 60))
        alexa_date_max = time.strftime("%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60))
        delt = time.mktime(time.strptime(str(alexa_date_max), "%Y-%m-%d")) - time.mktime(time.strptime(str(alexa_date_min), "%Y-%m-%d"))
        # Only run when the window covers at least ten days.
        if delt >= 10*24*60*60:
            alexa_date_dict = {"alexa_date_min": alexa_date_min, "alexa_date_max": alexa_date_max}
            site_SQL = """
                select distinct trackingWebsite_id, websiteDomain from SiteBased 
                where reportDate <= "%(alexa_date_max)s"
                  and reportDate > "%(alexa_date_min)s"
                  and alexaGlobalRank = 0
                  and alexaTopCountry = "unknown"
                """ % alexa_date_dict

            site_info = target_mysql.queryCMD(site_SQL)
            if site_info:
                base_url = "http://www.alexa.com/siteinfo/"
                for site in site_info:
                    domain = site[1]
                    url = base_url + domain
                    alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
                    # Up to three retries while either value is missing,
                    # sleeping between retries but not after the last one.
                    for attempt in range(3):
                        if not (alexaGlobalRank == 0 or alexaTopCountry == "unknown"):
                            break
                        alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
                        if attempt == 2:
                            break
                        time.sleep(random.randint(5, 8))

                    # Pause before moving on to the next domain.
                    time.sleep(random.randint(5, 8))
                    alexa_info_tuple = [(alexa_date_max, site[0], site[1], alexaGlobalRank, alexaTopCountry, time.strftime("%Y-%m-%d %H:%M:%S"))]

                    # Store the row unless both values are still missing.
                    if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"):
                        insert_SiteBasedAlexa_SQL = """
                            insert into SiteBasedAlexa
                                (reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) 
                            values (%s, %s, %s, %s, %s, %s)  
                            on duplicate  key update 
                                alexaGlobalRank = values(alexaGlobalRank), 
                                alexaTopCountry = values(alexaTopCountry), ETLDate  = values(ETLDate)
                        """
                        try:
                            target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL, alexa_info_tuple)
                            print(alexa_info_tuple)
                            target_mysql.commit()
                        except MySQLdb.Error as e:
                            # One failed row must not stop the remaining domains.
                            logger.debug(e)
                            sendToMe(subject = "update SiteBasedAlexa Error", body = re.sub(r'\'|"|!', "", str(e)))
                            continue
            else:
                logger.info("has no data %s" % alexa_date_max)
    except Exception as e:
        logger.debug(": load data to SiteBasedAlexa %s" % e)
        sendToMe(subject = "SiteBasedAlexa ERROR", body = re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
コード例 #17
0
# Module logger: DEBUG and above go to a rotating file (5 MiB per file,
# 10 backups kept).
logger = logging.getLogger("titleBased_remove")
logger.setLevel(logging.DEBUG)
log_file = '/Job/VIACOM/Dashboard/TitleBasedStaging/log/titleBased_remove.log'
filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5*1024*1024, backupCount=10, mode='a')
formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s')
filehandler.setFormatter(formatter)
logger.addHandler(filehandler)

# Stop (exit status 0) when the job's config file is missing.
cfg_file = "/Job/VIACOM/Dashboard/TitleBasedStaging/conf/viacom_dashboard.cfg"
if not os.path.exists(cfg_file):
    # Log through the configured logger. The root-level logging.debug()
    # used before is dropped by default because the root logger's level
    # is WARNING.
    logger.debug(": config file not exists")
    sys.exit(0)
#################################################################################################################################
logger.info(": extract data from tracker2 start")
# extract dat from vtweb
date_para_TitleBasedRemoveNum_min = getMinDatePara(table_name = "TitleBasedRemoveNum", date_para = "takeoffDate")
if date_para_TitleBasedRemoveNum_min == None:
	date_para_TitleBasedRemoveNum_min = "2015-02-28"
print date_para_TitleBasedRemoveNum_min
date_para_TitleBasedRemoveNum_max = time.strftime("%Y-%m-%d", time.localtime(time.time() - 0 * 24 * 60 * 60))

date_para_TitleBasedRemoveNum_dict = {"date_para_TitleBasedRemoveNum_min":date_para_TitleBasedRemoveNum_min, \
	"date_para_TitleBasedRemoveNum_max":date_para_TitleBasedRemoveNum_max, "min_report_date": "2015-03-01"}
vt_TitleBasedRemoveNum_SQL = """
	select
	  date_format(a.created_at, "%%Y-%%m-%%d") as reportDate,
	  date_format(a.takeoff_time, "%%Y-%%m-%%d") as takeoffDate,
	  a.trackingWebsite_id,
	  a.trackingMeta_id,
	  count(*) removedNum,
	  sum(case when a.first_send_notice_date >0 and a.takeoff_time>0  
コード例 #18
0
    return getGlobalRank(html), getTopOneCountry(html)


target_server_section = "target_server_staging"
target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(
    target_server_section)
try:

    target_mysql = MySQLHelper(host=target_host,
                               user=target_user,
                               passwd=target_passwd,
                               db_name=target_db,
                               port=target_port,
                               charset='utf8')
    alexa_date_min = getMinDatePara("SiteBasedAlexa", "reportDate")
    if alexa_date_min == None:
        alexa_date_min = time.strftime(
            "%Y-%m-%d", time.localtime(time.time() - 10 * 24 * 60 * 60))
    alexa_date_max = time.strftime(
        "%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60))

    alexa_date_dict = {
        "alexa_date_min": alexa_date_min,
        "alexa_date_max": alexa_date_max
    }
    site_SQL = """
		select distinct trackingWebsite_id, websiteDomain from SiteBased 
		where reportDate <= "%(alexa_date_max)s"
		  and reportDate > "%(alexa_date_min)s"
		  and alexaGlobalRank = 0
コード例 #19
0
def main():
    """Fetch Alexa rank and top country for unranked SiteBased domains (FOX).

    The job exits with status 0 (after a log entry and a mail) when the
    config file is missing. Otherwise it connects to the
    "target_server_staging" database and takes the date window
    (alexa_date_min, yesterday]. When that window spans at least ten days it
    selects the distinct SiteBased domains in it whose rank is 0 and top
    country is "unknown".

    Each domain is looked up with ``getAlexaInfo``, with up to three
    retries while either value is missing and 5-8 s pauses between requests.
    The result is upserted into SiteBasedAlexa unless both values are still
    missing. A MySQL error on one row is logged and mailed and the next
    domain is processed. Any other exception is logged and mailed and the
    process exits with status 0.
    """
    cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    if not os.path.exists(cfg_file):
        # Use the configured module logger. The root-level logging.debug()
        # used before is dropped by default because the root logger's level
        # is WARNING.
        logger.debug(": config file not exists; file_name %s" % cfg_file)
        sendToMe(subject="SiteBased_alexa ERROR",
                 body="config file not exists")
        sys.exit(0)

    logger.info(": extract data from siteBased start")
    socket.setdefaulttimeout(10.0)

    sendToMe(subject="SiteBased_alexa start",
             body="extract data from siteBased start")
    target_server_section = "target_server_staging"
    target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(
        target_server_section)
    try:
        target_mysql = MySQLHelper(host=target_host,
                                   user=target_user,
                                   passwd=target_passwd,
                                   db_name=target_db,
                                   port=target_port,
                                   charset='utf8')
        alexa_date_min = getMinDatePara("SiteBasedAlexa", "reportDate")
        # NOTE(review): hard-coded override of the value read above; it makes
        # the None fallback below unreachable - confirm it is still wanted.
        alexa_date_min = '2016-04-12'
        if alexa_date_min is None:
            alexa_date_min = time.strftime(
                "%Y-%m-%d", time.localtime(time.time() - 10 * 24 * 60 * 60))
        alexa_date_max = time.strftime(
            "%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60))
        delt = time.mktime(time.strptime(
            str(alexa_date_max), "%Y-%m-%d")) - time.mktime(
                time.strptime(str(alexa_date_min), "%Y-%m-%d"))
        # Only run when the window covers at least ten days.
        if delt >= 10 * 24 * 60 * 60:
            alexa_date_dict = {
                "alexa_date_min": alexa_date_min,
                "alexa_date_max": alexa_date_max
            }
            site_SQL = """
                select distinct trackingWebsite_id, websiteDomain from SiteBased 
                where reportDate <= "%(alexa_date_max)s"
                  and reportDate > "%(alexa_date_min)s"
                  and alexaGlobalRank = 0
                  and alexaTopCountry = "unknown"
                """ % alexa_date_dict

            site_info = target_mysql.queryCMD(site_SQL)
            if site_info:
                base_url = "http://www.alexa.com/siteinfo/"
                for site in site_info:
                    domain = site[1]
                    url = base_url + domain
                    run_time = 0
                    alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
                    # Retry while either value is missing, at most three
                    # times, sleeping between retries but not after the last.
                    while True:
                        run_time += 1
                        if alexaGlobalRank == 0 or alexaTopCountry == "unknown":
                            alexaGlobalRank, alexaTopCountry = getAlexaInfo(
                                url)
                        else:
                            break
                        if run_time == 3:
                            break
                        time.sleep(random.randint(5, 8))

                    # Pause before moving on to the next domain.
                    time.sleep(random.randint(5, 8))
                    alexa_info_tuple = [(alexa_date_max, site[0], site[1],
                                         alexaGlobalRank, alexaTopCountry,
                                         time.strftime("%Y-%m-%d %H:%M:%S"))]

                    # Store the row unless both values are still missing.
                    if not (alexaGlobalRank == 0
                            and alexaTopCountry == "unknown"):
                        insert_SiteBasedAlexa_SQL = """
                            insert into SiteBasedAlexa
                                (reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) 
                            values (%s, %s, %s, %s, %s, %s)  
                            on duplicate  key update 
                                alexaGlobalRank = values(alexaGlobalRank), 
                                alexaTopCountry = values(alexaTopCountry), ETLDate  = values(ETLDate)
                        """
                        try:
                            target_mysql.insertUpdateCMD(
                                insert_SiteBasedAlexa_SQL, alexa_info_tuple)
                            print(alexa_info_tuple)
                            target_mysql.commit()
                        except MySQLdb.Error as e:
                            # One failed row must not stop the remaining domains.
                            logger.debug(e)
                            sendToMe(subject="update SiteBasedAlexa Error",
                                     body=re.sub(r'\'|"|!', "", str(e)))
                            continue
            else:
                logger.info("has no data %s" % alexa_date_max)
    except Exception as e:
        logger.debug(": load data to SiteBasedAlexa %s" % e)
        sendToMe(subject="SiteBasedAlexa ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)