예제 #1
0
def main():
    """Scrape Alexa data for the sites listed in ``id_dis_domain`` and upsert it.

    Steps, in order:
      1. If the dashboard config file is missing, notify via ``sendToMe`` and
         exit.
      2. Connect to the MySQL server described by the ``staging`` section.
      3. For every comma-separated line of ``id_dis_domain`` take fields 1, 2
         and 3 as website id, display name and domain, then call
         ``getAlexaInfo`` for the domain, retrying up to three more times
         while either value is still a placeholder (rank ``0`` or country
         ``"unknown"``).
      4. Upsert the row into ``Website_Alexa_Info`` unless both values are
         still placeholders.

    A ``MySQLdb.Error`` on a single row is logged, mailed and skipped. Any
    other exception is logged, mailed and ends the process via
    ``sys.exit(0)``.
    """
    cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    if not os.path.exists(cfg_file):
        logging.debug(": config file not exists; file_name %s" % cfg_file)
        sendToMe(subject="SiteBased_alexa ERROR",
                 body="config file not exists")
        # NOTE(review): exits with status 0 even though this is a failure.
        sys.exit(0)

    logger.info(": extract data from siteBased start")
    socket.setdefaulttimeout(10.0)

    sendToMe(subject="SiteBased_alexa start",
             body="extract data from siteBased start")
    target_server_section = "staging"
    target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(
        target_server_section)

    base_url = "http://www.alexa.com/siteinfo/"
    # Date stamped on every row written by this run.
    report_date = "2016-05-31"
    insert_SiteBasedAlexa_SQL = """
                        insert into Website_Alexa_Info
                            (CreateDate, WebsiteId, DisplayName, Rank, TopOneCountry, IsEndOfMonth) 
                        values (%s, %s, %s, %s, %s, %s)  
                        on duplicate  key update 
                            Rank = values(Rank), 
                            TopOneCountry = values(TopOneCountry)
                    """
    try:
        target_mysql = MySQLHelper(host=target_host,
                                   user=target_user,
                                   passwd=target_passwd,
                                   db_name=target_db,
                                   port=target_port,
                                   charset='utf8')
        # Read the whole list up front so the file handle is released before
        # the slow scrape loop (which sleeps between requests) starts.
        with open("id_dis_domain", "r") as f:
            lines = f.readlines()

        # The original logged this from a for/else clause using an undefined
        # name, which raised NameError after every completed loop. Only log
        # it when there really is nothing to process.
        if not lines:
            logger.info("has no data %s" % report_date)

        for line in lines:
            if not line.strip():
                # A blank line would otherwise abort the whole run with an
                # IndexError.
                continue
            fields = line.strip("\n").split(",")
            website_id, display_name, domain = fields[1], fields[2], fields[3]
            url = base_url + domain

            run_time = 0
            alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
            # Retry while either value is still a placeholder; give up after
            # the third retry.
            while True:
                run_time += 1
                if alexaGlobalRank == 0 or alexaTopCountry == "unknown":
                    alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
                else:
                    break
                if run_time == 3:
                    break
                time.sleep(random.randint(5, 8))

            # Pause between sites as well.
            time.sleep(random.randint(5, 8))
            alexa_info_tuple = [(report_date, website_id, display_name,
                                 alexaGlobalRank, alexaTopCountry, 1)]

            # Skip the write only when both values are still placeholders.
            if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"):
                try:
                    target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL,
                                                 alexa_info_tuple)
                    print(alexa_info_tuple)
                    target_mysql.commit()
                except MySQLdb.Error as e:
                    # One bad row must not stop the remaining sites.
                    logger.debug(e)
                    sendToMe(subject="update SiteBasedAlexa Error",
                             body=re.sub(r'\'|"|!', "", str(e)))
    except Exception as e:
        logger.debug(": load data to SiteBasedAlexa %s" % e)
        sendToMe(subject="SiteBasedAlexa ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
def addAlexa(d):
    """Copy the latest SiteBasedAlexa rows forward under report date ``d``.

    Selects every row whose ``reportDate`` equals the table's maximum
    ``reportDate``, replacing the date with ``d`` and the ETL date with
    ``current_timestamp()``, and inserts the result back into
    ``SiteBasedAlexa`` on the ``target_server_staging`` server.

    Failures are mailed via ``sendToMe`` and logged; they are not re-raised.

    Args:
        d: Report date to stamp on the copied rows. It is interpolated
            directly into the SQL text, so it must come from a trusted
            source.
    """
    sendToMe(subject="add alexa data start", body="add alexa data start")
    logger.info(" addSiteBasedAlexa start")
    # NOTE(review): string-built SQL; switch to a bound parameter if
    # queryCMD supports one.
    get_alexa_sql = "select '%s', trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, current_timestamp() from SiteBasedAlexa where reportDate = (select max(reportDate) from SiteBasedAlexa);" % d
    insert_alexa_sql = "insert into SiteBasedAlexa(reportDate,trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry,ETLDate) VALUES(%s,%s,%s,%s,%s,%s);"
    # Stays None until the connection exists, so cleanup can tell whether
    # there is anything to close.
    target_mysql = None
    try:
        target_server_section = "target_server_staging"
        target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(
            target_server_section)
        target_mysql = MySQLHelper(host=target_host,
                                   user=target_user,
                                   passwd=target_passwd,
                                   db_name=target_db,
                                   port=target_port,
                                   charset='utf8')
        alexa_result = target_mysql.queryCMD(get_alexa_sql)
        target_mysql.insertUpdateCMD(insert_alexa_sql, alexa_result)
        target_mysql.commit()
    except Exception as e:
        # Send text, not the exception object, as the mail body.
        sendToMe(subject="add alexa data ERROR", body=str(e))
        # Logger has no DEBUG attribute; the original call raised
        # AttributeError from inside the error handler.
        logger.debug("add alexa data %s" % e)
    finally:
        if target_mysql is not None:
            target_mysql.closeCur()
            target_mysql.closeConn()
예제 #3
0
def addAlexa(d):
    """Copy the latest SiteBasedAlexa rows forward under report date ``d``.

    Selects every row whose ``reportDate`` equals the table's maximum
    ``reportDate``, replacing the date with ``d`` and the ETL date with
    ``current_timestamp()``, and inserts the result back into
    ``SiteBasedAlexa`` on the ``target_server_staging`` server.

    Failures are mailed via ``sendToMe`` and logged; they are not re-raised.

    Args:
        d: Report date to stamp on the copied rows. It is interpolated
            directly into the SQL text, so it must come from a trusted
            source.
    """
    sendToMe(subject="add alexa data start", body="add alexa data start")
    logger.info(" addSiteBasedAlexa start")
    # NOTE(review): string-built SQL; switch to a bound parameter if
    # queryCMD supports one.
    get_alexa_sql = "select '%s', trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, current_timestamp() from SiteBasedAlexa where reportDate = (select max(reportDate) from SiteBasedAlexa);" % d
    insert_alexa_sql = "insert into SiteBasedAlexa(reportDate,trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry,ETLDate) VALUES(%s,%s,%s,%s,%s,%s);"
    # Stays None until the connection exists, so cleanup can tell whether
    # there is anything to close.
    target_mysql = None
    try:
        target_server_section = "target_server_staging"
        target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(
            target_server_section)
        target_mysql = MySQLHelper(host=target_host,
                                   user=target_user,
                                   passwd=target_passwd,
                                   db_name=target_db,
                                   port=target_port,
                                   charset='utf8')
        alexa_result = target_mysql.queryCMD(get_alexa_sql)
        target_mysql.insertUpdateCMD(insert_alexa_sql, alexa_result)
        target_mysql.commit()
    except Exception as e:
        # Send text, not the exception object, as the mail body.
        sendToMe(subject="add alexa data ERROR", body=str(e))
        # Logger has no DEBUG attribute; the original call raised
        # AttributeError from inside the error handler.
        logger.debug("add alexa data %s" % e)
    finally:
        if target_mysql is not None:
            target_mysql.closeCur()
            target_mysql.closeConn()
예제 #4
0
finally:
    vtweb_mysql.closeCur()
    vtweb_mysql.closeConn()
    logger.info(": extract data from tracker2 start")

# Upsert the rows held in `result` into TitleBasedMeta on the
# target_server_staging server. `result` is produced by an earlier step that
# is not visible in this section.
logger.info(":load data to TitleBasedMeta  start")
target_server_section = "target_server_staging"
# Reset the handle so the cleanup below never touches a missing name or a
# connection left over from an earlier step when this step fails before
# connecting.
target_mysql = None
try:
    target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(
        target_server_section)
    target_mysql = MySQLHelper(host=target_host,
                               user=target_user,
                               passwd=target_passwd,
                               db_name=target_db,
                               port=target_port,
                               charset='utf8')
    insertUpdate_SQL = """
		INSERT INTO TitleBasedMeta(trackingMeta_id, metaType, title, ETLDate) VALUES(%s, %s, %s, %s) on duplicate  key update title = values(title),
		ETLDate = values(ETLDate), metaType = values(metaType)
	"""
    target_mysql.insertUpdateCMD(insertUpdate_SQL, result)
    target_mysql.commit()
except Exception as e:
    logger.debug(": load data to TitleBasedMeta, %s" % e)
    # NOTE(review): exits with status 0 even though the load failed.
    sys.exit(0)
finally:
    # Runs on the sys.exit path too; only close what was actually opened.
    if target_mysql is not None:
        target_mysql.closeCur()
        target_mysql.closeConn()
    logger.info(":load data to TitleBasedMeta  end")
#################################################################################################################################
		  title,
		  sum(ReportedViews) as reportedViews,
		  current_timestamp as ETLDate
		from DM_VIACOM.SelfService_Aggregate_ByNoticedDate as a, TitleBasedTrackingWebsite as b
		where  a.trackingWebsite_id = b.trackingWebsite_id
		  and a.WebsiteType = 'ugc'
		  and b.WebsiteType = 'ugc'
		  and a.Date_ID >= '%(date_para_reportedViews_min)s'
		  and a.Date_ID < '%(date_para_reportedViews_max)s'
		group by 1, 2, 3, 4, 5
	""" %date_para_reportedViews_dict

	reportedViews_result = target_mysql.queryCMD(aggregate_reportedViews_SQL)

	insert_reportedViews_SQL = """
		INSERT INTO TitleBased1 
		(reportDate, trackingWebsite_id, websiteName, websiteType, title, reportedViews, ETLDate) 
  		VALUES (%s, %s, %s, %s, %s, %s, %s) 
  		ON DUPLICATE KEY UPDATE reportedViews = VALUES(reportedViews), ETLDate = VALUES(ETLDate)
	"""
	target_mysql.insertUpdateCMD(insert_reportedViews_SQL, reportedViews_result)
	target_mysql.commit()
except Exception, e:
	logger.debug(": load data to TitleBased1, %s" %e)
	sys.exit(0)
finally:
	target_mysql.closeCur()
	target_mysql.closeConn()
logger.info(" aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate to  TitleBased1 end")

	vtweb_mysql.closeCur()
	vtweb_mysql.closeConn()
	logger.info(": extract data from tracker2 start")

# Upsert the rows held in `result` into TitleBasedCountry on the
# target_server_staging server, title-casing the country name (third field).
# `result` is produced by an earlier step that is not visible in this section.
logger.info(":load data to TitleBasedCountry  start")
target_server_section = "target_server_staging"
# Reset the handle so the cleanup below never touches a missing name or a
# connection left over from an earlier step when this step fails before
# connecting.
target_mysql = None
try:
    target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(
        target_server_section)
    target_mysql = MySQLHelper(host=target_host,
                               user=target_user,
                               passwd=target_passwd,
                               db_name=target_db,
                               port=target_port,
                               charset='utf8')
    insertUpdate_SQL = """
		INSERT INTO TitleBasedCountry
		(country_id, region, countryName, nationalFlag, ETLDate) 
		VALUES(%s, %s, %s, %s, %s) 
		on duplicate  key update 
			region = values(region), ETLDate = values(ETLDate), 
			countryName = values(countryName), nationalFlag = VALUES(nationalFlag)
	"""

    country_rows = [(t[0], t[1], t[2].title(), t[3], t[4]) for t in result]
    target_mysql.insertUpdateCMD(insertUpdate_SQL, country_rows)
    target_mysql.commit()
except Exception as e:
    logger.debug(": load data to TitleBasedCountry, %s" % e)
    # Send text, not the exception object, as the mail body.
    sendToMe(subject="titleBased_country ERROR", body=str(e))
    # NOTE(review): exits with status 0 even though the load failed.
    sys.exit(0)
finally:
    # Runs on the sys.exit path too; only close what was actually opened.
    if target_mysql is not None:
        target_mysql.closeCur()
        target_mysql.closeConn()
    logger.info(":load data to TitleBasedCountry  end")
sendToMe(subject="TitleBasedCountry end",
         body="load data to TitleBasedCountry  end")
#################################################################################################################################
예제 #7
0
		  and a.WebsiteType = 'ugc'
		  and b.WebsiteType = 'ugc'
		  and a.Date_ID >= '%(date_para_reportedViews_min)s'
		  and a.Date_ID < '%(date_para_reportedViews_max)s'
		group by 1, 2, 3, 4, 5
	""" % date_para_reportedViews_dict

    reportedViews_result = target_mysql.queryCMD(aggregate_reportedViews_SQL)

    insert_reportedViews_SQL = """
		INSERT INTO TitleBased1 
		(reportDate, trackingWebsite_id, websiteName, websiteType, title, reportedViews, ETLDate) 
  		VALUES (%s, %s, %s, %s, %s, %s, %s) 
  		ON DUPLICATE KEY UPDATE reportedViews = VALUES(reportedViews), ETLDate = VALUES(ETLDate)
	"""
    target_mysql.insertUpdateCMD(insert_reportedViews_SQL,
                                 reportedViews_result)
    target_mysql.commit()
except Exception, e:
    logger.debug(": load data to TitleBased1, %s" % e)
    sendToMe(subject="titleBased_views ERROR", body=e)
    sys.exit(0)
finally:
    target_mysql.closeCur()
    target_mysql.closeConn()
logger.info(
    " aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate to  TitleBased1 end"
)
sendToMe(
    subject="titleBased_views End",
    body=
    "aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate(views) to  TitleBased1 end"
예제 #8
0
			matchedNumDurationNoZero, infringingNum, infringingNumDurationNoZero,
			infringingNumCMS, clipDurationSum, clipDurationInfringingSum,
			reportedViews, infringingViews, reportedViewsCMS, ETLDate) 
  		VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) 
  		ON DUPLICATE KEY UPDATE 
  			websiteName = VALUES(websiteName), websiteDomain = VALUES(websiteDomain), 
  			country_id = VALUES(country_id), hostCountry = VALUES(hostCountry), 
  			matchedNum = VALUES(matchedNum), matchedNumDurationNoZero = values(matchedNumDurationNoZero), 
  			infringingNum = VALUES(infringingNum), infringingNumDurationNoZero = values(infringingNumDurationNoZero),
  			infringingNumCMS = VALUES(infringingNumCMS), 
  			clipDurationSum = VALUES(clipDurationSum),clipDurationInfringingSum = VALUES(clipDurationInfringingSum),
  			reportedViews = VALUES(reportedViews), infringingViews = values(infringingViews), 
			reportedViewsCMS = VALUES(reportedViewsCMS),
  			ETLDate = VALUES(ETLDate)
	"""
	target_mysql.insertUpdateCMD(insert_SiteBased_SQL, aggregate_TitleBased1_result)
	target_mysql.commit()
except Exception, e:
	logger.debug("load data to SiteBased, %s" %e)
	sendToMe(subject = "SiteBased Error", body = re.sub(r'\'|"|!', "", str(e)))
	sys.exit(0)
finally:
	target_mysql.closeCur()
	target_mysql.closeConn()
logger.info(" aggregate data from TitleBased1 to  SiteBased end")
#################################################################################################################################
#--------------------------------------------------------------------------------------------------------------------------------
logger.info(" aggregate data from TitleBasedRemoveNum to  SiteBasedRemoveNum start")
try:
	target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, 
		db_name = target_db, port = target_port, charset = 'utf8')
예제 #9
0
            time.sleep(random.randint(3, 5))
            alexa_info_tuple = [(alexa_date_max, site[0], site[1],
                                 alexaGlobalRank, alexaTopCountry,
                                 time.strftime("%Y-%m-%d %H:%M:%S"))]

            if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"):
                insert_SiteBasedAlexa_SQL = """
					insert into SiteBasedAlexa
						(reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) 
					values (%s, %s, %s, %s, %s, %s)  
					on duplicate  key update 
						alexaGlobalRank = values(alexaGlobalRank), 
						alexaTopCountry = values(alexaTopCountry), ETLDate  = values(ETLDate)
				"""
                target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL,
                                             alexa_info_tuple)
                target_mysql.commit()
    else:
        logger.info("has no data %s" % alexa_date_max)

except Exception, e:
    logger.debug(": load data to SiteBasedAlexa %s" % e)
    sys.exit(0)
finally:
    target_mysql.closeCur()
    target_mysql.closeConn()
    logger.info(":load data to SiteBasedAlexa  end")
#################################################################################################################################
#sed  "/\/home\/vobile\/cwj\/ViacomProject\/dashboard\/job/\/Job\/VIACOM\/Dashboard\/TitleBased/g"
예제 #10
0
def main():
    """Scrape Alexa data for the sites listed in ``id_dis_domain`` and upsert it.

    Steps, in order:
      1. If the dashboard config file is missing, notify via ``sendToMe`` and
         exit.
      2. Connect to the MySQL server described by the ``staging`` section.
      3. For every comma-separated line of ``id_dis_domain`` take fields 1, 2
         and 3 as website id, display name and domain, then call
         ``getAlexaInfo`` for the domain, retrying up to three more times
         while either value is still a placeholder (rank ``0`` or country
         ``"unknown"``).
      4. Upsert the row into ``Website_Alexa_Info`` unless both values are
         still placeholders.

    A ``MySQLdb.Error`` on a single row is logged, mailed and skipped. Any
    other exception is logged, mailed and ends the process via
    ``sys.exit(0)``.
    """
    cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    if not os.path.exists(cfg_file):
        logging.debug(": config file not exists; file_name %s" % cfg_file)
        sendToMe(subject="SiteBased_alexa ERROR",
                 body="config file not exists")
        # NOTE(review): exits with status 0 even though this is a failure.
        sys.exit(0)

    logger.info(": extract data from siteBased start")
    socket.setdefaulttimeout(10.0)

    sendToMe(subject="SiteBased_alexa start",
             body="extract data from siteBased start")
    target_server_section = "staging"
    target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(
        target_server_section)

    base_url = "http://www.alexa.com/siteinfo/"
    # Date stamped on every row written by this run.
    report_date = "2016-05-31"
    insert_SiteBasedAlexa_SQL = """
                        insert into Website_Alexa_Info
                            (CreateDate, WebsiteId, DisplayName, Rank, TopOneCountry, IsEndOfMonth) 
                        values (%s, %s, %s, %s, %s, %s)  
                        on duplicate  key update 
                            Rank = values(Rank), 
                            TopOneCountry = values(TopOneCountry)
                    """
    try:
        target_mysql = MySQLHelper(host=target_host,
                                   user=target_user,
                                   passwd=target_passwd,
                                   db_name=target_db,
                                   port=target_port,
                                   charset='utf8')
        # Read the whole list up front so the file handle is released before
        # the slow scrape loop (which sleeps between requests) starts.
        with open("id_dis_domain", "r") as f:
            lines = f.readlines()

        # The original logged this from a for/else clause using an undefined
        # name, which raised NameError after every completed loop. Only log
        # it when there really is nothing to process.
        if not lines:
            logger.info("has no data %s" % report_date)

        for line in lines:
            if not line.strip():
                # A blank line would otherwise abort the whole run with an
                # IndexError.
                continue
            fields = line.strip("\n").split(",")
            website_id, display_name, domain = fields[1], fields[2], fields[3]
            url = base_url + domain

            run_time = 0
            alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
            # Retry while either value is still a placeholder; give up after
            # the third retry.
            while True:
                run_time += 1
                if alexaGlobalRank == 0 or alexaTopCountry == "unknown":
                    alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
                else:
                    break
                if run_time == 3:
                    break
                time.sleep(random.randint(5, 8))

            # Pause between sites as well.
            time.sleep(random.randint(5, 8))
            alexa_info_tuple = [(report_date, website_id, display_name,
                                 alexaGlobalRank, alexaTopCountry, 1)]

            # Skip the write only when both values are still placeholders.
            if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"):
                try:
                    target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL,
                                                 alexa_info_tuple)
                    print(alexa_info_tuple)
                    target_mysql.commit()
                except MySQLdb.Error as e:
                    # One bad row must not stop the remaining sites.
                    logger.debug(e)
                    sendToMe(subject="update SiteBasedAlexa Error",
                             body=re.sub(r'\'|"|!', "", str(e)))
    except Exception as e:
        logger.debug(": load data to SiteBasedAlexa %s" % e)
        sendToMe(subject="SiteBasedAlexa ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
예제 #11
0
# Upsert the rows held in `result` into TitleBasedCountry, title-casing the
# country name (third field). `result` and `target_server_section` are set
# earlier in the script, outside this section.
# Reset the handle so the cleanup below never touches a missing name or a
# connection left over from an earlier step when this step fails before
# connecting.
target_mysql = None
try:
    target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo(
        target_server_section)
    target_mysql = MySQLHelper(host=target_host,
                               user=target_user,
                               passwd=target_passwd,
                               db_name=target_db,
                               port=target_port,
                               charset='utf8')
    insertUpdate_SQL = """
		INSERT INTO TitleBasedCountry
		(country_id, region, countryName, nationalFlag, ETLDate) 
		VALUES(%s, %s, %s, %s, %s) 
		on duplicate  key update 
			region = values(region), ETLDate = values(ETLDate), 
			countryName = values(countryName), nationalFlag = VALUES(nationalFlag)
	"""

    country_rows = [(t[0], t[1], t[2].title(), t[3], t[4]) for t in result]
    target_mysql.insertUpdateCMD(insertUpdate_SQL, country_rows)
    target_mysql.commit()
except Exception as e:
    logger.debug(": load data to TitleBasedCountry, %s" % e)
    # NOTE(review): exits with status 0 even though the load failed.
    sys.exit(0)
finally:
    # Runs on the sys.exit path too; only close what was actually opened.
    if target_mysql is not None:
        target_mysql.closeCur()
        target_mysql.closeConn()
    logger.info(":load data to TitleBasedCountry  end")
#################################################################################################################################
예제 #12
0
	insert_SiteBased_SQL = """
		INSERT INTO SiteBased
			(reportDate, trackingWebsite_id, websiteName, websiteType, 
			websiteDomain, country_id, hostCountry, title, matchedNum, infringingNum, 
			infringingNumCMS, clipDurationSum, reportedViews, reportedViewsCMS, ETLDate) 
  		VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) 
  		ON DUPLICATE KEY UPDATE 
  			websiteName = VALUES(websiteName), websiteDomain = VALUES(websiteDomain), 
  			country_id = VALUES(country_id), hostCountry = VALUES(hostCountry), 
  			matchedNum = VALUES(matchedNum), infringingNum = VALUES(infringingNum), 
  			infringingNumCMS = VALUES(infringingNumCMS), clipDurationSum = VALUES(clipDurationSum),
  			reportedViews = VALUES(reportedViews), reportedViewsCMS = VALUES(reportedViewsCMS),
  			ETLDate = VALUES(ETLDate)
	"""
	target_mysql.insertUpdateCMD(insert_SiteBased_SQL, aggregate_TitleBased1_result)
	target_mysql.commit()
except Exception, e:
	logger.debug("load data to SiteBased, %s" %e)
	sys.exit(0)
finally:
	target_mysql.closeCur()
	target_mysql.closeConn()
logger.info(" aggregate data from TitleBased1 to  SiteBased end")
#################################################################################################################################
#--------------------------------------------------------------------------------------------------------------------------------
logger.info(" aggregate data from TitleBasedRemoveNum to  SiteBasedRemoveNum start")
try:
	target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, 
		db_name = target_db, port = target_port, charset = 'utf8')
	aggregate_TitleBasedRemoveNum_SQL = """
예제 #13
0
		  and a.Date_ID >= '%(date_para_CMS_min)s'
		  and a.Date_ID < '%(date_para_CMS_max)s'
		group by 1, 2, 3, 4, 5
	""" % date_para_CMS_dict

    CMS_result = target_mysql.queryCMD(aggregate_CMS_SQL)

    insert_CMS_SQL = """
		INSERT INTO TitleBased1 
			(reportDate, trackingWebsite_id, websiteName, websiteType, title, 
				infringingNumCMS, reportedViewsCMS, ETLDate) 
  		VALUES (%s, %s, %s, %s, %s, %s, %s, %s) 
  		ON DUPLICATE KEY UPDATE infringingNumCMS = VALUES(infringingNumCMS),
  			reportedViewsCMS = VALUES(reportedViewsCMS), ETLDate = VALUES(ETLDate)
	"""
    target_mysql.insertUpdateCMD(insert_CMS_SQL, CMS_result)
    target_mysql.commit()
except Exception, e:
    logger.debug(": load data to TitleBased1, %s" % e)
    sendToMe(subject="TitleBased_views ERROR", body=e)
    sys.exit(0)
finally:
    target_mysql.closeCur()
    target_mysql.closeConn()
logger.info(
    " aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate (CMS data) to  TitleBased1 end"
)
sendToMe(
    subject="titleBased_CMS End",
    body=
    "aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate (CMS data) to  TitleBased1 end"
		    sum(clipDurationSum) as clipDurationSum,
		    current_timestamp as ETLDate
		  from TitleBased as a, TitleBasedMeta as b, TitleBasedTrackingWebsite as c
		  where a.trackingWebsite_id = c.trackingWebsite_id
		    and a.trackingMeta_id = b.trackingMeta_id
		  group by 1, 2, 3, 4, 5) as a 
		  left join MetaTitleMapTitle as b 
		  on a. title = b.metaTitle 
		  group by 1, 2, 3, 4, 5, 6
	""" #%date_para_TitleBased1_dict

	TitleBased1_result = target_mysql.queryCMD(aggregate_TitleBased1_SQL)

	insert_TitleBased1_SQL = """
		INSERT INTO TitleBased1 
			(reportDate, trackingWebsite_id, websiteName, websiteType, title, tier,
				matchedNum, infringingNum, clipDurationSum, ETLDate) 
  		VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) 
  		ON DUPLICATE KEY UPDATE matchedNum = VALUES(matchedNum), clipDurationSum = VALUES(clipDurationSum),
  			infringingNum = VALUES(infringingNum), ETLDate = VALUES(ETLDate)
	"""
	target_mysql.insertUpdateCMD(insert_TitleBased1_SQL, TitleBased1_result)
	target_mysql.commit()
except Exception, e:
	logger.debug(": load data to TitleBased1, %s" %e)
	sys.exit(0)
finally:
	target_mysql.closeCur()
	target_mysql.closeConn()
logger.info(" aggregate data from TitleBased  to  TitleBased1 end")
예제 #15
0
		  group by 1, 2, 3, 4, 5, 6) as a
		left join MetaTitleMapTitle as b
		on a.title = b.metaTitle
		group by 1, 2, 3, 4, 5, 6
	"""
	print "===================="
	aggregate_result = target_mysql.queryCMD(aggregate_SQL)
	
	print aggregate_result[1]
	insertUpdate_SQL = """
		INSERT INTO TitleBasedRemoveNum1 
			(reportDate, takeoffDate, trackingWebsite_id, websiteName, 
				websiteType, title, removedNum, complianceTime,  ETLDate) 
  		VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) 
  		ON DUPLICATE KEY UPDATE 
  			removedNum = VALUES(removedNum), complianceTime = VALUES(complianceTime), ETLDate = VALUES(ETLDate)
	"""
	target_mysql.insertUpdateCMD(insertUpdate_SQL, aggregate_result)
	target_mysql.commit()
except Exception, e:
	logger.debug(" load data to TitleBasedRemoveNum1, %s" %e)
	sys.exit(0)
finally:
	target_mysql.closeCur()
	target_mysql.closeConn()
	logger.info(" load data to TitleBasedRemoveNum1  end")
#################################################################################################################################
#sed  "/\/home\/vobile\/cwj\/ViacomProject\/dashboard\/job/\/Job\/VIACOM\/Dashboard\/TitleBased/g"


예제 #16
0
def main():
    """Backfill Alexa rank/country for SiteBased rows still holding placeholders.

    Flow:
      * exit via ``sys.exit(0)`` (after a ``sendToMe`` notice) when the config
        file is missing;
      * connect to the ``target_server_staging`` MySQL server;
      * build the date window (lower bound currently pinned to
        ``'2016-04-12'``, upper bound is yesterday) and continue only when it
        spans at least ten days;
      * select distinct ``(trackingWebsite_id, websiteDomain)`` pairs from
        ``SiteBased`` inside the window whose ``alexaGlobalRank`` is ``0`` and
        ``alexaTopCountry`` is ``"unknown"``;
      * call ``getAlexaInfo`` for each domain, retrying while either value is
        a placeholder, and upsert into ``SiteBasedAlexa`` unless both values
        are still placeholders.

    A ``MySQLdb.Error`` on one row is logged and mailed, then the loop moves
    on. Any other exception is logged, mailed and ends the process.
    """
    seconds_per_day = 24 * 60 * 60
    conf_path = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    if not os.path.exists(conf_path):
        logging.debug(": config file not exists; file_name %s" % conf_path)
        sendToMe(subject="SiteBased_alexa ERROR",
                 body="config file not exists")
        sys.exit(0)

    logger.info(": extract data from siteBased start")
    socket.setdefaulttimeout(10.0)
    sendToMe(subject="SiteBased_alexa start",
             body="extract data from siteBased start")

    db_host, db_user, db_passwd, db_port, db_name = getConfMysqlInfo(
        "target_server_staging")
    try:
        db = MySQLHelper(host=db_host,
                         user=db_user,
                         passwd=db_passwd,
                         db_name=db_name,
                         port=db_port,
                         charset='utf8')
        date_from = getMinDatePara("SiteBasedAlexa", "reportDate")
        # NOTE(review): this literal overrides the value just fetched, so the
        # None fallback below can never run - confirm whether it is a
        # leftover from a manual backfill.
        date_from = '2016-04-12'
        if date_from is None:
            date_from = time.strftime(
                "%Y-%m-%d",
                time.localtime(time.time() - 10 * seconds_per_day))
        date_to = time.strftime(
            "%Y-%m-%d", time.localtime(time.time() - seconds_per_day))

        upper_ts = time.mktime(time.strptime(str(date_to), "%Y-%m-%d"))
        lower_ts = time.mktime(time.strptime(str(date_from), "%Y-%m-%d"))
        if upper_ts - lower_ts >= 10 * seconds_per_day:
            pending_sites = db.queryCMD("""
                select distinct trackingWebsite_id, websiteDomain from SiteBased 
                where reportDate <= "%(alexa_date_max)s"
                  and reportDate > "%(alexa_date_min)s"
                  and alexaGlobalRank = 0
                  and alexaTopCountry = "unknown"
                """ % {"alexa_date_min": date_from,
                       "alexa_date_max": date_to})

            if not pending_sites:
                logger.info("has no data %s" % date_to)
            else:
                upsert_sql = """
                            insert into SiteBasedAlexa
                                (reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) 
                            values (%s, %s, %s, %s, %s, %s)  
                            on duplicate  key update 
                                alexaGlobalRank = values(alexaGlobalRank), 
                                alexaTopCountry = values(alexaTopCountry), ETLDate  = values(ETLDate)
                        """
                for site in pending_sites:
                    url = "http://www.alexa.com/siteinfo/" + site[1]
                    rank, country = getAlexaInfo(url)
                    # Up to three retries; no pause after the last one.
                    for attempt in (1, 2, 3):
                        if not (rank == 0 or country == "unknown"):
                            break
                        rank, country = getAlexaInfo(url)
                        if attempt < 3:
                            time.sleep(random.randint(5, 8))

                    # Pause between sites.
                    time.sleep(random.randint(5, 8))
                    row = [(date_to, site[0], site[1], rank, country,
                            time.strftime("%Y-%m-%d %H:%M:%S"))]

                    # Nothing worth storing when both values are placeholders.
                    if rank == 0 and country == "unknown":
                        continue
                    try:
                        db.insertUpdateCMD(upsert_sql, row)
                        print(row)
                        db.commit()
                    except MySQLdb.Error as e:
                        logger.debug(e)
                        sendToMe(subject="update SiteBasedAlexa Error",
                                 body=re.sub(r'\'|"|!', "", str(e)))
    except Exception as e:
        logger.debug(": load data to SiteBasedAlexa %s" % e)
        sendToMe(subject="SiteBasedAlexa ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
예제 #17
0
def main():
    """Backfill Alexa rank/country for SiteBased rows still holding placeholders.

    Flow:
      * exit via ``sys.exit(0)`` (after a ``sendToMe`` notice) when the config
        file is missing;
      * connect to the ``target_server_staging`` MySQL server;
      * build the date window (lower bound currently pinned to
        ``'2016-04-12'``, upper bound is yesterday) and continue only when it
        spans at least ten days;
      * select distinct ``(trackingWebsite_id, websiteDomain)`` pairs from
        ``SiteBased`` inside the window whose ``alexaGlobalRank`` is ``0`` and
        ``alexaTopCountry`` is ``"unknown"``;
      * call ``getAlexaInfo`` for each domain, retrying while either value is
        a placeholder, and upsert into ``SiteBasedAlexa`` unless both values
        are still placeholders.

    A ``MySQLdb.Error`` on one row is logged and mailed, then the loop moves
    on. Any other exception is logged, mailed and ends the process.
    """
    seconds_per_day = 24 * 60 * 60
    conf_path = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    if not os.path.exists(conf_path):
        logging.debug(": config file not exists; file_name %s" % conf_path)
        sendToMe(subject="SiteBased_alexa ERROR",
                 body="config file not exists")
        sys.exit(0)

    logger.info(": extract data from siteBased start")
    socket.setdefaulttimeout(10.0)
    sendToMe(subject="SiteBased_alexa start",
             body="extract data from siteBased start")

    db_host, db_user, db_passwd, db_port, db_name = getConfMysqlInfo(
        "target_server_staging")
    try:
        db = MySQLHelper(host=db_host,
                         user=db_user,
                         passwd=db_passwd,
                         db_name=db_name,
                         port=db_port,
                         charset='utf8')
        date_from = getMinDatePara("SiteBasedAlexa", "reportDate")
        # NOTE(review): this literal overrides the value just fetched, so the
        # None fallback below can never run - confirm whether it is a
        # leftover from a manual backfill.
        date_from = '2016-04-12'
        if date_from is None:
            date_from = time.strftime(
                "%Y-%m-%d",
                time.localtime(time.time() - 10 * seconds_per_day))
        date_to = time.strftime(
            "%Y-%m-%d", time.localtime(time.time() - seconds_per_day))

        upper_ts = time.mktime(time.strptime(str(date_to), "%Y-%m-%d"))
        lower_ts = time.mktime(time.strptime(str(date_from), "%Y-%m-%d"))
        if upper_ts - lower_ts >= 10 * seconds_per_day:
            pending_sites = db.queryCMD("""
                select distinct trackingWebsite_id, websiteDomain from SiteBased 
                where reportDate <= "%(alexa_date_max)s"
                  and reportDate > "%(alexa_date_min)s"
                  and alexaGlobalRank = 0
                  and alexaTopCountry = "unknown"
                """ % {"alexa_date_min": date_from,
                       "alexa_date_max": date_to})

            if not pending_sites:
                logger.info("has no data %s" % date_to)
            else:
                upsert_sql = """
                            insert into SiteBasedAlexa
                                (reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) 
                            values (%s, %s, %s, %s, %s, %s)  
                            on duplicate  key update 
                                alexaGlobalRank = values(alexaGlobalRank), 
                                alexaTopCountry = values(alexaTopCountry), ETLDate  = values(ETLDate)
                        """
                for site in pending_sites:
                    url = "http://www.alexa.com/siteinfo/" + site[1]
                    rank, country = getAlexaInfo(url)
                    # Up to three retries; no pause after the last one.
                    for attempt in (1, 2, 3):
                        if not (rank == 0 or country == "unknown"):
                            break
                        rank, country = getAlexaInfo(url)
                        if attempt < 3:
                            time.sleep(random.randint(5, 8))

                    # Pause between sites.
                    time.sleep(random.randint(5, 8))
                    row = [(date_to, site[0], site[1], rank, country,
                            time.strftime("%Y-%m-%d %H:%M:%S"))]

                    # Nothing worth storing when both values are placeholders.
                    if rank == 0 and country == "unknown":
                        continue
                    try:
                        db.insertUpdateCMD(upsert_sql, row)
                        print(row)
                        db.commit()
                    except MySQLdb.Error as e:
                        logger.debug(e)
                        sendToMe(subject="update SiteBasedAlexa Error",
                                 body=re.sub(r'\'|"|!', "", str(e)))
    except Exception as e:
        logger.debug(": load data to SiteBasedAlexa %s" % e)
        sendToMe(subject="SiteBasedAlexa ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
예제 #18
0
		from DM_VIACOM.SelfService_Aggregate_ByNoticedDate as a, TitleBasedTrackingWebsite as b
		where  a.trackingWebsite_id = b.trackingWebsite_id
		  and a.WebsiteType = 'ugc'
		  and b.WebsiteType = 'ugc'
		  and a.trackingWebsite_id = 1
		  and a.Date_ID >= '%(date_para_CMS_min)s'
		  and a.Date_ID < '%(date_para_CMS_max)s'
		group by 1, 2, 3, 4, 5
	""" %date_para_CMS_dict

	CMS_result = target_mysql.queryCMD(aggregate_CMS_SQL)

	insert_CMS_SQL = """
		INSERT INTO TitleBased1 
			(reportDate, trackingWebsite_id, websiteName, websiteType, title, 
				infringingNumCMS, reportedViewsCMS, ETLDate) 
  		VALUES (%s, %s, %s, %s, %s, %s, %s, %s) 
  		ON DUPLICATE KEY UPDATE infringingNumCMS = VALUES(infringingNumCMS),
  			reportedViewsCMS = VALUES(reportedViewsCMS), ETLDate = VALUES(ETLDate)
	"""
	target_mysql.insertUpdateCMD(insert_CMS_SQL, CMS_result)
	target_mysql.commit()
except Exception, e:
	logger.debug(": load data to TitleBased1, %s" %e)
	sys.exit(0)
finally:
	target_mysql.closeCur()
	target_mysql.closeConn()
logger.info(" aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate (CMS data) to  TitleBased1 end")