def Sample(dateScope, outputName):
    """Export the distinct (user_id, item_id) pairs seen in a date scope.

    The day list produced by ``getDaysString(dateScope)`` is spliced into the
    ``in ( ... )`` clause of the query, and the rows are handed to
    ``MySQL.OutputTo`` together with a path under ``Conf.originalSamplePath``.

    Args:
        dateScope: Date-scope value understood by ``getDaysString``.
        outputName: File name appended to ``Conf.originalSamplePath``.

    Returns:
        None. A status line is printed; failures are reported, not raised.
    """
    tempDaysString = getDaysString(dateScope)
    outputPath = Conf.originalSamplePath + "/" + outputName
    # Adjacent literals keep the query text exactly as the original
    # concatenation produced it (including the doubled spaces).
    sql = (
        ' select '
        ' distinct user_id,item_id '
        ' from useritem '
        ' where date_format(usertime,"%Y%m%d") in '
        ' ( '
        + tempDaysString
        + ')'
        ' group by user_id,item_id '
        ' order by user_id '
        ';'
    )
    try:
        result = MySQL.getData(sql)
        MySQL.OutputTo(result, outputPath)
        print("数据提取完毕")
    except Exception as err:
        # Only ordinary errors are handled here, so Ctrl-C / SystemExit still
        # propagate; the cause is printed so the failure can be diagnosed.
        print("数据提取有误", err)
def CateBuyCartRate(dateScope, outputPath):
    """Write the per-category buy/cart ratio as feature "202".

    For every ``item_category`` in the date scope the query computes
    ``ceil(100 * (count(behavior_type=4) + 1) / (count(behavior_type=3) + 1))``
    and orders the categories by that value, descending.

    Args:
        dateScope: Date-scope value understood by ``getDaysString``.
        outputPath: Destination passed straight to ``MySQL.OutputTo``.
    """
    feature_id = "202"
    days = getDaysString(dateScope)
    # The pieces concatenate to exactly the original query text.
    template = (
        r" select item_category,"
        r"ceil(100*((sum(if(behavior_type=4,1,0))+1)/(sum(if(behavior_type=3,1,0))+1)))"
        r" as BuyCartRate"
        r" from useritem"
        r" where date_format(usertime,'%%Y%%m%%d') in (%s)"
        r" group by item_category"
        r" order by BuyCartRate desc; "
    )
    rows = MySQL.getData(template % days)
    MySQL.OutputTo(rows, outputPath, feature_id)
def itemVisitPerDay(dateScope, outputPath):
    """Write the per-item distinct-visitors-per-day value as feature "104".

    The query counts distinct ``user_id`` values per ``item_id`` (rows with
    ``behavior_type`` in 1..4), divides by the number of days reported by
    ``getDaysNum(dateScope)``, scales by 100 and rounds up.

    Args:
        dateScope: Date-scope value understood by ``getDaysString`` and
            ``getDaysNum``.
        outputPath: Destination passed straight to ``MySQL.OutputTo``.
    """
    feature_id = "104"
    days = getDaysString(dateScope)
    day_count = str(getDaysNum(dateScope))
    # The pieces concatenate to exactly the original query text.
    template = (
        r" select item_id,ceil(100*(count(distinct user_id)/%s)) as VisitPerDay"
        r" from useritem"
        r" where behavior_type in (1,2,3,4)"
        r" and date_format(usertime,'%%Y%%m%%d') in (%s)"
        r" group by item_id"
        r" order by VisitPerDay desc; "
    )
    query = template % (day_count, days)
    rows = MySQL.getData(query)
    MySQL.OutputTo(rows, outputPath, feature_id)
def DemandFeature(dateScope, outputPath):
    """Write per-(user, item, category) behaviour counts as features 400-403.

    The query groups ``useritem`` rows by user, item and category and sums
    how often each ``behavior_type`` occurs: 1 as Click, 2 as Fav, 3 as Cart,
    4 as Buy. The rows go to ``MySQL.OutputTo2`` together with the four
    feature ids.

    Args:
        dateScope: Date-scope value understood by ``getDaysString``.
        outputPath: Destination passed straight to ``MySQL.OutputTo2``.
    """
    feature_ids = ["400", "401", "402", "403"]
    days = getDaysString(dateScope)
    # The pieces concatenate to exactly the original query text.
    template = (
        r" select user_id, item_id,item_category,"
        r" sum(if(behavior_type=1,1,0)) as Click,"
        r"sum(if(behavior_type=2,1,0)) as Fav,"
        r" sum(if(behavior_type=3,1,0)) as Cart,"
        r"sum(if(behavior_type=4,1,0)) as Buy"
        r" from useritem"
        r" where date_format(usertime,'%%Y%%m%%d') in (%s)"
        r" group by user_id, item_id,item_category; "
    )
    rows = MySQL.getData(template % days)
    MySQL.OutputTo2(rows, outputPath, feature_ids)
def gettempPredictset(dateScope, tempPredictsetFile):
    """Dump distinct (user_id, item_id, item_category) rows to a file.

    The export is done server-side with ``select ... into outfile``, using
    ``|`` as the field separator. Any existing file at the target path is
    removed first.
    NOTE(review): ``os.remove`` acts on the local filesystem while
    ``into outfile`` writes on the MySQL server host, so this presumably
    assumes both are the same machine - confirm.

    Args:
        dateScope: Date-scope value understood by ``getDaysString``.
        tempPredictsetFile: Target file path; backslashes are converted to
            forward slashes before use.

    Returns:
        None. A status line is printed; a failed export is reported on
        stdout instead of being raised.
    """
    dateString = getDaysString(dateScope)
    tempPredictsetFile = tempPredictsetFile.replace("\\", "/")
    if os.path.exists(tempPredictsetFile):
        os.remove(tempPredictsetFile)
    # Adjacent raw literals reproduce the original query text exactly; the
    # "\n" stays a backslash-n pair for MySQL to interpret.
    SQL = (
        r" select user_id,item_id,item_category from useritem"
        r" where date_format(usertime,'%%Y%%m%%d') in (%s)"
        r" group by user_id, item_id,item_category"
        r' into outfile "%s"'
        r" fields terminated by '|'"
        r' optionally enclosed by ""'
        r' lines terminated by "\n"; '
    ) % (dateString, tempPredictsetFile)
    conn = MySQL.Connect()
    try:
        cur = conn.cursor()
        try:
            cur.execute(SQL)
            conn.commit()
        except Exception as err:
            # Previously the error was swallowed and success was still
            # announced; report the failure and its cause instead.
            print("测试集头信息提取有误", err)
        else:
            print("测试集头信息提取完毕")
        finally:
            cur.close()
    finally:
        conn.close()
def Sample(dateScope, outputName):
    """Export the distinct (user_id, item_id) pairs seen in a date scope.

    The day list produced by ``getDaysString(dateScope)`` is placed inside
    the query's ``in ( ... )`` clause; the fetched rows are written through
    ``MySQL.OutputTo`` to ``Conf.originalSamplePath + "/" + outputName``.

    Args:
        dateScope: Date-scope value understood by ``getDaysString``.
        outputName: File name appended to ``Conf.originalSamplePath``.

    Returns:
        None. A status line is printed; failures are reported, not raised.
    """
    tempDaysString = getDaysString(dateScope)
    outputPath = Conf.originalSamplePath + "/" + outputName
    # Fragments are kept verbatim so the resulting query text is unchanged.
    fragments_before = (
        ' select '
        ' distinct user_id,item_id '
        ' from useritem '
        ' where date_format(usertime,"%Y%m%d") in '
        ' ( '
    )
    fragments_after = (
        ')'
        ' group by user_id,item_id '
        ' order by user_id '
        ';'
    )
    sql = fragments_before + tempDaysString + fragments_after
    try:
        result = MySQL.getData(sql)
        MySQL.OutputTo(result, outputPath)
        print("数据提取完毕")
    except Exception as err:
        # Catch ordinary errors only (not KeyboardInterrupt/SystemExit) and
        # show the cause alongside the original failure message.
        print("数据提取有误", err)
def itemBuyFavRate(dateScope, outputPath):
    """Write the per-item buy/favourite ratio as feature "101".

    For every ``item_id`` in the date scope the query computes
    ``ceil(100 * (count(behavior_type=4) + 1) / (count(behavior_type=2) + 1))``
    and orders the items by that value, descending.

    Args:
        dateScope: Date-scope value understood by ``getDaysString``.
        outputPath: Destination passed straight to ``MySQL.OutputTo``.
    """
    feature_id = "101"
    days = getDaysString(dateScope)
    # The pieces concatenate to exactly the original query text.
    template = (
        r" select item_id,"
        r"ceil(100*((sum(if(behavior_type=4,1,0))+1)/(sum(if(behavior_type=2,1,0))+1)))"
        r" as BuyFavRate"
        r" from useritem"
        r" where date_format(usertime,'%%Y%%m%%d') in (%s)"
        r" group by item_id"
        r" order by BuyFavRate desc; "
    )
    rows = MySQL.getData(template % days)
    MySQL.OutputTo(rows, outputPath, feature_id)


# Test code below (kept disabled):
# itemBuyFavRate("20141122-20141127",r"\1122_1127\itemBuyFavRate.csv")
def itemBuyFavRate(dateScope, outputName):
    """Write the per-item buy/favourite ratio (scaled by 1000) as feature "101".

    Two per-item counts are computed inside the date scope - rows with
    ``behavior_type=2`` (aliased Fav) and rows with ``behavior_type=4``
    (aliased Buy). They are combined with a right join UNION a left join so
    items present in only one of the two counts are still included; a missing
    count is treated as 0 before the +1 smoothing.

    Args:
        dateScope: Date-scope value understood by ``getDaysString``.
        outputName: File name appended to ``Conf.featureExtractPath``.
    """
    featureSid = "101"
    outputPath = Conf.featureExtractPath + "/" + outputName
    dateString = getDaysString(dateScope)

    def per_item_count(alias, behavior):
        # One "count rows of this behaviour per item" sub-query.
        return (
            "select "
            "item_id,count(behavior_type) as %s "
            "from useritem "
            "where behavior_type=%s and date_format(usertime,'%%Y%%m%%d') in (%s) "
            "group by item_id "
        ) % (alias, behavior, dateString)

    fav_counts = per_item_count("Fav", 2)
    buy_counts = per_item_count("Buy", 4)
    rate_column = (
        "ceil(1000*((if(Buy is null,1,Buy+1))/(if(Fav is null,1,Fav+1))))"
        " as BuyFavRate "
    )

    def joined_half(side, join_kind):
        # One arm of the UNION; `side` is the table alias supplying item_id.
        return (
            "(select " + side + ".item_id," + rate_column
            + "from (( " + fav_counts
            + ")a " + join_kind + " join ( " + buy_counts
            + ")b on a.item_id=b.item_id )) "
        )

    SQL = (
        joined_half("b", "right")
        + "union "
        + joined_half("a", "left")
        + "order by BuyFavRate desc;"
    )
    Result = MySQL.getData(SQL)
    MySQL.OutputTo(Result, outputPath, featureSid)
def getTrainset(dateScope, tempTrainsetFile):
    """Export (user_id, item_id, item_category, bought-flag) training rows.

    Rows of ``useritem`` inside the date scope are grouped by user, item and
    category. The fourth column is 1 when any row of the group has
    ``behavior_type=4`` and 0 otherwise.

    Args:
        dateScope: Date-scope value understood by ``getDaysString``.
        tempTrainsetFile: Destination passed straight to ``MySQL.OutputTo3``.
    """
    dateString = getDaysString(dateScope)
    # The flag must be aggregated: selecting the bare if(...) next to
    # GROUP BY is rejected under ONLY_FULL_GROUP_BY and otherwise returns the
    # value of an arbitrary row of the group. max() gives "bought at least once".
    SQL = (
        "select "
        "user_id, item_id,item_category,max(if(behavior_type=4,1,0)) "
        "from useritem "
        "where date_format(usertime,'%%Y%%m%%d') in (%s) "
        "group by user_id, item_id,item_category; "
    ) % dateString
    Result = MySQL.getData(SQL)
    MySQL.OutputTo3(Result, tempTrainsetFile)
def CateBuyClickRate(dateScope, outputPath):
    """Write the per-category buy/click ratio as feature "200".

    For every ``item_category`` in the date scope the query computes
    ``ceil(100 * (count(behavior_type=4) + 1) / (count(behavior_type=1) + 1))``
    and orders the categories by that value, descending.

    Args:
        dateScope: Date-scope value understood by ``getDaysString``.
        outputPath: Destination passed straight to ``MySQL.OutputTo``.
    """
    feature_id = "200"
    days = getDaysString(dateScope)
    # The pieces concatenate to exactly the original query text.
    template = (
        r" select item_category,"
        r"ceil(100*((sum(if(behavior_type=4,1,0))+1)/(sum(if(behavior_type=1,1,0))+1)))"
        r" as BuyClickRate"
        r" from useritem"
        r" where date_format(usertime,'%%Y%%m%%d') in (%s)"
        r" group by item_category"
        r" order by BuyClickRate desc; "
    )
    rows = MySQL.getData(template % days)
    MySQL.OutputTo(rows, outputPath, feature_id)
def userBuyFavRate(dateScope, outputPath):
    """Write the per-user buy/favourite ratio as feature "2".

    For every ``user_id`` in the date scope the query computes
    ``ceil(100 * (count(behavior_type=4) + 1) / (count(behavior_type=2) + 1))``
    and orders the users by that value, descending.

    Args:
        dateScope: Date-scope value understood by ``getDaysString``.
        outputPath: Destination passed straight to ``MySQL.OutputTo``.
    """
    feature_id = "2"
    days = getDaysString(dateScope)
    # The pieces concatenate to exactly the original query text.
    template = (
        r" select user_id,"
        r"ceil(100*((sum(if(behavior_type=4,1,0))+1)/(sum(if(behavior_type=2,1,0))+1)))"
        r" as BuyFavRate"
        r" from useritem"
        r" where date_format(usertime,'%%Y%%m%%d') in (%s)"
        r" group by user_id"
        r" order by BuyFavRate desc; "
    )
    rows = MySQL.getData(template % days)
    MySQL.OutputTo(rows, outputPath, feature_id)


# Test code below (kept disabled):
# userBuyFavRate("20141122-20141127",r"\1122_1127\userBuyFavRate.csv")
def itemBuyFavRate(dateScope, outputName):
    """Write the per-item buy/favourite ratio (scaled by 1000) as feature "101".

    Per-item counts of ``behavior_type=2`` rows (Fav) and ``behavior_type=4``
    rows (Buy) within the date scope are joined twice - once with a right
    join, once with a left join - and the two results are UNIONed, so an item
    that appears in only one of the counts is kept. A NULL count contributes
    1 to its side of the ratio, the same as a count of 0 after the +1.

    Args:
        dateScope: Date-scope value understood by ``getDaysString``.
        outputName: File name appended to ``Conf.featureExtractPath``.
    """
    featureSid = "101"
    outputPath = Conf.featureExtractPath + "/" + outputName
    dateString = getDaysString(dateScope)

    # Sub-query counting the rows of one behaviour code per item.
    count_template = (
        "select item_id,count(behavior_type) as {alias} "
        "from useritem "
        "where behavior_type={code} and date_format(usertime,'%Y%m%d') in ({days}) "
        "group by item_id "
    )
    fav = count_template.format(alias="Fav", code=2, days=dateString)
    buy = count_template.format(alias="Buy", code=4, days=dateString)

    # One arm of the UNION; {side} is the alias that supplies item_id.
    arm_template = (
        "(select {side}.item_id,"
        "ceil(1000*((if(Buy is null,1,Buy+1))/(if(Fav is null,1,Fav+1)))) as BuyFavRate "
        "from (( {fav})a {join} join ( {buy})b "
        "on a.item_id=b.item_id )) "
    )
    arms = [
        arm_template.format(side="b", join="right", fav=fav, buy=buy),
        arm_template.format(side="a", join="left", fav=fav, buy=buy),
    ]
    SQL = "union ".join(arms) + "order by BuyFavRate desc;"

    Result = MySQL.getData(SQL)
    MySQL.OutputTo(Result, outputPath, featureSid)
def ItemWantedFilter():
    """Export every distinct item_id of the ``item`` table.

    The rows are written through ``MySQL.OutputTo`` to ``ItemWanted.csv``
    under ``Conf.filterPath``.
    """
    query = r'select distinct item_id from item'
    wanted_items = MySQL.getData(query)
    MySQL.OutputTo(wanted_items, Conf.filterPath + "/" + "ItemWanted.csv")