コード例 #1
0
ファイル: UserItemSample.py プロジェクト: cash2one/TianChi
def Sample(dateScope, outputName):
    """Export the distinct (user_id, item_id) pairs seen in a date scope.

    Args:
        dateScope: date range specifier handed to getDaysString, which is
            expected to return the SQL ``in (...)`` value list
            (NOTE(review): exact format is defined by getDaysString).
        outputName: file name appended to Conf.originalSamplePath.

    Failures while querying or writing are reported on stdout rather than
    raised, so a batch of extractions keeps running (best-effort behavior
    of the original code is preserved).
    """
    tempDaysString = getDaysString(dateScope)
    outputPath = Conf.originalSamplePath + "/" + outputName
    sql = (
        ' select '
        + ' distinct user_id,item_id '
        + ' from useritem '
        + ' where date_format(usertime,"%Y%m%d") in '
        + ' ( '
        + tempDaysString
        + ')'
        + ' group by user_id,item_id '
        + ' order by user_id '
        + ";"
    )
    try:
        result = MySQL.getData(sql)
        MySQL.OutputTo(result, outputPath)
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
        # propagate, and include the cause so the failure can be diagnosed.
        print("数据提取有误: %s" % exc)
    else:
        print("数据提取完毕")
コード例 #2
0
ファイル: CateBuyCartRate.py プロジェクト: cash2one/TianChi
def CateBuyCartRate(dateScope, outputPath):
    """Export a smoothed buy/cart ratio for every item category.

    Per item_category the query counts rows with behavior_type 4 and
    behavior_type 3 (the alias BuyCartRate suggests "buy" and "cart"),
    adds one to each count and stores ceil(100 * count4 / count3),
    ordered from highest to lowest.

    Args:
        dateScope: date range specifier handed to getDaysString.
        outputPath: destination passed through to MySQL.OutputTo.
    """
    query_template = (
        "\n"
        "        select \n"
        "        item_category,ceil(100*((sum(if(behavior_type=4,1,0))+1)/(sum(if(behavior_type=3,1,0))+1))) as BuyCartRate \n"
        "        from useritem \n"
        "        where date_format(usertime,'%%Y%%m%%d') in (%s) \n"
        "        group by item_category \n"
        "        order by BuyCartRate desc;\n"
        "        "
    )
    day_list = getDaysString(dateScope)
    rows = MySQL.getData(query_template % day_list)
    # "202" is the feature id written alongside the rows.
    MySQL.OutputTo(rows, outputPath, "202")
コード例 #3
0
ファイル: itemVisitPerDay.py プロジェクト: cash2one/TianChi
def itemVisitPerDay(dateScope, outputPath):
    """Export, per item, the distinct visiting users per day (scaled by 100).

    The query counts distinct user_id values per item_id over rows whose
    behavior_type is 1, 2, 3 or 4, divides by the number of days reported
    by getDaysNum, and stores ceil(100 * quotient), highest first.

    Args:
        dateScope: date range specifier handed to getDaysString/getDaysNum.
        outputPath: destination passed through to MySQL.OutputTo.
    """
    query_template = (
        "\n"
        "        select \n"
        "        item_id,ceil(100*(count(distinct user_id)/%s)) as VisitPerDay\n"
        "        from useritem\n"
        "        where behavior_type in (1,2,3,4) and date_format(usertime,'%%Y%%m%%d') in (%s)\n"
        "        group by item_id\n"
        "        order by VisitPerDay desc;\n"
        "        "
    )
    day_list = getDaysString(dateScope)
    day_count = str(getDaysNum(dateScope))
    rows = MySQL.getData(query_template % (day_count, day_list))
    # "104" is the feature id written alongside the rows.
    MySQL.OutputTo(rows, outputPath, "104")
コード例 #4
0
ファイル: DemandFeature.py プロジェクト: cash2one/TianChi
def DemandFeature(dateScope, outputPath):
    """Export per (user, item, category) counts of each behavior type.

    For every user_id / item_id / item_category group the query emits four
    counters: Click (behavior_type 1), Fav (2), Cart (3) and Buy (4).

    Args:
        dateScope: date range specifier handed to getDaysString.
        outputPath: destination passed through to MySQL.OutputTo2.
    """
    # One feature id per counter column, in Click/Fav/Cart/Buy order.
    feature_ids = ["400", "401", "402", "403"]
    query_template = (
        "\n"
        "        select\n"
        "        user_id, item_id,item_category,\n"
        "        sum(if(behavior_type=1,1,0)) as Click,sum(if(behavior_type=2,1,0)) as Fav,\n"
        "        sum(if(behavior_type=3,1,0)) as Cart,sum(if(behavior_type=4,1,0)) as Buy\n"
        "        from useritem\n"
        "        where date_format(usertime,'%%Y%%m%%d') in (%s)\n"
        "        group by user_id, item_id,item_category;\n"
        "        "
    )
    day_list = getDaysString(dateScope)
    rows = MySQL.getData(query_template % day_list)
    MySQL.OutputTo2(rows, outputPath, feature_ids)
コード例 #5
0
def gettempPredictset(dateScope, tempPredictsetFile):
    """Dump the (user_id, item_id, item_category) groups of a date scope to a file.

    The rows are written by the database itself through
    ``SELECT ... INTO OUTFILE``, '|'-separated, one group per line.

    Args:
        dateScope: date range specifier handed to getDaysString.
        tempPredictsetFile: path of the file to create; backslashes are
            converted to forward slashes before use.

    Errors raised while running the query are reported on stdout instead of
    propagating, matching the original best-effort behavior; the connection
    and cursor are always closed.
    """
    dateString = getDaysString(dateScope)
    # Backslashes would act as escape characters inside the SQL string
    # literal, so the path is normalized to forward slashes once, up front.
    tempPredictsetFile = tempPredictsetFile.replace("\\", "/")
    # INTO OUTFILE refuses to overwrite an existing file, so remove a stale one.
    # NOTE(review): this only helps when the DB server shares this filesystem.
    if os.path.exists(tempPredictsetFile):
        os.remove(tempPredictsetFile)
    SQL = r"""
        select
        user_id,item_id,item_category
        from useritem
        where date_format(usertime,'%%Y%%m%%d') in (%s) 
        group by user_id, item_id,item_category
        into outfile 
        "%s"
        fields terminated by '|'
        optionally enclosed by ""
        lines terminated by "\n"; 
        """ % (dateString, tempPredictsetFile)
    conn = MySQL.Connect()
    try:
        cur = conn.cursor()
        try:
            cur.execute(SQL)
            conn.commit()
        finally:
            cur.close()
    except Exception as exc:
        # Previously the failure path printed the *success* message and hid
        # the error; report what actually went wrong instead.
        print("测试集头信息提取有误: %s" % exc)
    else:
        print("测试集头信息提取完毕")
    finally:
        conn.close()
コード例 #6
0
ファイル: UserItemSample.py プロジェクト: UpstairsSB/TianChi
def Sample(dateScope, outputName):
    """Export the distinct (user_id, item_id) pairs seen in a date scope.

    Args:
        dateScope: date range specifier handed to getDaysString, which is
            expected to return the SQL ``in (...)`` value list
            (NOTE(review): exact format is defined by getDaysString).
        outputName: file name appended to Conf.originalSamplePath.

    Failures while querying or writing are reported on stdout rather than
    raised, so a batch of extractions keeps running (best-effort behavior
    of the original code is preserved).
    """
    tempDaysString = getDaysString(dateScope)
    outputPath = Conf.originalSamplePath + "/" + outputName
    sql = (
        ' select '
        + ' distinct user_id,item_id '
        + ' from useritem '
        + ' where date_format(usertime,"%Y%m%d") in '
        + ' ( '
        + tempDaysString
        + ')'
        + ' group by user_id,item_id '
        + ' order by user_id '
        + ";"
    )
    try:
        result = MySQL.getData(sql)
        MySQL.OutputTo(result, outputPath)
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
        # propagate, and include the cause so the failure can be diagnosed.
        print("数据提取有误: %s" % exc)
    else:
        print("数据提取完毕")
コード例 #7
0
def itemBuyFavRate(dateScope, outputPath):
    """Export a smoothed buy/favorite ratio for every item.

    Per item_id the query counts rows with behavior_type 4 and
    behavior_type 2 (the alias BuyFavRate suggests "buy" and "favorite"),
    adds one to each count and stores ceil(100 * count4 / count2),
    ordered from highest to lowest.

    Args:
        dateScope: date range specifier handed to getDaysString.
        outputPath: destination passed through to MySQL.OutputTo.
    """
    query_template = (
        "\n"
        "        select \n"
        "        item_id,ceil(100*((sum(if(behavior_type=4,1,0))+1)/(sum(if(behavior_type=2,1,0))+1))) as BuyFavRate \n"
        "        from useritem \n"
        "        where date_format(usertime,'%%Y%%m%%d') in (%s) \n"
        "        group by item_id \n"
        "        order by BuyFavRate desc;\n"
        "        "
    )
    day_list = getDaysString(dateScope)
    rows = MySQL.getData(query_template % day_list)
    # "101" is the feature id written alongside the rows.
    MySQL.OutputTo(rows, outputPath, "101")
#以下为测试代码
# itemBuyFavRate("20141122-20141127",r"\1122_1127\itemBuyFavRate.csv")
コード例 #8
0
def itemBuyFavRate(dateScope, outputName):
    """Export a smoothed buy/favorite ratio (scaled by 1000) for every item.

    Two per-item sub-selects count behavior_type 2 rows (Fav) and
    behavior_type 4 rows (Buy). They are combined with a right join and a
    left join whose results are UNIONed, so items present in only one of
    the two sub-selects are still emitted. A missing count is treated as
    1 and a present count as count + 1 before the division.

    Args:
        dateScope: date range specifier handed to getDaysString.
        outputName: file name appended to Conf.featureExtractPath.
    """
    feature_id = "101"
    destination = Conf.featureExtractPath + "/" + outputName
    day_list = getDaysString(dateScope)

    rate_column = (
        "ceil(1000*((if(Buy is null,1,Buy+1))/(if(Fav is null,1,Fav+1))))"
        " as BuyFavRate "
    )

    def per_item_count(behavior, alias):
        # Sub-select counting one behavior_type per item over the date list.
        date_filter = "date_format(usertime,'%%Y%%m%%d') in (%s) " % (day_list)
        return "".join([
            "select ",
            "item_id,count(behavior_type) as " + alias + " ",
            "from useritem ",
            "where behavior_type=" + behavior + " and " + date_filter,
            "group by item_id ",
        ])

    def joined_half(item_source, join_kind):
        # One side of the union: Fav sub-select joined to the Buy sub-select.
        return "".join([
            "(select " + item_source + ".item_id," + rate_column,
            "from ",
            "(( ",
            per_item_count("2", "Fav"),
            ")a " + join_kind + " join ",
            "( ",
            per_item_count("4", "Buy"),
            ")b ",
            "on a.item_id=b.item_id ",
            ")) ",
        ])

    query = (
        joined_half("b", "right")
        + "union "
        + joined_half("a", "left")
        + "order by BuyFavRate desc;"
    )
    rows = MySQL.getData(query)
    MySQL.OutputTo(rows, destination, feature_id)
コード例 #9
0
ファイル: featureCompiler.py プロジェクト: UpstairsSB/TianChi
def getTrainset(dateScope, tempTrainsetFile):
    """Export one labelled row per (user_id, item_id, item_category) group.

    The fourth column is 1 when the group contains at least one row with
    behavior_type 4 in the date scope, otherwise 0.

    Args:
        dateScope: date range specifier handed to getDaysString.
        tempTrainsetFile: destination passed through to MySQL.OutputTo3.
    """
    dateString = getDaysString(dateScope)
    # The label must be aggregated: a bare if(behavior_type=4,1,0) is not in
    # the GROUP BY, so MySQL either rejects the query (ONLY_FULL_GROUP_BY) or
    # takes the value from an arbitrary row of the group. max() yields 1 as
    # soon as any row of the group has behavior_type 4.
    SQL = (
        "select "
        "user_id, item_id,item_category,max(if(behavior_type=4,1,0)) "
        "from useritem "
        "where date_format(usertime,'%%Y%%m%%d') in (%s) "
        "group by user_id, item_id,item_category; "
    ) % dateString
    Result = MySQL.getData(SQL)
    MySQL.OutputTo3(Result, tempTrainsetFile)
コード例 #10
0
def CateBuyClickRate(dateScope, outputPath):
    """Export a smoothed buy/click ratio for every item category.

    Per item_category the query counts rows with behavior_type 4 and
    behavior_type 1 (the alias BuyClickRate suggests "buy" and "click"),
    adds one to each count and stores ceil(100 * count4 / count1),
    ordered from highest to lowest.

    Args:
        dateScope: date range specifier handed to getDaysString.
        outputPath: destination passed through to MySQL.OutputTo.
    """
    query_template = (
        "\n"
        "        select \n"
        "        item_category,ceil(100*((sum(if(behavior_type=4,1,0))+1)/(sum(if(behavior_type=1,1,0))+1))) as BuyClickRate \n"
        "        from useritem \n"
        "        where date_format(usertime,'%%Y%%m%%d') in (%s) \n"
        "        group by item_category \n"
        "        order by BuyClickRate desc;\n"
        "        "
    )
    day_list = getDaysString(dateScope)
    rows = MySQL.getData(query_template % day_list)
    # "200" is the feature id written alongside the rows.
    MySQL.OutputTo(rows, outputPath, "200")
コード例 #11
0
ファイル: userBuyFavRate.py プロジェクト: UpstairsSB/TianChi
def userBuyFavRate(dateScope, outputPath):
    """Export a smoothed buy/favorite ratio for every user.

    Per user_id the query counts rows with behavior_type 4 and
    behavior_type 2 (the alias BuyFavRate suggests "buy" and "favorite"),
    adds one to each count and stores ceil(100 * count4 / count2),
    ordered from highest to lowest.

    Args:
        dateScope: date range specifier handed to getDaysString.
        outputPath: destination passed through to MySQL.OutputTo.
    """
    query_template = (
        "\n"
        "        select \n"
        "        user_id,ceil(100*((sum(if(behavior_type=4,1,0))+1)/(sum(if(behavior_type=2,1,0))+1))) as BuyFavRate \n"
        "        from useritem \n"
        "        where date_format(usertime,'%%Y%%m%%d') in (%s) \n"
        "        group by user_id \n"
        "        order by BuyFavRate desc;\n"
        "        "
    )
    day_list = getDaysString(dateScope)
    rows = MySQL.getData(query_template % day_list)
    # "2" is the feature id written alongside the rows.
    MySQL.OutputTo(rows, outputPath, "2")
#以下为测试代码
# userBuyFavRate("20141122-20141127",r"\1122_1127\userBuyFavRate.csv")
コード例 #12
0
ファイル: test.py プロジェクト: UpstairsSB/TianChi
def itemBuyFavRate(dateScope, outputName):
    """Export a smoothed buy/favorite ratio (scaled by 1000) for every item.

    The query UNIONs a right join and a left join of two per-item
    sub-selects (Fav = count of behavior_type 2 rows, Buy = count of
    behavior_type 4 rows), so an item found in only one sub-select is
    still emitted. A missing count is treated as 1 and a present count as
    count + 1 before the division.

    Args:
        dateScope: date range specifier handed to getDaysString.
        outputName: file name appended to Conf.featureExtractPath.
    """
    feature_id = "101"
    destination = Conf.featureExtractPath + "/" + outputName
    day_list = getDaysString(dateScope)

    def count_subquery(behavior, alias):
        # Per-item count of a single behavior_type within the date list.
        where_clause = (
            "where behavior_type=" + behavior
            + " and date_format(usertime,'%%Y%%m%%d') in (%s) " % (day_list)
        )
        return (
            "select "
            + "item_id,count(behavior_type) as " + alias + " "
            + "from useritem "
            + where_clause
            + "group by item_id "
        )

    fragments = []
    # First the right join keyed on b.item_id, then the left join keyed on
    # a.item_id; the two halves are glued together with "union".
    for item_alias, join_kind in (("b", "right"), ("a", "left")):
        if fragments:
            fragments.append("union ")
        fragments.extend([
            "(select " + item_alias + ".item_id,"
            "ceil(1000*((if(Buy is null,1,Buy+1))/(if(Fav is null,1,Fav+1))))"
            " as BuyFavRate ",
            "from ",
            "(( ",
            count_subquery("2", "Fav"),
            ")a " + join_kind + " join ",
            "( ",
            count_subquery("4", "Buy"),
            ")b ",
            "on a.item_id=b.item_id ",
            ")) ",
        ])
    fragments.append("order by BuyFavRate desc;")

    rows = MySQL.getData("".join(fragments))
    MySQL.OutputTo(rows, destination, feature_id)
コード例 #13
0
def ItemWantedFilter():
    """Export every distinct item_id of the `item` table to ItemWanted.csv.

    The file is written under Conf.filterPath via MySQL.OutputTo.
    """
    item_rows = MySQL.getData('select distinct item_id from item')
    target_file = Conf.filterPath + "/ItemWanted.csv"
    MySQL.OutputTo(item_rows, target_file)