def getShopFeatures(filename, gap):
    """Write per-group max/min/median aggregates of an existing feature file.

    The feature file ``filename`` (resolved through
    ``helper.get_feature_path(gap, filename)``) is loaded with
    ``loadShopFeature`` using the grouping returned by ``getCityGroup``.
    For every (group, date) pair the column-wise maximum, minimum and median
    of the loaded features are concatenated, and the result is written to
    the feature file named ``'locate_' + filename``.

    Output layout: one header line holding the ``locate_max_*``,
    ``locate_min_*`` and ``locate_median_*`` column names, then one row per
    (shop, date) for shops "1".."2000" formatted as
    ``<aggregates>,<shop>,<date>``.
    """
    city_group = getCityGroup()
    # The middle element of the returned triple is not needed here.
    grouped, _unused, names = loadShopFeature(
        helper.get_feature_path(gap, filename), city_group)

    # grp -> date -> [maxima] + [minima] + [medians]
    # NOTE(review): assumes each `rows` is 2-D (one row per shop in the
    # group) so that axis=0 reduces across shops -- confirm in the loader.
    aggregated = collections.defaultdict(dict)
    for grp, per_date in grouped.items():
        for date, rows in per_date.items():
            aggregated[grp][date] = (list(np.max(rows, axis=0)) +
                                     list(np.min(rows, axis=0)) +
                                     list(np.median(rows, axis=0)))

    header = [prefix + name
              for prefix in ('locate_max_', 'locate_min_', 'locate_median_')
              for name in names]

    with open(helper.get_feature_path(gap, 'locate_' + filename),
              'w') as fout:
        header_pending = True
        for shop_no in xrange(1, 2001):
            shop = str(shop_no)
            grp = city_group[shop]
            # The header goes out only after the first group lookup succeeds.
            if header_pending:
                fout.write("%s\n" % (",".join(header)))
                header_pending = False
            for date, fea in aggregated[grp].items():
                row = ",".join(str(value) for value in fea)
                fout.write("%s,%s,%s\n" % (row, shop, date))
def getShopFeatures(gap):
    """Dump the static per-shop attributes to the 'shop_s' feature file.

    Reads the mapping returned by ``shopInfo()`` (keyed by the shop id as a
    string) and writes one row for each shop id in 1..2000: the shop's
    attribute values followed by the shop id twice.
    """
    info_by_shop = shopInfo()
    with open(helper.get_feature_path(gap, 'shop_s'), 'w') as fout:
        fout.write("per_pay,score,comment_cnt,shop_level,id\n")
        for shop_id in xrange(1, 2001):
            attributes = ",".join(
                [str(value) for value in info_by_shop[str(shop_id)]])
            # The id appears twice: presumably once as the `id` feature
            # column and once as the trailing row key -- confirm against the
            # loader that reads this file.
            fout.write("%s,%d,%d\n" % (attributes, shop_id, shop_id))
def getShopFeatures(gap):
    """Write the air reading of each shop's city for every label date.

    For shops "1".."2000" the city is taken from the first field of the
    shop's entry in ``helper.loadShopInfo()``. For every date produced by
    ``util.getDate(start=config.LABELSTARTDATE)`` the value of that city's
    column in the air table is written as ``<air>,<shop>,<YYYY-MM-DD>``.
    """
    air_table = loadAirData()
    info_by_shop = helper.loadShopInfo()
    with open(helper.get_feature_path(gap, 'air_feature'), 'w') as fout:
        fout.write("air\n")
        for shop_no in xrange(1, 2001):
            shop = str(shop_no)
            city = info_by_shop[shop][0]
            for date in util.getDate(start=config.LABELSTARTDATE):
                # Rows of the air table whose Date equals this date.
                # NOTE(review): the "%d" below presumably relies on exactly
                # one matching row per date -- confirm against loadAirData.
                rows_for_date = air_table[air_table.Date == date]
                reading = rows_for_date[city]
                day = date.strftime("%Y-%m-%d")
                fout.write("%d,%s,%s\n" % (reading, shop, day))
def getDateFeatures(gap):
    """Write calendar features for every label date to the 'date_d' file.

    One row is written per date yielded by
    ``util.getDate(start=config.LABELSTARTDATE)``:

        if_work,is_fes,week_day,<YYYY-MM-DD>

    * ``week_day`` -- ``date.weekday()``.
    * ``if_work``  -- 1 when ``week_day < 5``, otherwise 0.
    * ``is_fes``   -- 1 when the date is contained in
      ``helper.loadFestival()``, otherwise 0.

    The header names only the three feature columns; the trailing date is
    the row key.
    """
    festivals = helper.loadFestival()
    with open(helper.get_feature_path(gap, 'date_d'), 'w') as fout:
        fout.write("if_work,is_fes,week_day\n")
        for date in util.getDate(start=config.LABELSTARTDATE):
            week_day = date.weekday()
            is_fes = 1 if date in festivals else 0
            if_work = 1 if week_day < 5 else 0
            # The former `tmp` and `month_of_week` values were computed here
            # but never written; they have been removed as dead code.
            fout.write("%d,%d,%d,%s\n" % (if_work, is_fes, week_day,
                                          date.strftime("%Y-%m-%d")))
def getShopFeatures(gap):
    """Write encoded categorical shop attributes to 'shop_cate_s'.

    ``shopInfo()`` returns the per-shop records together with ``lis``, a
    sequence of lookup tables. Fields 0, 1, 6, 7 and 8 of each shop record
    are translated through ``lis[0]`` .. ``lis[4]`` respectively; a value
    missing from its table is written as -1. Each row ends with the shop id.
    """
    shop_info, lis = shopInfo()
    # (index into lis, index into the shop record), in header order:
    # city, locate, cate1, cate2, cate3.
    column_map = ((0, 0), (1, 1), (2, 6), (3, 7), (4, 8))
    with open(helper.get_feature_path(gap, 'shop_cate_s'), 'w') as fout:
        fout.write("city,locate,cate1,cate2,cate3\n")
        for shop_no in xrange(1, 2001):
            shop = str(shop_no)
            codes = []
            for table_idx, field_idx in column_map:
                codes.append(
                    lis[table_idx].get(shop_info[shop][field_idx], -1))
            row = ",".join([str(code) for code in codes])
            fout.write("%s,%s\n" % (row, shop))
def getShopFeatures(gap):
    """Write weather features of each shop's city for every label date.

    For shops "1".."2000" the city is the first field of the shop's entry
    in ``helper.loadShopInfo()``. For each date from
    ``util.getDate(start=config.LABELSTARTDATE)``,
    ``getFeature(weatherdata[city][date])`` supplies the feature values and
    their column names. The names are written once as the header, ahead of
    the first data row; each row is ``<features>,<shop>,<YYYY-MM-DD>``.
    """
    weather_by_city = loadWeatherData()
    info_by_shop = helper.loadShopInfo()
    header_pending = True
    with open(helper.get_feature_path(gap, 'weather_feature'), 'w') as fout:
        for shop_no in xrange(1, 2001):
            shop = str(shop_no)
            city = info_by_shop[shop][0]
            for date in util.getDate(start=config.LABELSTARTDATE):
                feature, msg = getFeature(weather_by_city[city][date])
                # Column names come from getFeature, so the header can only
                # be emitted after the first call.
                if header_pending:
                    fout.write("%s\n" % (",".join(msg)))
                    header_pending = False
                values = ",".join([str(value) for value in feature])
                day = date.strftime("%Y-%m-%d")
                fout.write("%s,%s,%s\n" % (values, shop, day))
def run(gap):
    """Generate the 'same_day_stat' feature file.

    Loads the shop payment data via ``helper.loadShopPay()`` and, for each
    shop "1".."2000", converts its series with ``util.toArray`` and calls
    ``getFeature(date, arr, gap)`` for every date yielded by
    ``util.getDate(start_date[shop])``. The column names returned by the
    first ``getFeature`` call become the header line; every feature vector
    is written with ``util.outputFeature``.

    The output file is managed by a ``with`` block so it is closed even if
    a helper raises part-way through (previously the handle leaked on any
    exception).
    """
    data = helper.loadShopPay()
    start_date = util.getLabelStartDate(data)
    header_pending = True
    with open(helper.get_feature_path(gap, 'same_day_stat'), 'w') as fout:
        for shop_no in xrange(1, 2001):
            shop = str(shop_no)
            print(shop)  # progress indicator
            arr = util.toArray(data[shop])
            for date in util.getDate(start_date[shop]):
                feature, msg = getFeature(date, arr, gap)
                # Column names come from getFeature, so the header is
                # emitted right before the first data row.
                if header_pending:
                    fout.write('%s\n' % (','.join(msg)))
                    header_pending = False
                util.outputFeature(fout, date, shop, feature)
def run(gap):
    """Generate the per-shop 'day_feature' file.

    ``loadAllData()`` returns two mappings keyed by the shop id as a string.
    For each shop "1".."2000", ``getFeature(shopdata[shop], shop1data[shop])``
    supplies the feature values and their column names. The names from the
    first call are written as the header; each row is ``<features>,<shop>``.

    The output file is managed by a ``with`` block so it is closed even if
    a helper raises part-way through (previously the handle leaked on any
    exception).
    """
    shopdata, shop1data = loadAllData()
    header_pending = True
    with open(helper.get_feature_path(gap, 'day_feature'), 'w') as fout:
        for shop_no in xrange(1, 2001):
            shop = str(shop_no)
            print(shop)  # progress indicator
            feature, msg = getFeature(shopdata[shop], shop1data[shop])
            # Column names come from getFeature, so the header is emitted
            # right before the first data row.
            if header_pending:
                fout.write('%s\n' % (','.join(msg)))
                header_pending = False
            values = ",".join([str(value) for value in feature])
            fout.write("%s,%s\n" % (values, shop))
def run(gap):
    """Generate a 'time_win_diff_<a>_<b>_<c>' feature file.

    The three suffix components of the output name are taken verbatim from
    ``sys.argv[2]``, ``sys.argv[3]`` and ``sys.argv[4]``; an ``IndexError``
    is raised before any file is opened if they are missing.

    For each shop "1".."2000" the payment series from
    ``helper.loadShopPay()`` is converted with ``util.toArray`` and
    ``getFeature(date, arr, gap)`` is evaluated for every date yielded by
    ``util.getDate(start_date[shop])``. The column names from the first
    call form the header line; rows are written via ``util.outputFeature``.

    The output file is managed by a ``with`` block so it is closed even if
    a helper raises part-way through (previously the handle leaked on any
    exception).
    """
    data = helper.loadShopPay()
    start_date = util.getLabelStartDate(data)
    out_name = 'time_win_diff_%s_%s_%s' % (sys.argv[2], sys.argv[3],
                                           sys.argv[4])
    header_pending = True
    with open(helper.get_feature_path(gap, out_name), 'w') as fout:
        for shop_no in xrange(1, 2001):
            shop = str(shop_no)
            print(shop)  # progress indicator
            arr = util.toArray(data[shop])
            for date in util.getDate(start_date[shop]):
                feature, msg = getFeature(date, arr, gap)
                # Column names come from getFeature, so the header is
                # emitted right before the first data row.
                if header_pending:
                    fout.write('%s\n' % (','.join(msg)))
                    header_pending = False
                util.outputFeature(fout, date, shop, feature)
def run(gap):
    """Join all feature files for ``gap`` into one table and write it out.

    Three kinds of feature file are loaded:

    * date features (``loadSingleFeature``) -- looked up by date;
    * shop features (``loadSingleFeature``) -- looked up by shop;
    * mix features (``loadMixFeature``) -- looked up by shop, then date.

    The first mix feature ('label') is the base table: its
    shop -> date -> list mapping is extended in place with the remaining mix
    features, then the date features, then the shop features. Whenever a
    key is missing from a feature table, ``np.nan`` is appended once per
    column of that table. The joined table and the accumulated column names
    are passed to ``output`` and ``outputnames``.
    """
    helper.log_time('start loading...')
    date_feature = ['date_d']
    shop_feature = ['shop_s', 'shop_cate_s', 'day_feature']
    #mix_feature = ['label','history','air_feature','weather_feature']
    mix_feature = [
        'label', 'same_day_hist', 'time_win_1_1_21', 'time_win_log_1_1_21',
        'time_win_2_2_7', 'time_win_3_6_7', 'time_win_7_14_7',
        'weekend_stat', 'weekday_stat', 'same_day_stat', 'min_max_stat',
        'air_feature', 'weather_feature'
    ]
    #mix_feature = ['label','same_day_hist','time_win_1_1_21','time_win_2_2_7','time_win_2_4_7','time_win_3_6_7','time_win_7_7_7','time_win_7_14_7','time_win_diff_1_1_21','weekend_stat','weekday_stat','time_win_diff_1_1_21','min_max_stat','air_feature','weather_feature','same_day_stat','city_cate2_same_day_hist','city_cate2_time_win_1_1_21']
    #mix_feature = ['label','same_day_hist','time_win_1_1_21','time_win_2_2_7','time_win_2_4_7','time_win_3_6_7','time_win_7_7_7','time_win_7_14_7','time_win_diff_1_1_21','time_win_diff_1_1_21']
    #mix_feature = ['label','same_day_hist','same_day_stat','same_day_diff_mean','week_hist','week_stat','weekend_stat','weekday_stat','min_max_stat','air_feature','weather_feature','city_cate2_same_day_hist','city_cate2_week_hist','locate_same_day_hist','locate_week_hist','week_hist_2_2','week_hist_2_4','week_hist_3_6','week_hist_4_8']
    #mix_feature = ['label','same_day_hist','same_day_stat','same_day_diff_mean','same_day_reg','weekday_stat','weekend_stat','week_hist','week_hist_diff','week_stat','min_max_stat','shop_cate_s','air_feature','weather_feature']

    # Load order is date, shop, mix.
    date_tables = [loadSingleFeature(helper.get_feature_path(gap, fname))
                   for fname in date_feature]
    shop_tables = [loadSingleFeature(helper.get_feature_path(gap, fname))
                   for fname in shop_feature]
    mix_tables = [loadMixFeature(helper.get_feature_path(gap, fname))
                  for fname in mix_feature]
    data = {'date': date_tables, 'shop': shop_tables, 'mix': mix_tables}
    helper.log_time('finish loading...')

    helper.log_time('start mix join...')
    base = data['mix'][0]
    data_join = base[0]
    # The base table's first header entry is skipped.
    # NOTE(review): presumably because that column is not carried as a
    # feature -- confirm against loadMixFeature.
    names = base[2][1:]
    for entry in data['mix'][1:]:
        m = entry[0]
        l = entry[1]
        names += entry[2]
        for shop, shop_data in data_join.iteritems():
            for date, fea in shop_data.iteritems():
                # `fea` is the list stored in data_join; += extends it in
                # place, which is how the joined table gets built.
                if shop in m and date in m[shop]:
                    fea += m[shop][date]
                else:
                    fea += [np.nan] * l

    helper.log_time('start date join...')
    for d, l, n in data['date']:
        names += n
        for shop, shop_data in data_join.iteritems():
            for date, fea in shop_data.iteritems():
                if date in d:
                    fea += d[date]
                else:
                    fea += [np.nan] * l

    helper.log_time('start shop join...')
    for s, l, n in data['shop']:
        names += n
        print(n)
        for shop, shop_data in data_join.iteritems():
            for date, fea in shop_data.iteritems():
                if shop in s:
                    fea += s[shop]
                else:
                    fea += [np.nan] * l

    helper.log_time('start write...')
    output(data_join, gap)
    outputnames(names, gap)