# Collect the distinct values found in the fifth tab-separated column of
# data/orgin_train_data.tsv.
from tools import local_file_util

# Every input line becomes the list of its tab-separated fields.
file = [
    row.split('\t')
    for row in local_file_util.readFile('data/orgin_train_data.tsv')
]

# Index 4 is the fifth field; a line with fewer than five fields raises
# IndexError here.
s = {fields[4] for fields in file}
# -*- coding: utf-8 -*-
# Append the comment columns to every order-history row, matching rows on
# their first two columns (named userId and orderId by the code below).
# NOTE(review): the variable names say "train", but both inputs are read from
# the test/ directory -- confirm which data set is intended.
# NOTE(review): merge_res is built but never written anywhere in the visible
# code -- confirm whether a save step is missing.
from tools import local_file_util

# Keep only the text before the first double quote of each data line, then
# split that prefix on commas. The first line of the file is skipped.
userComment_train = [
    raw.split('\"')[0].split(',')
    for raw in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/test/userComment_test.csv')[1:]
]

# Order-history data lines split on commas; the first line is skipped.
orderHistory_train = [
    raw.split(',')
    for raw in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/test/orderHistory_test.csv')[1:]
]

# (column 0, column 1) -> [column 2, column 3]. When the same key occurs more
# than once, the last occurrence wins.
userComment_train_dict = {
    (fields[0], fields[1]): [fields[2], fields[3]]
    for fields in userComment_train
}

# Orders without a matching comment are padded with two empty strings so
# every merged row gains exactly two extra columns.
no_comment = ['', '']
merge_res = [
    order + userComment_train_dict.get((order[0], order[1]), no_comment)
    for order in orderHistory_train
]
# Score data/test_data.tsv with a saved XGBoost model and write the
# predictions, paired with user ids, to data/submit.csv.
import numpy as np
import xgboost as xgb

from tools import local_file_util

# Restore the trained booster from disk.
bst = xgb.Booster({'nthread': 4})
bst.load_model('xgb_model/xgb_v2.model')

# Column 0 of the matrix is passed as the label, the remaining columns as
# the features.
data = np.loadtxt('data/test_data.tsv', delimiter='\t')
test_y = data[:, 0]
test_x = data[:, 1:]
dtest = xgb.DMatrix(test_x, label=test_y)
predictions = bst.predict(dtest)

# First comma-separated field of every data line; the first line of the file
# is skipped.
user_ids = [
    raw.split(',')[0]
    for raw in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/test/orderFuture_test.csv')[1:]
]

# NOTE(review): ids and predictions are paired purely by position, and zip()
# stops at the shorter sequence -- confirm both inputs have the same row
# order and length.
res = ['userid,orderType']
res.extend(
    user_id + ',' + str(score)
    for user_id, score in zip(user_ids, predictions))
local_file_util.writeFile('data/submit.csv', res)
# -*- coding: utf-8 -*-
# Group the action log by user and write one line per user, ordered by the
# number of actions (largest first).
# Output line format:
#   userId <TAB> actionCount <TAB> time:actionType <TAB> time:actionType ...
from tools import local_file_util

# Data lines split on commas; the first line of the file is skipped.
rows = [
    raw.split(',')
    for raw in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/trainingset/action_train.csv')[1:]
]

# column 0 -> ["column2:column1", ...] in file order. Appending in place
# avoids rebuilding the whole list for every row of the same user.
userId_actionTypeList_dict = {}
for fields in rows:
    userId_actionTypeList_dict.setdefault(fields[0], []).append(
        fields[2] + ':' + fields[1])

# Sort on the list length itself instead of parsing the count back out of the
# formatted line. sorted() is stable, so users with equal counts keep their
# dictionary order.
ranked = sorted(
    userId_actionTypeList_dict.items(),
    key=lambda item: len(item[1]),
    reverse=True)

save_str = [
    user_id + '\t' + str(len(actions)) + '\t' + '\t'.join(actions)
    for user_id, actions in ranked
]
local_file_util.writeFile('data/userId_actionTypeNum.tsv', save_str)
# -*- coding: utf-8 -*-
# Count how many users share each order count and write the result, ordered
# by number of users (largest first).
# Output line format: orderNum <TAB> userCount
from tools import local_file_util

# Each input line is split into its tab-separated fields; the code below
# reads field 0 as the user id and field 1 as that user's order count.
rows = [
    raw.split('\t')
    for raw in local_file_util.readFile('data/user_orderNum.tsv')
]

# orderNum (kept as a string) -> [userId, ...]. Appending in place avoids
# rebuilding the list for every row that shares an order count.
orderNum_userId_dic = {}
for fields in rows:
    orderNum_userId_dic.setdefault(fields[1], []).append(fields[0])

# (orderNum, number of users) pairs. sorted() is stable, so equal counts keep
# their dictionary order.
orderNum_userIdNum = sorted(
    ((order_num, len(user_ids))
     for order_num, user_ids in orderNum_userId_dic.items()),
    key=lambda pair: pair[1],
    reverse=True)

# Build a real list rather than a lazy map object before handing the lines
# to the writer.
save_str = [
    order_num + '\t' + str(user_count)
    for order_num, user_count in orderNum_userIdNum
]
local_file_util.writeFile('data/orderNum_userIdNum.tsv', save_str)
# -*- coding: utf-8 -*-
# Count the number of orders per user and write the result, ordered by order
# count (largest first).
# Output line format: userId <TAB> orderCount
from tools import local_file_util

# Data lines split on commas; the first line of the file is skipped.
rows = [
    raw.split(',')
    for raw in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/trainingset/orderHistory_train.csv'
    )[1:]
]

# (userId, orderId) pairs taken from the first two columns.
userId_orderId_list = [(fields[0], fields[1]) for fields in rows]

# userId -> [orderId1, orderId2, ...]. Appending in place avoids rebuilding
# the list for every order of the same user.
dic = {}
for user_id, order_id in userId_orderId_list:
    dic.setdefault(user_id, []).append(order_id)

# (userId, number of orders) pairs. sorted() is stable, so users with equal
# counts keep their dictionary order.
userId_oderNum = sorted(
    ((user_id, len(order_ids)) for user_id, order_ids in dic.items()),
    key=lambda pair: pair[1],
    reverse=True)

# Build a real list rather than a lazy map object before handing the lines
# to the writer.
res_save_str = [
    user_id + '\t' + str(order_count)
    for user_id, order_count in userId_oderNum
]
local_file_util.writeFile('data/user_orderNum.tsv', res_save_str)
# -*- coding: utf-8 -* #how to make test: 1: origin_train_line = ['-1', user_id] 2: trainningset to test 3:change save file from tools import local_file_util from itertools import groupby import numpy as np orderFuture_train = [ line.split(',') for line in local_file_util.readFile( 'bigdata/huangbaoche/huangbaoche_unzip/test/orderFuture_test.csv')[1:] ] action_train = [ line.split(',') for line in local_file_util.readFile( 'bigdata/huangbaoche/huangbaoche_unzip/test/action_test.csv')[1:] ] orderHistory_comment_train = [ line.split(',') for line in local_file_util.readFile( 'data/merge_orderHistory_userComment_test.csv') ] userProfile_train = [ line.split(',') for line in local_file_util.readFile( 'bigdata/huangbaoche/huangbaoche_unzip/test/userProfile_test.csv')[1:] ] continent_rate_dict = { '大洋洲': 1.4 / 0.33, '欧洲': 19.1 / 7.25, '非洲': 2.2 / 8.69,
# -*- coding: utf-8 -*-
# Count the number of orders per user and write the result, ordered by order
# count (largest first).
# Output line format: userId <TAB> orderCount
from tools import local_file_util

# Data lines split on commas; the first line of the file is skipped.
rows = [
    raw.split(',')
    for raw in local_file_util.readFile(
        'bigdata/皇包车比赛/皇包车比赛数据-非压缩包/trainingset/orderHistory_train.csv')[1:]
]

# (userId, orderId) pairs taken from the first two columns.
userId_orderId_list = [(fields[0], fields[1]) for fields in rows]

# userId -> [orderId1, orderId2, ...]. Appending in place avoids rebuilding
# the list for every order of the same user.
dic = {}
for user_id, order_id in userId_orderId_list:
    dic.setdefault(user_id, []).append(order_id)

# (userId, number of orders) pairs. sorted() is stable, so users with equal
# counts keep their dictionary order.
userId_oderNum = sorted(
    ((user_id, len(order_ids)) for user_id, order_ids in dic.items()),
    key=lambda pair: pair[1],
    reverse=True)

# Build a real list rather than a lazy map object before handing the lines
# to the writer.
res_save_str = [
    user_id + '\t' + str(order_count)
    for user_id, order_count in userId_oderNum
]
local_file_util.writeFile('data/user_orderNum.tsv', res_save_str)