def stack_method():
    """Blend the base models' test predictions and write a submission CSV.

    The blending strategy is selected by the global ``args.model``:

    * name contains ``"nn"``  -- load ``<model>_model.dump`` via ``read_dump``
      and keep column 1 of its ``predict_proba`` output;
    * name contains ``"xgb"`` -- load ``<model>_model.dump`` into an
      ``xgb.Booster`` (9 threads) and keep its ``predict`` output;
    * name equals ``"mean"``  -- take the per-row mean of the base predictions.

    For the "nn" and "xgb" strategies every base prediction is passed through
    ``relogit`` first.  Names containing ``"mean"`` use eight base prediction
    files; all other names additionally use ``data/te.ins2.ffm.out``.

    Side effects: prints elapsed time at rows 1, 2, 4, 8, ... while reading,
    writes ``result_<model>.csv`` (header ``ID,IsClick``) pairing the IDs from
    ``read_sample("data/sampleSubmission.csv")`` with the blended predictions,
    and finally prints the number of rows written and their mean prediction.

    Raises:
        ValueError: if ``args.model`` selects none of the strategies above.
            (Previously such a name silently wrote the last input row out as
            the "predictions".)
    """
    model_name = args.model
    use_nn = "nn" in model_name
    use_xgb = "xgb" in model_name
    use_mean = model_name == "mean"
    # Fail fast, before any file is read or written, on a name that no
    # blending branch below would handle.
    if not (use_nn or use_xgb or use_mean):
        raise ValueError(
            "unsupported stacking model %r: expected a name containing "
            "'nn' or 'xgb', or exactly 'mean'" % (model_name,))

    all_preds = [
        "data/te.ins5.ffm.out",
        "data/xgb5_test.out",
        "data/te.ins_bag.ffm.out",
        "data/te.ins5.fm.out",
        "data/te.ins3.ffm.out",
        "data/te.ins20.ffm.out",
        "data/xgb3_test.out",
        "data/te.ins.ffm.out",
    ]
    if "mean" not in model_name:
        # Every non-"mean" variant is fed one extra base model.
        all_preds.append("data/te.ins2.ffm.out")

    apply_relogit = use_nn or use_xgb
    next_report = 1
    started = datetime.now()
    X = []
    for t, row in enumerate(read_preds(all_preds)):
        if apply_relogit:
            row = [relogit(value) for value in row]
        X.append(row)
        if t == next_report:
            # Progress log with exponentially growing spacing.  The
            # single-argument print(...) form prints the same text under the
            # Python 2 print statement.
            print("%s: %s" % (t, datetime.now() - started))
            next_report *= 2

    # "nn" takes precedence over "xgb", which takes precedence over "mean".
    if use_nn:
        model = read_dump("%s_model.dump" % model_name)
        preds = model.predict_proba(np.array(X).astype(np.float32))[:, 1]
    elif use_xgb:
        # The labels are placeholders (all zero); only predictions are used.
        dtest = xgb.DMatrix(np.array(X), label=[0] * len(X))
        bst = xgb.Booster({"nthread": 9})
        bst.load_model("%s_model.dump" % model_name)
        preds = bst.predict(dtest)
    else:
        preds = np.mean(np.array(X).astype(np.float32), 1)

    total = 0
    cnt = 0
    with open("result_%s.csv" % model_name, "w") as outfile:
        outfile.write("ID,IsClick\n")
        for ID, p in izip(read_sample("data/sampleSubmission.csv"), preds):
            total += p
            cnt += 1
            outfile.write("%s,%s\n" % (ID, str(p)))
    # Guard the mean so an empty sample file / prediction set does not raise
    # ZeroDivisionError after the CSV has already been written.
    print("%s %s" % (cnt, total / cnt if cnt else 0.0))
def stack_method():
    """Blend the base models' test predictions and write a submission CSV.

    The blending strategy is selected by the global ``args.model``:

    * name contains ``"nn"``  -- load ``<model>_model.dump`` via ``read_dump``
      and keep column 1 of its ``predict_proba`` output;
    * name contains ``"xgb"`` -- load ``<model>_model.dump`` into an
      ``xgb.Booster`` (9 threads) and keep its ``predict`` output;
    * name equals ``"mean"``  -- take the per-row mean of the base predictions.

    For the "nn" and "xgb" strategies every base prediction is passed through
    ``relogit`` first.  Names containing ``"mean"`` use eight base prediction
    files; all other names additionally use ``data/te.ins2.ffm.out``.

    Side effects: prints elapsed time at rows 1, 2, 4, 8, ... while reading,
    writes ``result_<model>.csv`` (header ``ID,IsClick``) pairing the IDs from
    ``read_sample("data/sampleSubmission.csv")`` with the blended predictions,
    and finally prints the number of rows written and their mean prediction.

    Raises:
        ValueError: if ``args.model`` selects none of the strategies above.
            (Previously such a name silently wrote the last input row out as
            the "predictions".)
    """
    model_name = args.model
    use_nn = "nn" in model_name
    use_xgb = "xgb" in model_name
    use_mean = model_name == "mean"
    # Fail fast, before any file is read or written, on a name that no
    # blending branch below would handle.
    if not (use_nn or use_xgb or use_mean):
        raise ValueError(
            "unsupported stacking model %r: expected a name containing "
            "'nn' or 'xgb', or exactly 'mean'" % (model_name,))

    all_preds = [
        "data/te.ins5.ffm.out",
        "data/xgb5_test.out",
        "data/te.ins_bag.ffm.out",
        "data/te.ins5.fm.out",
        "data/te.ins3.ffm.out",
        "data/te.ins20.ffm.out",
        "data/xgb3_test.out",
        "data/te.ins.ffm.out",
    ]
    if "mean" not in model_name:
        # Every non-"mean" variant is fed one extra base model.
        all_preds.append("data/te.ins2.ffm.out")

    apply_relogit = use_nn or use_xgb
    next_report = 1
    started = datetime.now()
    X = []
    for t, row in enumerate(read_preds(all_preds)):
        if apply_relogit:
            row = [relogit(value) for value in row]
        X.append(row)
        if t == next_report:
            # Progress log with exponentially growing spacing.  The
            # single-argument print(...) form prints the same text under the
            # Python 2 print statement.
            print("%s: %s" % (t, datetime.now() - started))
            next_report *= 2

    # "nn" takes precedence over "xgb", which takes precedence over "mean".
    if use_nn:
        model = read_dump("%s_model.dump" % model_name)
        preds = model.predict_proba(np.array(X).astype(np.float32))[:, 1]
    elif use_xgb:
        # The labels are placeholders (all zero); only predictions are used.
        dtest = xgb.DMatrix(np.array(X), label=[0] * len(X))
        bst = xgb.Booster({'nthread': 9})
        bst.load_model("%s_model.dump" % model_name)
        preds = bst.predict(dtest)
    else:
        preds = np.mean(np.array(X).astype(np.float32), 1)

    total = 0
    cnt = 0
    with open("result_%s.csv" % model_name, 'w') as outfile:
        outfile.write('ID,IsClick\n')
        for ID, p in izip(read_sample("data/sampleSubmission.csv"), preds):
            total += p
            cnt += 1
            outfile.write('%s,%s\n' % (ID, str(p)))
    # Guard the mean so an empty sample file / prediction set does not raise
    # ZeroDivisionError after the CSV has already been written.
    print("%s %s" % (cnt, total / cnt if cnt else 0.0))
# Script tail: final command-line options, optional MongoDB setup, global
# lookup tables, then the call to main().
#
# NOTE(review): the statements below were collapsed onto a single line, so the
# original indentation -- in particular which statements after
# `if args.mongo:` belong to that branch -- cannot be recovered from this
# text.  Restore the layout from version control before editing the logic.
#
# In order, the statements:
#   * register the integer options --seed (default 9), --date (default 0) and
#     --log (default 1) on `parser`, then parse the command line into `args`;
#   * when `args.mongo` is truthy: import MongoClient and functools32, connect
#     to MongoDB at localhost:27017, bind `db` to its `test` database and
#     define get_ad_info(aid), which fetches the `ad_info` document whose
#     AdID equals `aid`, passes it through trans_ad_info, and is wrapped in
#     lru_cache(maxsize=1000000);
#   * load the seven *_cnt_dict objects with read_dump from data/*.dump;
#   * build user_info_map / category_map via get_user_info() / get_category();
#   * create user_cnt_iter and user_aid_cnt_iter from comma-delimited
#     read_tsv() readers (the latter wrapped in next_row);
#   * call main().
parser.add_argument('--seed', type=int, default=9) parser.add_argument('--date', type=int, default=0) parser.add_argument('--log', type=int, default=1) args = parser.parse_args() if args.mongo: from pymongo import MongoClient import functools32 as functools client = MongoClient('localhost', 27017) db = client.test @functools.lru_cache(maxsize=1000000) def get_ad_info(aid): ad_info = db.ad_info.find_one({"AdID": aid}) return trans_ad_info(ad_info) uid_cnt_dict = read_dump("data/uid_cnt.dump") adid_cnt_dict = read_dump("data/adid_cnt.dump") ipid_cnt_dict = read_dump("data/ipid_cnt.dump") query_cnt_dict = read_dump("data/query_cnt.dump") title_cnt_dict = read_dump("data/title_cnt.dump") query_param_cnt_dict = read_dump("data/query_param_cnt.dump") ad_param_cnt_dict = read_dump("data/ad_param_cnt.dump") user_info_map = get_user_info() category_map = get_category() user_cnt_iter = read_tsv("data/user_cnt.csv", delimiter=",") user_aid_cnt_iter = next_row(read_tsv("data/user_aid_cnt.csv", delimiter=",")) main()
# Script tail: final command-line options, optional MongoDB setup, global
# lookup tables, then the call to main().
#
# NOTE(review): the statements below were collapsed onto a single line, so the
# original indentation -- in particular which statements after
# `if args.mongo:` belong to that branch -- cannot be recovered from this
# text.  Restore the layout from version control before editing the logic.
#
# In order, the statements:
#   * register the integer options --seed (default 9), --date (default 0) and
#     --log (default 1) on `parser`, then parse the command line into `args`;
#   * when `args.mongo` is truthy: import MongoClient and functools32, connect
#     to MongoDB at localhost:27017, bind `db` to its `test` database and
#     define get_ad_info(aid), which fetches the `ad_info` document whose
#     AdID equals `aid`, passes it through trans_ad_info, and is wrapped in
#     lru_cache(maxsize=1000000);
#   * load the seven *_cnt_dict objects with read_dump from data/*.dump;
#   * build user_info_map / category_map via get_user_info() / get_category();
#   * create user_cnt_iter and user_aid_cnt_iter from comma-delimited
#     read_tsv() readers (the latter wrapped in next_row);
#   * call main().
parser.add_argument('--seed', type=int, default=9) parser.add_argument('--date', type=int, default=0) parser.add_argument('--log', type=int, default=1) args = parser.parse_args() if args.mongo: from pymongo import MongoClient import functools32 as functools client = MongoClient('localhost', 27017) db = client.test @functools.lru_cache(maxsize=1000000) def get_ad_info(aid): ad_info = db.ad_info.find_one({"AdID": aid}) return trans_ad_info(ad_info) uid_cnt_dict = read_dump("data/uid_cnt.dump") adid_cnt_dict = read_dump("data/adid_cnt.dump") ipid_cnt_dict = read_dump("data/ipid_cnt.dump") query_cnt_dict = read_dump("data/query_cnt.dump") title_cnt_dict = read_dump("data/title_cnt.dump") query_param_cnt_dict = read_dump("data/query_param_cnt.dump") ad_param_cnt_dict = read_dump("data/ad_param_cnt.dump") user_info_map = get_user_info() category_map = get_category() user_cnt_iter = read_tsv("data/user_cnt.csv", delimiter=",") user_aid_cnt_iter = next_row( read_tsv("data/user_aid_cnt.csv", delimiter=",")) main()
# Script tail: final command-line options, optional MongoDB setup, global
# lookup tables, then the call to main().
#
# NOTE(review): the statements below were collapsed onto a single line, so the
# original indentation -- in particular which statements after
# `if args.mongo:` belong to that branch -- cannot be recovered from this
# text.  Restore the layout from version control before editing the logic.
#
# NOTE(review): the read_dump() paths here are bare file names
# ("uid_cnt.dump", ...) resolved against the working directory, while the CSV
# inputs in this same statement run live under "data/".  Confirm whether the
# dumps are really expected outside data/ or the prefix was dropped by mistake.
#
# In order, the statements:
#   * register the integer options --seed (default 9), --date (default 0) and
#     --log (default 1) on `parser`, then parse the command line into `args`;
#   * when `args.mongo` is truthy: import MongoClient and functools32, connect
#     to MongoDB at localhost:27017, bind `db` to its `test` database and
#     define get_ad_info(aid), which fetches the `ad_info` document whose
#     AdID equals `aid`, passes it through trans_ad_info, and is wrapped in
#     lru_cache(maxsize=1000000);
#   * load the seven *_cnt_dict objects with read_dump from *.dump files;
#   * build user_info_map / category_map via get_user_info() / get_category();
#   * create user_cnt_iter and user_aid_cnt_iter from comma-delimited
#     read_tsv() readers (the latter wrapped in next_row);
#   * call main().
parser.add_argument('--seed', type=int, default=9) parser.add_argument('--date', type=int, default=0) parser.add_argument('--log', type=int, default=1) args = parser.parse_args() if args.mongo: from pymongo import MongoClient import functools32 as functools client = MongoClient('localhost', 27017) db = client.test @functools.lru_cache(maxsize=1000000) def get_ad_info(aid): ad_info = db.ad_info.find_one({"AdID": aid}) return trans_ad_info(ad_info) uid_cnt_dict = read_dump("uid_cnt.dump") adid_cnt_dict = read_dump("adid_cnt.dump") ipid_cnt_dict = read_dump("ipid_cnt.dump") query_cnt_dict = read_dump("query_cnt.dump") title_cnt_dict = read_dump("title_cnt.dump") query_param_cnt_dict = read_dump("query_param_cnt.dump") ad_param_cnt_dict = read_dump("ad_param_cnt.dump") user_info_map = get_user_info() category_map = get_category() user_cnt_iter = read_tsv("data/user_cnt.csv", delimiter=",") user_aid_cnt_iter = next_row(read_tsv("data/user_aid_cnt.csv", delimiter=",")) main()