def gen_user_buy_with_coupon(matrix_offline, matrix_online, X):
    """Join per-user purchase tallies and the coupon-purchase ratio onto X.

    Every row of both matrices is visited once. For each ``uid``:

    * ``user_buy`` receives one ``add_one`` per purchase row (offline: the
      ``date`` cell is not ``'null'``; online: the ``act`` cell is ``'1'``);
    * ``user_buy_with_coupon`` receives one ``add_one`` per purchase row
      whose ``cid`` cell is not ``'null'``;
    * ``user_buy_with_coupon_freq`` is the second value divided by the first.

    Args:
        matrix_offline: object exposing ``ndata``, ``default`` and
            ``get_cell(row, column)`` with columns ``cid``, ``date``, ``uid``.
        matrix_online: object exposing ``ndata`` and ``get_cell(row, column)``
            with columns ``act``, ``cid``, ``uid``.
        X: feature table; its ``join`` method is called once, keyed on
            ``"uid"``, with default value 0.0.
    """
    user_buy = HashSet(default=matrix_offline.default)
    user_buy_with_coupon = HashSet(default=matrix_offline.default)

    for i in xrange(matrix_offline.ndata):
        cid_str = matrix_offline.get_cell(i, 'cid')
        date_str = matrix_offline.get_cell(i, 'date')
        uid_str = matrix_offline.get_cell(i, 'uid')
        # A non-null date marks an offline purchase.
        if date_str != 'null':
            user_buy.add_one(uid_str)
            if cid_str != 'null':
                user_buy_with_coupon.add_one(uid_str)

    for i in xrange(matrix_online.ndata):
        act_str = matrix_online.get_cell(i, 'act')
        cid_str = matrix_online.get_cell(i, 'cid')
        uid_str = matrix_online.get_cell(i, 'uid')
        # Action '1' marks an online purchase.
        if act_str == '1':
            user_buy.add_one(uid_str)
            if cid_str != 'null':
                user_buy_with_coupon.add_one(uid_str)

    def coupon_share(buy_count, coupon_count):
        # Same zero-denominator convention as the `divide` helpers used by
        # the other feature generators in this file: report 0.0, don't raise.
        coupon_total = float(coupon_count)
        buy_total = float(buy_count)
        if buy_total == 0:
            return 0.0
        return coupon_total / buy_total

    user_buy_with_coupon_freq = user_buy.merge_op(
        user_buy_with_coupon, coupon_share, dft=0.0)

    merged = user_buy.merge(user_buy_with_coupon, dft=0.0).merge(
        user_buy_with_coupon_freq, dft=0.0)

    # Formats are passed as a real list (as at every other join call site in
    # this file) rather than a one-shot generator, so the callee can safely
    # measure, index or re-iterate them.
    X.join("uid",
           ["user_buy", "user_buy_with_coupon", "user_buy_with_coupon_freq"],
           merged,
           ["%s"] * 3,
           dft=0.0)
def gen_user_get_coupon(offline_source, online_source, X, month):
    """Join the per-user coupon-row tally and the coupon usage ratio onto X.

    Both CSV files are read from ``paths.ccf_path`` as string matrices. Each
    row whose ``cid`` cell is not ``'null'`` contributes one ``add_one`` for
    its ``uid``; the result is joined as ``user_get_coupon``. Afterwards
    ``user_use_coupon_freq`` is derived as
    ``user_buy_with_coupon / user_get_coupon``, so ``X`` must already carry
    the ``user_buy_with_coupon`` column.

    Args:
        offline_source: file name of the offline CSV, relative to
            ``paths.ccf_path``.
        online_source: file name of the online CSV, relative to
            ``paths.ccf_path``.
        X: feature table exposing ``join`` and ``gen_arith_feature``.
        month: not used by this function; kept for the caller's signature.
    """
    offline_columns = ["uid", "mid", "cid", "dis_rate", "dist", "date_rec",
                       "date"]
    online_columns = ["uid", "mid", "act", "cid", "dis_rate", "date_rec",
                      "date"]

    offline_raw = np.genfromtxt(paths.ccf_path + offline_source,
                                delimiter=',', dtype=str)
    offline = Matrix(offline_raw, offline_columns, ["%s"] * 7)

    online_raw = np.genfromtxt(paths.ccf_path + online_source,
                               delimiter=',', dtype=str)
    online = Matrix(online_raw, online_columns, ["%s"] * 7)

    user_get_coupon = HashSet()

    for row in xrange(offline.ndata):
        uid = offline.get_cell(row, "uid")
        cid = offline.get_cell(row, "cid")
        if cid == 'null':
            continue
        user_get_coupon.add_one(uid)

    for row in xrange(online.ndata):
        uid = online.get_cell(row, "uid")
        cid = online.get_cell(row, "cid")
        act = online.get_cell(row, "act")
        if cid == 'null':
            continue
        # Sanity check on the data: a coupon row never carries action '0'.
        assert act != '0'
        user_get_coupon.add_one(uid)

    X.join("uid", ["user_get_coupon"], user_get_coupon, ["%s"], dft=0.0)

    def divide(x, y):
        # -9999 is rejected on either operand before the ratio is taken.
        assert float(x) != -9999 and float(y) != -9999
        assert float(x) <= float(y), "{0} {1}".format(x, y)
        denominator = float(y)
        if denominator == 0:
            return 0.0
        return float(x) / denominator

    X.gen_arith_feature("user_buy_with_coupon", "user_get_coupon",
                        "user_use_coupon_freq", divide, "%s", dft=0.0)
def gen_merchant_share(X, month):
    """Join per-merchant distinct-user tallies onto X and checkpoint it.

    Reads ``offline_train_<month>.csv`` and ``online_train_<month>.csv`` from
    ``paths.ccf_path``. For every merchant (``mid``) two tallies are kept:

    * ``merchant_user_buy`` -- one ``add_one`` the first time a given user
      appears in a purchase row for that merchant (offline: ``date`` is not
      ``'null'``; online: ``act`` is ``'1'``);
    * ``merchant_user_use_coupon`` -- the same, restricted to purchase rows
      whose ``cid`` is not ``'null'``.

    Args:
        X: feature table exposing ``join`` and ``check_point``.
        month: value formatted into the CSV file names and passed to
            ``X.check_point``.
    """
    offline_path = paths.ccf_path + 'offline_train_{0}.csv'.format(month)
    offline = Matrix(
        np.genfromtxt(offline_path, delimiter=',', dtype=str),
        ["uid", "mid", "cid", "dis_rate", "dist", "date_rec", "date"],
        ["%s"] * 7)

    online_path = paths.ccf_path + 'online_train_{0}.csv'.format(month)
    online = Matrix(
        np.genfromtxt(online_path, delimiter=',', dtype=str),
        ["uid", "mid", "act", "cid", "dis_rate", "date_rec", "date"],
        ["%s"] * 7)

    # mid -> nested HashSet of the users already seen for that merchant.
    buyers_by_merchant = HashSet()
    coupon_users_by_merchant = HashSet()
    # mid -> tally of first-time users, bumped alongside the nested sets.
    buyer_counter = HashSet()
    coupon_user_counter = HashSet()

    def note_user(users_by_merchant, counter, mid, uid):
        # Create the merchant's user set on first sight, then tally the
        # user only if this merchant has not recorded them yet.
        if not users_by_merchant.has(mid):
            users_by_merchant.set(mid, HashSet())
        seen = users_by_merchant.get(mid)
        if not seen.has(uid):
            seen.add_one(uid)
            counter.add_one(mid)

    def record(mid, uid, is_purchase, cid):
        # Only purchase rows count; coupon use is a subset of purchases.
        if not is_purchase:
            return
        note_user(buyers_by_merchant, buyer_counter, mid, uid)
        if cid != 'null':
            note_user(coupon_users_by_merchant, coupon_user_counter, mid, uid)

    for row in xrange(offline.ndata):
        if row % 100000 == 0:
            print(row)
        mid = offline.get_cell(row, "mid")
        uid = offline.get_cell(row, "uid")
        date = offline.get_cell(row, "date")
        cid = offline.get_cell(row, "cid")
        record(mid, uid, date != 'null', cid)

    for row in xrange(online.ndata):
        if row % 100000 == 0:
            print(row)
        mid = online.get_cell(row, "mid")
        uid = online.get_cell(row, "uid")
        act = online.get_cell(row, "act")
        cid = online.get_cell(row, "cid")
        record(mid, uid, act == '1', cid)

    X.join("mid", ["merchant_user_buy"], buyer_counter, ["%s"], dft=0.0)
    X.join("mid", ["merchant_user_use_coupon"], coupon_user_counter,
           ["%s"], dft=0.0)
    X.check_point(month)
def gen_merchant_buy(X, month):
    """Join per-merchant purchase/coupon tallies and ratios onto X.

    Reads ``offline_train_<month>.csv`` and ``online_train_<month>.csv`` from
    ``paths.ccf_path`` and, for every merchant (``mid``), tallies:

    * ``merchant_buy`` -- purchase rows (offline: ``date`` is not ``'null'``;
      online: ``act`` is ``'1'``);
    * ``merchant_buy_with_coupon`` -- purchase rows whose ``cid`` is not
      ``'null'``;
    * ``merchant_distribute_coupon`` -- all rows whose ``cid`` is not
      ``'null'``.

    Two ratio columns are then derived, ``merchant_buy_with_coupon_ratio``
    and ``merchant_coupon_ratio``, and ``X.check_point(month)`` is called.

    Args:
        X: feature table exposing ``join``, ``gen_arith_feature`` and
            ``check_point``.
        month: value formatted into the CSV file names and passed to
            ``X.check_point``.
    """
    offline_path = paths.ccf_path + 'offline_train_{0}.csv'.format(month)
    offline = Matrix(
        np.genfromtxt(offline_path, delimiter=',', dtype=str),
        ["uid", "mid", "cid", "dis_rate", "dist", "date_rec", "date"],
        ["%s"] * 7)

    online_path = paths.ccf_path + 'online_train_{0}.csv'.format(month)
    online = Matrix(
        np.genfromtxt(online_path, delimiter=',', dtype=str),
        ["uid", "mid", "act", "cid", "dis_rate", "date_rec", "date"],
        ["%s"] * 7)

    purchases = HashSet()
    coupon_purchases = HashSet()
    coupons_issued = HashSet()

    for row in xrange(offline.ndata):
        if row % 100000 == 0:
            print(row)
        mid = offline.get_cell(row, "mid")
        date = offline.get_cell(row, "date")
        cid = offline.get_cell(row, "cid")
        has_coupon = cid != 'null'
        if date != 'null':
            purchases.add_one(mid)
            if has_coupon:
                coupon_purchases.add_one(mid)
        if has_coupon:
            coupons_issued.add_one(mid)

    for row in xrange(online.ndata):
        if row % 100000 == 0:
            print(row)
        mid = online.get_cell(row, "mid")
        act = online.get_cell(row, "act")
        cid = online.get_cell(row, "cid")
        has_coupon = cid != 'null'
        if act == '1':
            purchases.add_one(mid)
            if has_coupon:
                coupon_purchases.add_one(mid)
        if has_coupon:
            # Sanity check on the data: a coupon row never has action '0'.
            assert act != '0'
            coupons_issued.add_one(mid)

    joined_columns = (
        ("merchant_buy", purchases),
        ("merchant_buy_with_coupon", coupon_purchases),
        ("merchant_distribute_coupon", coupons_issued),
    )
    for column, tally in joined_columns:
        X.join("mid", [column], tally, ["%s"], 0.0)

    def divide(x, y):
        # The numerator is expected never to exceed the denominator.
        assert float(x) <= float(y)
        denominator = float(y)
        if denominator == 0:
            return 0.0
        return float(x) / denominator

    ratio_features = (
        ("merchant_buy", "merchant_buy_with_coupon_ratio"),
        ("merchant_distribute_coupon", "merchant_coupon_ratio"),
    )
    for denominator_column, ratio_column in ratio_features:
        X.gen_arith_feature("merchant_buy_with_coupon", denominator_column,
                            ratio_column, divide, "%s", dft=0.0)

    X.check_point(month)