예제 #1
0
def gen_user_buy_with_coupon(matrix_offline, matrix_online, X):
    """Join per-user purchase counters and a coupon-purchase ratio onto ``X``.

    Walks every offline and online record and tallies, per ``uid``:

    * ``user_buy`` -- rows that are purchases (offline: ``date`` is not
      ``'null'``; online: ``act`` equals ``'1'``);
    * ``user_buy_with_coupon`` -- purchase rows whose ``cid`` is not
      ``'null'``.

    A third column, ``user_buy_with_coupon_freq``, is derived from the two
    counters through ``HashSet.merge_op``.  All three columns are then joined
    onto ``X`` by ``"uid"`` with a default of ``0.0``.

    Args:
        matrix_offline: table exposing ``ndata``, ``default`` and
            ``get_cell(row, column)`` for the columns ``cid``, ``date``
            and ``uid``.
        matrix_online: table exposing ``ndata`` and ``get_cell(row, column)``
            for the columns ``act``, ``cid`` and ``uid``.
        X: feature table that receives the new columns via ``X.join``.
    """
    # Both counters are created with the offline matrix's default value.
    user_buy = HashSet(default=matrix_offline.default)
    user_buy_with_coupon = HashSet(default=matrix_offline.default)

    # Offline records: a non-'null' date marks a purchase.
    for i in xrange(matrix_offline.ndata):
        cid_str = matrix_offline.get_cell(i, 'cid')
        date_str = matrix_offline.get_cell(i, 'date')
        uid_str = matrix_offline.get_cell(i, 'uid')
        if date_str != 'null':
            user_buy.add_one(uid_str)
            if cid_str != 'null':
                user_buy_with_coupon.add_one(uid_str)

    # Online records: act == '1' marks a purchase.
    for i in xrange(matrix_online.ndata):
        act_str = matrix_online.get_cell(i, 'act')
        cid_str = matrix_online.get_cell(i, 'cid')
        uid_str = matrix_online.get_cell(i, 'uid')
        if act_str == '1':
            user_buy.add_one(uid_str)
            if cid_str != 'null':
                user_buy_with_coupon.add_one(uid_str)

    # The lambda divides its second argument by its first.
    # NOTE(review): presumably merge_op passes (user_buy value,
    # user_buy_with_coupon value), giving coupon purchases / purchases --
    # confirm against HashSet.merge_op.
    user_buy_with_coupon_freq = user_buy.merge_op(
        user_buy_with_coupon, lambda x, y: float(y) / float(x), dft=0.0)

    columns = ["user_buy", "user_buy_with_coupon", "user_buy_with_coupon_freq"]
    # Pass the formats as a real list, one entry per column, like every other
    # join/Matrix call in this file.  The previous generator expression could
    # only be consumed once and supports neither len() nor indexing.
    formats = ["%s"] * len(columns)
    merged = user_buy.merge(user_buy_with_coupon, dft=0.0).merge(
        user_buy_with_coupon_freq, dft=0.0)
    X.join("uid", columns, merged, formats, dft=0.0)
예제 #2
0
def gen_user_get_coupon(offline_source, online_source, X, month):
    """Add per-user coupon-received counts and a coupon-usage ratio to ``X``.

    Loads the offline and online CSV files found under ``paths.ccf_path``,
    counts for each ``uid`` the rows whose ``cid`` is not ``'null'``, joins
    that count onto ``X`` as ``user_get_coupon`` (default ``0.0``), and then
    asks ``X`` to derive ``user_use_coupon_freq`` from the columns
    ``user_buy_with_coupon`` and ``user_get_coupon``.

    Args:
        offline_source: file name of the offline CSV, relative to
            ``paths.ccf_path``.
        online_source: file name of the online CSV, relative to
            ``paths.ccf_path``.
        X: feature table exposing ``join`` and ``gen_arith_feature``.
        month: not used by this function.
    """

    def load_table(source, columns):
        # Every field is read as a string and formatted with "%s".
        raw = np.genfromtxt(paths.ccf_path + source, delimiter=',', dtype=str)
        return Matrix(raw, columns, ["%s"] * len(columns))

    def divide(x, y):
        # NOTE(review): -9999 is presumably a missing-value sentinel --
        # confirm against the code that fills X.
        assert float(x) != -9999 and float(y) != -9999
        assert float(x) <= float(y), "{0} {1}".format(x, y)
        # A zero denominator yields 0.0 instead of raising.
        return 0.0 if float(y) == 0 else float(x) / float(y)

    offline = load_table(
        offline_source,
        ["uid", "mid", "cid", "dis_rate", "dist", "date_rec", "date"])
    online = load_table(
        online_source,
        ["uid", "mid", "act", "cid", "dis_rate", "date_rec", "date"])

    user_get_coupon = HashSet()

    for row in xrange(offline.ndata):
        user = offline.get_cell(row, "uid")
        coupon = offline.get_cell(row, "cid")
        if coupon == 'null':
            continue
        user_get_coupon.add_one(user)

    for row in xrange(online.ndata):
        user = online.get_cell(row, "uid")
        coupon = online.get_cell(row, "cid")
        action = online.get_cell(row, "act")
        if coupon == 'null':
            continue
        # An online row that carries a coupon id must not have act '0'.
        assert action != '0'
        user_get_coupon.add_one(user)

    X.join("uid", ["user_get_coupon"], user_get_coupon, ["%s"], dft=0.0)
    X.gen_arith_feature("user_buy_with_coupon", "user_get_coupon",
                        "user_use_coupon_freq", divide, "%s", dft=0.0)
예제 #3
0
def _record_distinct_user(users_by_merchant, counter, mid_str, uid_str):
    """Bump ``counter`` for ``mid_str`` the first time ``uid_str`` is seen.

    ``users_by_merchant`` maps a merchant id to a ``HashSet`` of the user ids
    already recorded for it; the inner set is created on first use.
    """
    if not users_by_merchant.has(mid_str):
        users_by_merchant.set(mid_str, HashSet())
    seen_users = users_by_merchant.get(mid_str)
    if not seen_users.has(uid_str):
        seen_users.add_one(uid_str)
        counter.add_one(mid_str)


def gen_merchant_share(X, month):
    """Join per-merchant distinct-buyer counts onto ``X`` and checkpoint it.

    Reads ``offline_train_<month>.csv`` and ``online_train_<month>.csv`` from
    ``paths.ccf_path`` and, for every ``mid``, counts

    * the distinct ``uid`` values with at least one purchase row (offline:
      ``date`` is not ``'null'``; online: ``act`` equals ``'1'``), and
    * the distinct ``uid`` values with at least one purchase row whose
      ``cid`` is not ``'null'``.

    The two counts are joined onto ``X`` by ``"mid"`` as
    ``merchant_user_buy`` and ``merchant_user_use_coupon`` (default ``0.0``),
    after which ``X.check_point(month)`` is called.

    Args:
        X: feature table exposing ``join`` and ``check_point``.
        month: value substituted into the CSV file names and passed to
            ``X.check_point``.
    """
    offline = Matrix(
        np.genfromtxt(paths.ccf_path + 'offline_train_{0}.csv'.format(month),
                      delimiter=',',
                      dtype=str),
        ["uid", "mid", "cid", "dis_rate", "dist", "date_rec", "date"],
        ["%s" for i in xrange(7)])
    online = Matrix(
        np.genfromtxt(paths.ccf_path + 'online_train_{0}.csv'.format(month),
                      delimiter=',',
                      dtype=str),
        ["uid", "mid", "act", "cid", "dis_rate", "date_rec", "date"],
        ["%s" for i in xrange(7)])

    # mid -> HashSet of uids already counted, plus the matching mid -> count.
    merchant_user_buy = HashSet()
    merchant_user_use_coupon = HashSet()
    merchant_user_buy_counter = HashSet()
    merchant_user_use_coupon_counter = HashSet()

    for i in xrange(offline.ndata):
        if i % 100000 == 0:
            # Progress output; this form is valid on Python 2 and 3.
            print(i)
        mid_str = offline.get_cell(i, "mid")
        uid_str = offline.get_cell(i, "uid")
        date_str = offline.get_cell(i, "date")
        cid_str = offline.get_cell(i, "cid")
        if date_str != 'null':
            _record_distinct_user(merchant_user_buy,
                                  merchant_user_buy_counter,
                                  mid_str, uid_str)
            if cid_str != 'null':
                _record_distinct_user(merchant_user_use_coupon,
                                      merchant_user_use_coupon_counter,
                                      mid_str, uid_str)

    for i in xrange(online.ndata):
        if i % 100000 == 0:
            print(i)
        mid_str = online.get_cell(i, "mid")
        uid_str = online.get_cell(i, "uid")
        act_str = online.get_cell(i, "act")
        cid_str = online.get_cell(i, "cid")
        if act_str == '1':
            _record_distinct_user(merchant_user_buy,
                                  merchant_user_buy_counter,
                                  mid_str, uid_str)
            if cid_str != 'null':
                _record_distinct_user(merchant_user_use_coupon,
                                      merchant_user_use_coupon_counter,
                                      mid_str, uid_str)

    X.join("mid", ["merchant_user_buy"],
           merchant_user_buy_counter, ["%s"],
           dft=0.0)
    X.join("mid", ["merchant_user_use_coupon"],
           merchant_user_use_coupon_counter, ["%s"],
           dft=0.0)
    X.check_point(month)
예제 #4
0
def gen_merchant_buy(X, month):
    """Add per-merchant purchase and coupon counters, plus two ratios, to ``X``.

    Reads ``offline_train_<month>.csv`` and ``online_train_<month>.csv`` from
    ``paths.ccf_path`` and counts, per ``mid``:

    * ``merchant_buy`` -- purchase rows (offline: ``date`` is not ``'null'``;
      online: ``act`` equals ``'1'``);
    * ``merchant_buy_with_coupon`` -- purchase rows whose ``cid`` is not
      ``'null'``;
    * ``merchant_distribute_coupon`` -- all rows whose ``cid`` is not
      ``'null'``.

    The counters are joined onto ``X`` by ``"mid"``.  ``X`` is then asked to
    derive ``merchant_buy_with_coupon_ratio`` and ``merchant_coupon_ratio``,
    and finally ``X.check_point(month)`` is called.

    Args:
        X: feature table exposing ``join``, ``gen_arith_feature`` and
            ``check_point``.
        month: value substituted into the CSV file names and passed to
            ``X.check_point``.
    """

    def read_table(name_template, columns):
        # Every field is read as a string and formatted with "%s".
        raw = np.genfromtxt(paths.ccf_path + name_template.format(month),
                            delimiter=',',
                            dtype=str)
        return Matrix(raw, columns, ["%s"] * len(columns))

    def divide(x, y):
        assert float(x) <= float(y)
        # A zero denominator yields 0.0 instead of raising.
        return 0.0 if float(y) == 0 else float(x) / float(y)

    offline = read_table(
        'offline_train_{0}.csv',
        ["uid", "mid", "cid", "dis_rate", "dist", "date_rec", "date"])
    online = read_table(
        'online_train_{0}.csv',
        ["uid", "mid", "act", "cid", "dis_rate", "date_rec", "date"])

    merchant_buy = HashSet()
    merchant_buy_with_coupon = HashSet()
    merchant_distribute_coupon = HashSet()

    for row in xrange(offline.ndata):
        if row % 100000 == 0:
            print(row)
        merchant = offline.get_cell(row, "mid")
        purchase_date = offline.get_cell(row, "date")
        coupon = offline.get_cell(row, "cid")
        bought = purchase_date != 'null'
        if bought:
            merchant_buy.add_one(merchant)
        if coupon != 'null':
            if bought:
                merchant_buy_with_coupon.add_one(merchant)
            merchant_distribute_coupon.add_one(merchant)

    for row in xrange(online.ndata):
        if row % 100000 == 0:
            print(row)
        merchant = online.get_cell(row, "mid")
        action = online.get_cell(row, "act")
        coupon = online.get_cell(row, "cid")
        bought = action == '1'
        if bought:
            merchant_buy.add_one(merchant)
        if coupon != 'null':
            if bought:
                merchant_buy_with_coupon.add_one(merchant)
            # An online row that carries a coupon id must not have act '0'.
            assert action != '0'
            merchant_distribute_coupon.add_one(merchant)

    counters = (
        ("merchant_buy", merchant_buy),
        ("merchant_buy_with_coupon", merchant_buy_with_coupon),
        ("merchant_distribute_coupon", merchant_distribute_coupon),
    )
    for column, counts in counters:
        X.join("mid", [column], counts, ["%s"], 0.0)

    # Both ratios share the numerator column "merchant_buy_with_coupon".
    ratios = (
        ("merchant_buy", "merchant_buy_with_coupon_ratio"),
        ("merchant_distribute_coupon", "merchant_coupon_ratio"),
    )
    for denominator, feature in ratios:
        X.gen_arith_feature("merchant_buy_with_coupon",
                            denominator,
                            feature,
                            divide,
                            "%s",
                            dft=0.0)
    X.check_point(month)