def get_customer_history(flag, n=1000):
    """Build and save padded customer-history sequences for one data split.

    Args:
        flag: Split name used to look up attributes on ``FileNames``
            (``'<flag>_v2'`` as input, ``'<flag>_customer_hist_nn_data'``
            as output). ``'val'`` reads the ``'tr'`` history artifact and
            ``'test'`` reads the ``'train'`` one; any other value is used
            as the history split name unchanged.
        n: ``maxlen`` handed to ``pad_sequences`` for every row.

    Returns:
        None. The concatenated padded array is written with ``save_npy``.
    """
    source_path = getattr(FileNames, '{}_v2'.format(flag))
    target_path = getattr(FileNames, '{}_customer_hist_nn_data'.format(flag))

    # Evaluation splits borrow the history artifact of their training split.
    history_split = {'val': 'tr', 'test': 'train'}.get(flag, flag)
    history_path = getattr(
        FileNames, 'cust_{}_artifact1'.format(history_split))

    frame = load_pickle(source_path)
    history = load_pickle(history_path)
    aggregator = ListAggregation(
        date_col=FieldNames.campaign_start_date,
        user_col=FieldNames.customer_id,
        key_col=FieldNames.item_set,
        hist_artifact=history,
    )

    # NOTE(review): pad_sequences is presumably the Keras helper, so each
    # row is expected to be a list of sequences -- confirm against imports.
    pad_options = dict(
        maxlen=n,
        padding='pre',
        truncating='pre',
        value=0,
        dtype='int32',
    )
    padded_rows = [
        pad_sequences(sequences, **pad_options)
        for sequences in aggregator.transform(frame)
    ]
    save_npy(target_path, np.concatenate(padded_rows))
def load_data(flag="val"):
    """Load the pickled feature frames for a split and pass them to make_x_y.

    Args:
        flag: ``"val"`` loads ``FileNames.tr_features_v1`` and
            ``FileNames.val_features_v1``; ``"test"`` loads
            ``FileNames.train_features_v1`` and
            ``FileNames.test_features_v1``.

    Returns:
        Whatever ``make_x_y(x_tr, x_val, flag=flag)`` returns.

    Raises:
        ValueError: If ``flag`` is neither ``"val"`` nor ``"test"``.
    """
    if flag == "val":
        tr_file = FileNames.tr_features_v1
        val_file = FileNames.val_features_v1
    elif flag == "test":
        tr_file = FileNames.train_features_v1
        val_file = FileNames.test_features_v1
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "flag must be 'val' or 'test', got {!r}".format(flag))

    x_tr = load_pickle(tr_file)
    x_val = load_pickle(val_file)
    return make_x_y(x_tr, x_val, flag=flag)
def map_campign_id(x_tr, x_val, flag="val"):
    """Copy the ``campaign_id`` column from the v2 frames onto the features.

    The values are assigned positionally (``.values``), so this assumes
    ``x_tr``/``x_val`` have the same row order and length as the matching
    v2 frames -- TODO confirm against the feature-generation step.

    Args:
        x_tr: Training feature frame; modified in place.
        x_val: Validation/test feature frame; modified in place.
        flag: ``"val"`` reads ``FileNames.tr_v2`` / ``FileNames.val_v2``;
            ``"test"`` reads ``FileNames.train_v2`` / ``FileNames.test_v2``.

    Returns:
        The same ``(x_tr, x_val)`` objects with ``"campaign_id"`` set.

    Raises:
        ValueError: If ``flag`` is neither ``"val"`` nor ``"test"``.
    """
    if flag == "val":
        tr_file = FileNames.tr_v2
        val_file = FileNames.val_v2
    elif flag == "test":
        tr_file = FileNames.train_v2
        val_file = FileNames.test_v2
    else:
        # Reject unknown splits before touching the inputs; previously this
        # fell through and crashed with an UnboundLocalError.
        raise ValueError(
            "flag must be 'val' or 'test', got {!r}".format(flag))

    tr = load_pickle(tr_file)
    val = load_pickle(val_file)

    x_tr["campaign_id"] = tr["campaign_id"].values
    x_val["campaign_id"] = val["campaign_id"].values
    return x_tr, x_val
# Example #4
# 0
def generate_features(flag):
    """Run the feature pipeline for one split and pickle the results.

    The pipeline is fitted on the training frame and then applied to the
    evaluation frame. The target column is copied onto the training
    features always, and onto the evaluation features only for ``"val"``.

    Args:
        flag: ``"test"`` uses the ``train``/``test`` files and artifacts on
            ``FileNames``; ``"val"`` uses the ``tr``/``val`` ones.

    Returns:
        None. Both feature frames are written with ``save_pickle``.

    Raises:
        ValueError: If ``flag`` is neither ``"val"`` nor ``"test"``.
    """
    if flag == "test":
        tr_artifact_file = FileNames.train_artifact
        hist_artifact_files = [
            FileNames.cust_train_artifact1,
            FileNames.cust_train_artifact2,
            FileNames.cust_train_artifact3,
            FileNames.cust_train_artifact4,
        ]
        tr_file = FileNames.train_v2
        te_file = FileNames.test_v2
        tr_save_file = FileNames.train_features_v1
        te_save_file = FileNames.test_features_v1
    elif flag == "val":
        tr_artifact_file = FileNames.tr_artifact
        hist_artifact_files = [
            FileNames.cust_tr_artifact1,
            FileNames.cust_tr_artifact2,
            FileNames.cust_tr_artifact3,
            FileNames.cust_tr_artifact4,
        ]
        tr_file = FileNames.tr_v2
        te_file = FileNames.val_v2
        tr_save_file = FileNames.tr_features_v1
        te_save_file = FileNames.val_features_v1
    else:
        # Previously this only printed a message and then crashed on the
        # first unbound file variable; fail explicitly instead.
        raise ValueError(
            "flag not VALID! expected 'val' or 'test', got {!r}".format(flag))

    tr_artifact = load_pickle(tr_artifact_file)
    hist_artifacts = [
        load_pickle(hist_file) for hist_file in hist_artifact_files
    ]
    # NOTE(review): the meaning of the argument 3 is defined by
    # get_feature_names, which is not visible here.
    columns = get_feature_names(3)
    tr_data = load_pickle(tr_file)
    te_data = load_pickle(te_file)

    # The pipeline factory receives both splits stacked together.
    all_data = pd.concat([tr_data, te_data])
    pipeline = get_feature_pipeline(tr_artifact, hist_artifacts, all_data)

    x_tr = pipeline.fit_transform(tr_data)
    x_te = pipeline.transform(te_data)

    x_tr = pd.DataFrame(x_tr, columns=columns)
    x_te = pd.DataFrame(x_te, columns=columns)
    x_tr[FieldNames.target] = tr_data[FieldNames.target].values
    if flag == "val":
        # Only the validation split gets the target copied across.
        x_te[FieldNames.target] = te_data[FieldNames.target].values
    save_pickle(x_tr, tr_save_file)
    save_pickle(x_te, te_save_file)
def _save_grouped_artifact(transactions, save_file, index):
    """Group ``transactions``, build the artifact, pickle it, report progress."""
    grouped = group_transactions(transactions)
    artifact = _get_transaction_artifact(grouped)
    save_pickle(artifact, save_file)
    print("Customer artifact {} done!".format(index))


def save_transaction_artifact(flag):
    """Save artifacts for customer transactions with different conditions.

    Four artifacts are written, each built from a different subset of the
    loaded transactions:

    1. all transactions;
    2. rows where ``|coupon_discount| > 0``;
    3. rows where ``|coupon_discount| > 0`` and ``|other_discount| > 0``;
    4. rows where ``|coupon_discount| > |other_discount|``.

    Args:
        flag: ``'test'`` reads ``FileNames.transaction_test_v1`` and writes
            the ``cust_train_artifact*`` files; ``'val'`` reads
            ``FileNames.transaction_val_v1`` and writes the
            ``cust_tr_artifact*`` files.

    Returns:
        None. Artifacts are written with ``save_pickle``.

    Raises:
        ValueError: If ``flag`` is neither ``'val'`` nor ``'test'``.
    """
    if flag == 'test':
        inp_file = FileNames.transaction_test_v1
        save_file1 = FileNames.cust_train_artifact1
        save_file2 = FileNames.cust_train_artifact2
        save_file3 = FileNames.cust_train_artifact3
        save_file4 = FileNames.cust_train_artifact4
    elif flag == 'val':
        inp_file = FileNames.transaction_val_v1
        save_file1 = FileNames.cust_tr_artifact1
        save_file2 = FileNames.cust_tr_artifact2
        save_file3 = FileNames.cust_tr_artifact3
        save_file4 = FileNames.cust_tr_artifact4
    else:
        # Previously this only printed and then crashed on the unbound
        # ``inp_file``; fail explicitly instead.
        raise ValueError(
            "flag not VALID! expected 'val' or 'test', got {!r}".format(flag))

    transactions = load_pickle(inp_file)

    # 1: every transaction.
    _save_grouped_artifact(transactions, save_file1, 1)

    # 2: rows with a non-zero coupon discount.
    coupon_abs = np.abs(transactions[FieldNames.coupon_discount])
    _save_grouped_artifact(
        transactions.loc[coupon_abs > 0], save_file2, 2)

    # 3: rows with both a coupon discount and another discount.
    other_abs = np.abs(transactions[FieldNames.other_discount])
    _save_grouped_artifact(
        transactions.loc[(coupon_abs > 0) & (other_abs > 0)], save_file3, 3)

    # 4: rows where the coupon discount outweighs the other discount.
    _save_grouped_artifact(
        transactions.loc[coupon_abs > other_abs], save_file4, 4)
def save_train_artifact(flag):
    """Create and pickle a ``HistoricalArtifact`` from the training data.

    Args:
        flag: ``'test'`` reads ``FileNames.train_v2`` and writes
            ``FileNames.train_artifact``; ``'val'`` reads
            ``FileNames.tr_v2`` and writes ``FileNames.tr_artifact``.

    Returns:
        None. The artifact is written with ``save_pickle``.

    Raises:
        ValueError: If ``flag`` is neither ``'val'`` nor ``'test'``.
    """
    if flag == 'test':
        inp_file = FileNames.train_v2
        save_file = FileNames.train_artifact
    elif flag == 'val':
        inp_file = FileNames.tr_v2
        save_file = FileNames.tr_artifact
    else:
        # Without this branch an unknown flag crashed later with an
        # UnboundLocalError on ``inp_file``.
        raise ValueError(
            "flag must be 'val' or 'test', got {!r}".format(flag))

    tr = load_pickle(inp_file)
    tr_artifact = HistoricalArtifact(
        tr,
        user_field=FieldNames.customer_id,
        date_field=FieldNames.campaign_start_date,
        key_fields=[
            FieldNames.campaign_id,
            FieldNames.coupon_id,
            FieldNames.target,
            FieldNames.item_category,
        ],
    )
    save_pickle(tr_artifact, save_file)
def get_save_coupon_vecs(flag, brty_map, cat_map):
    """Prepare the coupon vectors for one split and save them.

    Args:
        flag: Split name used to look up ``'<flag>_v2'`` (input) and
            ``'<flag>_coupon_nn_data'`` (output) on ``FileNames``.
        brty_map: Forwarded unchanged to ``prepare_coupon_data``.
        cat_map: Forwarded unchanged to ``prepare_coupon_data``.

    Returns:
        None. The result of ``prepare_coupon_data`` is written with
        ``save_npy``.
    """
    source_path = getattr(FileNames, '{}_v2'.format(flag))
    target_path = getattr(FileNames, '{}_coupon_nn_data'.format(flag))
    vectors = prepare_coupon_data(load_pickle(source_path), brty_map, cat_map)
    save_npy(target_path, vectors)