Example #1
import logging

import numpy as np
from sklearn.model_selection import ParameterGrid, ShuffleSplit


# bpr_auc_by_users is a project-specific helper (per-user AUC); a sketch of
# what it is assumed to do follows this example.
def find_best_params(X,
                     y,
                     est_class,
                     param_grid,
                     val_size=40000,
                     n_splits=1,
                     random_state=42,
                     **fit_kwargs):
    logging.info("Starting grid search")
    best_params = best_auc = None
    for params in ParameterGrid(param_grid):
        logging.info("Evaluating params: %s", params)
        estimator = est_class(**params)

        # test_size/train_size are keyword-only in recent scikit-learn
        # releases, so pass the split sizes explicitly.
        splitter = ShuffleSplit(n_splits=n_splits, test_size=val_size,
                                random_state=random_state)
        aucs = []
        for train_ids, valid_ids in splitter.split(X):
            X_valid, y_valid = X[valid_ids], y[valid_ids]
            estimator.fit(X[train_ids], y[train_ids], X_valid, y_valid,
                          **fit_kwargs)

            aucs.append(
                bpr_auc_by_users(y_valid, estimator.predict_proba(X_valid),
                                 X_valid[:, 0].reshape(-1)))

        auc = np.mean(aucs)
        if best_auc is None or best_auc < auc:
            best_params = params
            best_auc = auc
            logging.info("Best auc=%.3f, params: %s", auc, params)
    return best_params
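
bpr_auc_by_users is not defined in these examples. For reference, here is a minimal sketch of what it is assumed to compute: an AUC per user (grouping by the user ids passed as the third argument), averaged over users. The name mean_auc_per_user is hypothetical and the project's real implementation may differ.

import numpy as np
from sklearn.metrics import roc_auc_score


def mean_auc_per_user(y_true, y_proba, uids):
    # Sketch only: score each user's positives/negatives separately and
    # average, so users with many interactions do not dominate the metric.
    # Users whose labels are all one class are skipped (AUC is undefined).
    y_true, y_proba, uids = map(np.asarray, (y_true, y_proba, uids))
    aucs = []
    for uid in np.unique(uids):
        mask = uids == uid
        if len(np.unique(y_true[mask])) < 2:
            continue
        aucs.append(roc_auc_score(y_true[mask], y_proba[mask]))
    return float(np.mean(aucs)) if aucs else float("nan")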
Example #2
def main():
    ifd = get_item_feature_data(get_ifd_path(args.data_dir))
    item_feature_m = ifd.m.todense()

    X, uid_idx, iid_idx = load_data(get_training_path(args.data_dir),
                                    iid_idx=ifd.obj_to_row)
    X, y = sample_negative(X)

    param_grid = {
        "n_epochs": [5],
        "n_factors": [10],
        "lambda_ol": [0.1, 0.01, 0.001],
        "lambda_hl": [0.1, 0.01, 0.001],
        "h_layers": [[32, 32, 32]],
        "learning_rate": [0.01, 0.001],
        "dropout_rate": [None, 0.25, 0.5],
        "batch_norm_momentum": [None, 0.95, 0.99],
        "random_state": [args.random_state],
        "batch_size": [50000],
        "n_users": [len(uid_idx)],
        "n_items": [ifd.n_items],
        "n_features": [ifd.n_features]
    }

    best_params = find_best_params(X,
                                   y,
                                   DLPW,
                                   param_grid,
                                   random_state=args.random_state,
                                   item_feature_m=item_feature_m)

    logging.info("Training final fpw, params: %s", best_params)
    pw = DLPW(**best_params)
    pw.fit(X, y, item_feature_m=item_feature_m)

    for temperature in ["warm", "cold", None]:
        testing_path = get_testing_path(args.data_dir, temperature)
        X_test, _, _ = load_data(testing_path, uid_idx, iid_idx)
        X_test, y_test = sample_negative(X_test)

        y_proba = np.array([])
        y_pred = np.array([])
        for offset in range(0, X_test.shape[0], args.step):
            limit = min(offset + args.step, X_test.shape[0])
            X_test_step = X_test[offset:limit]

            y_proba = np.r_[y_proba, pw.predict_proba(X_test_step)]
            y_pred = np.r_[y_pred, pw.predict(X_test_step)]

        uids = X_test[:, 0].reshape(-1)
        auc = bpr_auc_by_users(y_test, y_proba, uids)
        acc = accuracy_score_avg_by_users(y_test, y_pred, uids)
        logging.info("Test: acc=%.3f, auc=%.3f", acc, auc)
Example #3
def main():
    X, uid_idx, iid_idx = load_data(get_training_path(args.data_dir))
    X, y = sample_negative(X)

    param_grid = {
        "n_epochs": [5],
        "n_factors": [10],
        "lambda_p": [0.01],
        "lambda_q": [0.01],
        "learning_rate": [0.01],
        "random_state": [args.random_state],
        "batch_size": [50000],
        "n_users": [len(uid_idx)],
        "n_items": [len(iid_idx)]
    }

    best_params = find_best_params(
        X, y, BPR, param_grid, random_state=args.random_state
    )

    logging.info("Training final bpr, params: %s", best_params)
    bpr = BPR(**best_params)
    bpr.fit(X, y)

    testing_path = get_testing_path(args.data_dir, "warm")
    X_test, _, _ = load_data(testing_path, uid_idx, iid_idx)
    X_test, y_test = sample_negative(X_test)

    y_proba = np.array([])
    y_pred = np.array([])
    for offset in range(0, X_test.shape[0], args.step):
        limit = min(offset + args.step, X_test.shape[0])
        X_test_step = X_test[offset:limit]

        y_proba = np.r_[y_proba, bpr.predict_proba(X_test_step)]
        y_pred = np.r_[y_pred, bpr.predict(X_test_step)]

    uids = X_test[:, 0].reshape(-1)
    auc = bpr_auc_by_users(y_test, y_proba, uids)
    acc = accuracy_score_avg_by_users(y_test, y_pred, uids)
    logging.info("Test: acc=%.3f, auc=%.3f", acc, auc)
Example #4
def main():
    ifd = get_item_feature_data(get_ifd_path(args.data_dir))
    ufd = get_user_feature_data(get_ufd_path(args.data_dir))
    cb = CB(ufd, ifd)

    for temperature in ["warm", "cold", None]:
        testing_path = get_testing_path(args.data_dir, temperature)
        X_test, _, _ = load_data(testing_path, ufd.obj_to_row, ifd.obj_to_row)
        X_test, y_test = sample_negative(X_test)

        y_proba = np.array([])
        y_pred = np.array([])
        for offset in range(0, X_test.shape[0], args.step):
            limit = min(offset + args.step, X_test.shape[0])
            X_test_step = X_test[offset:limit]

            y_proba = np.r_[y_proba, cb.predict_proba(X_test_step)]
            y_pred = np.r_[y_pred, cb.predict(X_test_step)]

        uids = X_test[:, 0].reshape(-1)
        auc = bpr_auc_by_users(y_test, y_proba, uids)
        acc = accuracy_score_avg_by_users(y_test, y_pred, uids)
        logging.info("Test: acc=%.3f, auc=%.3f", acc, auc)