Example #1
0
def test_als_warm_start():
    """Compare a 10-iteration ALS fit with a 5-iteration fit resumed for 5 more.

    One model is fitted with ``n_iter=10``; a second one is fitted with
    ``n_iter=5`` and then fitted again with ``n_more_iter=5``.  The test
    asserts that both end up with exactly the same mean squared error on
    the held-out split.
    """
    from sklearn.model_selection import train_test_split

    X, y, _ = make_user_item_regression(label_stdev=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    X_train, X_test = sp.csc_matrix(X_train), sp.csc_matrix(X_test)

    # Settings shared by both models; only the iteration budget differs.
    shared_params = dict(l2_reg_w=0, l2_reg_V=0, rank=2)

    def held_out_mse(model):
        # Error of ``model`` on the test split.
        return mean_squared_error(model.predict(X_test), y_test)

    # Reference: all 10 iterations in a single fit call.
    reference = als.FMRegression(n_iter=10, **shared_params)
    reference.fit(X_train, y_train)
    mse_single_run = held_out_mse(reference)

    # First half of the split run.
    resumed = als.FMRegression(n_iter=5, **shared_params)
    resumed.fit(X_train, y_train)
    print(resumed.iter_count)
    mse_first_half = held_out_mse(resumed)

    # Second half: fit the same model again, asking for 5 more iterations.
    resumed.fit(sp.csc_matrix(X_train), y_train, n_more_iter=5)
    print(resumed.iter_count)
    mse_resumed = held_out_mse(resumed)

    print(mse_first_half, mse_resumed, mse_single_run)

    assert mse_single_run == mse_resumed
Example #2
0
def test_find_init_stdev():
    """Exercise ``mcmc.find_init_stdev`` with and without a validation split.

    The helper is called three times on the same model: with a range of
    small candidate values, with a single large value, and with the small
    range again plus the held-out split as extra arguments.  The test
    asserts that the small-range result is below the large-value result,
    that the validation-based call selects the same value as the
    train-only call, and that its reported MSE is the larger of the two.

    NOTE(review): ``find_init_stdev`` presumably returns the best stdev
    from ``stdev_range`` together with its MSE -- confirm against the
    helper's implementation.
    """
    from sklearn.model_selection import train_test_split

    X, y, _ = make_user_item_regression(label_stdev=.5)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=44)
    X_train, X_test = sp.csc_matrix(X_train), sp.csc_matrix(X_test)

    # Candidate values; every call gets its own fresh list copy.
    small_candidates = (0.2, 0.5, 1.0)

    fm = mcmc.FMRegression(n_iter=10, rank=5)

    stdev_train_only, mse_train_only = mcmc.find_init_stdev(
        fm, X_train, y_train, stdev_range=list(small_candidates))
    stdev_large_only, _ = mcmc.find_init_stdev(
        fm, X_train, y_train, stdev_range=[5.])
    print('--' * 30)
    stdev_with_vali, mse_with_vali = mcmc.find_init_stdev(
        fm, X_train, y_train, X_test, y_test,
        stdev_range=list(small_candidates))

    assert stdev_train_only < stdev_large_only
    assert stdev_with_vali == stdev_train_only
    assert mse_with_vali > mse_train_only
Example #3
0
def test_make_user_item_regression():
    """Noisy labels must give a higher held-out error than noise-free ones.

    Two datasets are generated with ``make_user_item_regression``: one with
    ``label_stdev=0`` and one with ``label_stdev=2``.  An MCMC
    ``FMRegression`` of rank 2 is fitted on each, and every model is scored
    against the test labels of its *own* dataset.
    """
    from fastFM2.mcmc import FMRegression
    from sklearn.model_selection import train_test_split

    # Noise-free labels.
    X, y, coef = make_user_item_regression(label_stdev=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)

    fm = FMRegression(rank=2)
    y_pred = fm.fit_predict(sp.csc_matrix(X_train),
                            y_train, sp.csc_matrix(X_test))
    # Score the noise-free model now: ``y_test`` is rebound below, and
    # scoring afterwards would compare these predictions with the noisy
    # labels of the second dataset.
    mse_clean = mean_squared_error(y_test, y_pred)

    # Generate data with noisy labels.
    X, y, coef = make_user_item_regression(label_stdev=2)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)

    fm = FMRegression(rank=2)
    y_pred_noise = fm.fit_predict(sp.csc_matrix(X_train),
                                  y_train, sp.csc_matrix(X_test))
    mse_noisy = mean_squared_error(y_test, y_pred_noise)

    assert mse_noisy > mse_clean
Example #4
0
def test_warm_start_path():
    """Check that stepwise warm starts trace the same path as fresh fits.

    One ALS model is created with ``n_iter=0``, fitted once to initialize
    its coefficients, and then advanced one iteration at a time via
    ``n_more_iter``.  After every step its train and test RMSE are
    recorded.  The same curve is then rebuilt by fitting a brand-new model
    (same seed and hyper-parameters) for each iteration budget, and the
    two curves are compared with ``assert_almost_equal``.
    """
    from sklearn.model_selection import train_test_split

    X, y, _ = make_user_item_regression(label_stdev=.4)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    X_train, X_test = sp.csc_matrix(X_train), sp.csc_matrix(X_test)

    total_iter = 10
    iters_per_step = 1
    # Identical seed and regularization for every model in this test.
    hyper_params = dict(l2_reg_w=0, l2_reg_V=0, rank=4, random_state=333)

    def rmse(model, X_eval, y_eval):
        # Root mean squared error of ``model`` on the given split.
        return np.sqrt(mean_squared_error(model.predict(X_eval), y_eval))

    # A zero-iteration fit only initializes the coefficients.
    warm = als.FMRegression(n_iter=0, **hyper_params)
    warm.fit(X_train, y_train)

    warm_train, warm_test = [], []
    for _ in range(1, total_iter):
        warm.fit(X_train, y_train, n_more_iter=iters_per_step)
        warm_train.append(rmse(warm, X_train, y_train))
        warm_test.append(rmse(warm, X_test, y_test))

    print('------- restart ----------')
    fresh_train, fresh_test = [], []
    for budget in np.arange(1, total_iter):
        fresh = als.FMRegression(n_iter=budget, **hyper_params)
        fresh.fit(X_train, y_train)
        fresh_test.append(rmse(fresh, X_test, y_test))
        fresh_train.append(rmse(fresh, X_train, y_train))

    assert_almost_equal(warm_train, fresh_train)
    assert_almost_equal(warm_test, fresh_test)
Example #5
0
def test_mcmc_warm_start():
    """Compare a 100-iteration MCMC run with a 50 + 50 resumed run.

    One model runs ``fit_predict`` with ``n_iter=100``.  A second model
    runs with ``n_iter=50`` and is then called again with
    ``n_more_iter=50``.  The held-out mean squared errors of the single
    run and the resumed run must agree to two decimal places.
    """
    from sklearn.model_selection import train_test_split

    X, y, _ = make_user_item_regression(label_stdev=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=44)
    X_train, X_test = sp.csc_matrix(X_train), sp.csc_matrix(X_test)

    # Reference: every iteration in one call.
    single_run = mcmc.FMRegression(n_iter=100, rank=2)
    mse_single_run = mean_squared_error(
        single_run.fit_predict(X_train, y_train, X_test), y_test)

    # Split run: half the iterations first ...
    resumed = mcmc.FMRegression(n_iter=50, rank=2)
    mse_first_half = mean_squared_error(
        resumed.fit_predict(X_train, y_train, X_test), y_test)

    # ... then the same model is asked for the remaining half.
    mse_resumed = mean_squared_error(
        resumed.fit_predict(X_train, y_train, X_test, n_more_iter=50),
        y_test)

    print(mse_first_half, mse_resumed, mse_single_run)
    print(resumed.hyper_param_)
    assert_almost_equal(mse_single_run, mse_resumed, decimal=2)