def check_fit(degree):
    """Check that a weakly regularized FM fits noiseless ANOVA targets.

    Targets come from ``_poly_predict`` with the ANOVA kernel of the given
    ``degree``. The model is trained and evaluated on the same data, and the
    resulting mean squared error must not exceed 1e-6.
    """
    target = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    model = FactorizationMachineRegressor(
        degree=degree,
        n_components=5,
        fit_linear=False,
        fit_lower=None,
        max_iter=15000,
        beta=1e-6,
        tol=1e-3,
        random_state=0
    )
    model.fit(X, target)
    train_mse = mean_squared_error(target, model.predict(X))

    failure_msg = "Error {} too big for degree {}.".format(train_mse, degree)
    assert_less_equal(train_mse, 1e-6, msg=failure_msg)
def check_same_as_slow(degree):
    """Compare the estimator's fitted ``P_`` with ``cd_direct_slow``.

    Both solvers are run for 5 iterations with ``beta=1``, ``tol=1e-3`` and
    seed 0; the slow solver is given the estimator's fitted ``lams_``. The
    first slice of ``P_`` must match the slow result to 4 decimals.
    """
    target = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    # Settings that both the estimator and the slow solver must agree on.
    shared = dict(degree=degree, n_components=5, beta=1, tol=1e-3,
                  random_state=0)

    fast = FactorizationMachineRegressor(fit_lower=None, fit_linear=False,
                                         warm_start=False, max_iter=5,
                                         **shared)

    # Warnings raised while fitting are deliberately silenced.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        fast.fit(X, target)

        slow_P = cd_direct_slow(X, target, lams=fast.lams_, n_iter=5,
                                **shared)

    assert_array_almost_equal(fast.P_[0, :, :], slow_P, decimal=4)
def test_random_starts():
    """Check that held-out error barely depends on the random seed.

    A degree-2 model is refit with seeds 0..9 on the first 10 samples of
    noisy data; the standard deviation of the test MSE across seeds must not
    exceed 0.001.
    """
    noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2)
    noisy_y += 5. * rng.randn(noisy_y.shape[0])

    n_train = 10
    X_train, X_test = X[:n_train], X[n_train:]
    y_train, y_test = noisy_y[:n_train], noisy_y[n_train:]

    # init_lambdas='ones' is important to reduce variance here
    reg = FactorizationMachineRegressor(
        degree=2,
        n_components=n_components,
        beta=5,
        fit_lower=None,
        fit_linear=False,
        max_iter=2000,
        init_lambdas='ones',
        tol=0.001
    )

    def _test_mse_for_seed(seed):
        # Re-seed the shared estimator, refit, and score on the held-out part.
        reg.set_params(random_state=seed)
        held_out_pred = reg.fit(X_train, y_train).predict(X_test)
        return mean_squared_error(y_test, held_out_pred)

    scores = [_test_mse_for_seed(seed) for seed in range(10)]

    assert_less_equal(np.std(scores), 0.001)
def test_augment():
    """Check the last dimension of ``P_`` when ``fit_lower='augment'``.

    With an explicit linear term the last axis of ``P_`` is expected to have
    ``n_features + 1`` entries; with ``fit_linear=False`` it is expected to
    have ``n_features + 2``.
    """
    target = _poly_predict(X, P, lams, kernel="anova", degree=3)
    model = FactorizationMachineRegressor(degree=3, fit_lower='augment',
                                          fit_linear=True, tol=0.1)

    model.fit(X, target)
    width_with_linear = model.P_.shape[2]
    assert_equal(n_features + 1, width_with_linear,
                 msg="Augmenting is wrong with explicit linear term.")

    # Same estimator, now without the explicit linear term.
    model.set_params(fit_linear=False)
    model.fit(X, target)
    width_without_linear = model.P_.shape[2]
    assert_equal(n_features + 2, width_without_linear,
                 msg="Augmenting is wrong with augmented linear term.")
def check_same_as_slow(degree):
    """Compare the estimator's fitted ``P_`` with ``cd_direct_slow``.

    Both solvers are run for 10 iterations with ``beta=1e-8``, ``tol=1e-3``
    and seed 0; the slow solver is given the estimator's fitted ``lams_``.
    The first slice of ``P_`` must match the slow result to 4 decimals.

    Raises
    ------
    SkipTest
        On builds whose pointers are narrower than 8 bytes.
    """
    # XXX: test was observed failing under 32-bit Windows, presumably due to
    # numerical issues. The guard below skips on *any* build with pointers
    # narrower than 8 bytes, not only on Windows.
    # ``c_void_p`` is the documented ctypes name; ``c_voidp`` is only a
    # backwards-compatibility alias for it.
    if ctypes.sizeof(ctypes.c_void_p) < 8:
        raise SkipTest("Numerical inconsistencies on Win32")

    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    reg = FactorizationMachineRegressor(degree=degree, n_components=5,
                                        fit_lower=None, fit_linear=False,
                                        beta=1e-8, warm_start=False, tol=1e-3,
                                        max_iter=10, random_state=0)

    # Warnings raised while fitting are deliberately silenced.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        reg.fit(X, y)

        # Reference solution computed with the same hyper-parameters and the
        # lambdas the estimator ended up with.
        P_fit_slow = cd_direct_slow(X, y, lams=reg.lams_, degree=degree,
                                    n_components=5, beta=1e-8, n_iter=10,
                                    tol=1e-3, random_state=0)

    assert_array_almost_equal(reg.P_[0, :, :], P_fit_slow, decimal=4)
def check_improve(degree):
    """Check that 10 iterations fit at least as well as 5.

    The same estimator (``tol=0``, seed 0) is fit with ``max_iter=5`` and
    then refit with ``max_iter=10``; the training MSE of the longer run must
    not exceed that of the shorter one.
    """
    target = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    model = FactorizationMachineRegressor(
        degree=degree,
        n_components=5,
        fit_lower=None,
        fit_linear=False,
        beta=0.0001,
        max_iter=5,
        tol=0,
        random_state=0
    )

    # Warnings raised during these short fits are deliberately silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        pred_after_5 = model.fit(X, target).predict(X)
        model.set_params(max_iter=10)
        pred_after_10 = model.fit(X, target).predict(X)

    mse_after_5 = mean_squared_error(target, pred_after_5)
    mse_after_10 = mean_squared_error(target, pred_after_10)

    assert_less_equal(mse_after_10, mse_after_5,
                      msg="More iterations do not improve fit.")
def check_overfit(degree):
    """Check that stronger regularization trades train error for test error.

    A model is fit on the first 10 noisy samples with ``beta=1e-4`` and then
    refit with ``beta=10``. The weakly regularized fit must have the lower
    (or equal) training MSE, and the strongly regularized fit the lower (or
    equal) test MSE.
    """
    noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=degree)
    noisy_y += 5. * rng.randn(noisy_y.shape[0])

    n_train = 10
    X_train, X_test = X[:n_train], X[n_train:]
    y_train, y_test = noisy_y[:n_train], noisy_y[n_train:]

    # weak regularization, should overfit
    model = FactorizationMachineRegressor(
        degree=degree,
        n_components=5,
        fit_linear=False,
        fit_lower=None,
        beta=1e-4,
        tol=0.01,
        random_state=0
    )
    weak_train_pred = model.fit(X_train, y_train).predict(X_train)
    weak_test_pred = model.predict(X_test)

    # high value of beta -> strong regularization
    model.set_params(beta=10)
    strong_train_pred = model.fit(X_train, y_train).predict(X_train)
    strong_test_pred = model.predict(X_test)

    weak_train_mse = mean_squared_error(y_train, weak_train_pred)
    strong_train_mse = mean_squared_error(y_train, strong_train_pred)
    assert_less_equal(weak_train_mse, strong_train_mse,
                      msg="Training error does not get worse with regul.")

    strong_test_mse = mean_squared_error(y_test, strong_test_pred)
    weak_test_mse = mean_squared_error(y_test, weak_test_pred)
    assert_less_equal(strong_test_mse, weak_test_mse,
                      msg="Test error does not get better with regul.")
def check_warm_start(degree):
    """Check warm-start behaviour of ``FactorizationMachineRegressor``.

    Part 1: with a fixed seed, ``P_`` after 10 cold-start iterations must
    match (a) two consecutive 5-iteration warm-started fits and (b) a
    5-iteration warm-started fit seeded with the ``P_``/``lams_`` saved after
    the first 5 iterations.

    Part 2: on noisy data, test-set predictions at ``beta = 0.1`` must agree
    to 4 decimals whether the model is fit directly or warm-started from a
    fit at a different ``beta``.
    """
    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    # Constructor arguments shared by every estimator in this check.
    common = dict(degree=degree, n_components=5, fit_lower=None,
                  fit_linear=False, random_state=0)

    # Result should be the same if:
    # (a) running 10 iterations
    ten_iter = FactorizationMachineRegressor(max_iter=10, warm_start=False,
                                             **common)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        ten_iter.fit(X, y)

    # (b) running 5 iterations and 5 more
    two_stage = FactorizationMachineRegressor(max_iter=5, warm_start=True,
                                              **common)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        two_stage.fit(X, y)
        # Snapshot the state after the first 5 iterations for case (c).
        P_snapshot = two_stage.P_.copy()
        lams_snapshot = two_stage.lams_.copy()
        two_stage.fit(X, y)

    # (c) running 5 iterations when starting from previous point.
    resumed = FactorizationMachineRegressor(max_iter=5, warm_start=True,
                                            **common)
    resumed.P_ = P_snapshot
    resumed.lams_ = lams_snapshot
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        resumed.fit(X, y)

    assert_array_almost_equal(ten_iter.P_, two_stage.P_)
    assert_array_almost_equal(ten_iter.P_, resumed.P_)

    # Prediction results should also be the same if:
    # (note: could not get this test to work for the exact P_.)

    # NOTE(review): the targets here always use degree=2, regardless of the
    # ``degree`` argument used for the estimators -- confirm this is intended.
    noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2)
    noisy_y += rng.randn(noisy_y.shape[0])
    X_train, X_test = X[:10], X[10:]
    y_train = noisy_y[:10]

    # NOTE(review): beta_low (0.5) is larger than the target beta (0.1), so
    # both warm-start paths actually begin from a larger beta -- confirm
    # whether a value below 0.1 was meant.
    beta_low = 0.5
    beta = 0.1
    beta_hi = 1

    direct = FactorizationMachineRegressor(beta=beta, max_iter=20000,
                                           **common)
    direct.fit(X_train, y_train)
    y_pred_ref = direct.predict(X_test)

    def _predict_after_refit(start_beta):
        # Fit at ``start_beta``, switch to the target beta, then refit with
        # warm_start=True and predict on the held-out rows.
        model = FactorizationMachineRegressor(beta=start_beta,
                                              warm_start=True, **common)
        model.fit(X_train, y_train)
        model.set_params(beta=beta)
        model.fit(X_train, y_train)
        return model.predict(X_test)

    # (a) starting from ``beta_low`` and refitting at the target beta
    y_pred_low = _predict_after_refit(beta_low)

    # (b) starting from higher beta, decreasing and refitting
    y_pred_hi = _predict_after_refit(beta_hi)

    assert_array_almost_equal(y_pred_low, y_pred_ref, decimal=4)
    assert_array_almost_equal(y_pred_hi, y_pred_ref, decimal=4)
def test_convergence_warning():
    """Check that a one-iteration fit emits a ``UserWarning`` about convergence.

    The warning message is required to contain the text "converge".
    """
    target = _poly_predict(X, P, lams, kernel="anova", degree=3)

    # A single iteration is not expected to be enough to converge.
    one_step_model = FactorizationMachineRegressor(
        degree=3,
        beta=1e-8,
        max_iter=1,
        random_state=0
    )
    assert_warns_message(UserWarning, "converge",
                         one_step_model.fit, X, target)
Ejemplo n.º 10
0
        lr.fit(x_all[:len(date), :], y)
        y_all = lr.predict(x_all)
        y_all_predict = y_all[len(date):, :]

        linear_weight = [5, 5]
        y_linear = np.zeros(len(predict_date), )
        for i in xrange(len(predict_date)):
            y_linear[i] = float(predict_linear[i] * linear_weight[0] +
                                y_all_predict[i] * linear_weight[1]) / float(
                                    sum(linear_weight))

        if FM_model_switch == 1:
            FM_model = FactorizationMachineRegressor(
                n_components=4,
                alpha=0,
                beta=0,
                init_lambdas='random_signs',
                max_iter=1000,
                verbose=False)
            FM_model.fit(x_all[:len(date), :], y)
            predict_FM = FM_model.predict(x_all[len(date):, :])
        else:
            predict_FM = np.zeros(len(predict_date), )

        if linear_bodong == 1:
            if linear_bodong_guize_FM == 0:
                calc_bodong_data = y_linear
            elif linear_bodong_guize_FM == 1:
                calc_bodong_data = predict_FM
            y_all_bodong = calc_bodong_data - np.mean(calc_bodong_data)
            y_4 += y_all_bodong[:, np.newaxis]
Ejemplo n.º 11
0
    print("y_train {0}".format(y_train.shape))
    print("X_test {0}".format(X_test.shape))
    print("X_test.format = {0}".format(X_test.format))
    print("X_test.dtype = {0}".format(X_test.dtype))
    print("y_test {0}".format(y_test.shape))
    print()

    print("Training regressors")
    print("===================")
    f1, accuracy, train_time, test_time = {}, {}, {}, {}

    print("Training our solver... ", end="")
    fm = FactorizationMachineRegressor(n_components=20,
                                       fit_linear=True,
                                       fit_lower=False,
                                       alpha=5,
                                       beta=5,
                                       degree=2,
                                       random_state=0,
                                       max_iter=100)
    t0 = time()
    fm.fit(X_train, y_train)
    train_time['polylearn'] = time() - t0
    t0 = time()
    y_pred = fm.predict(X_test) > 0
    test_time['polylearn'] = time() - t0
    accuracy['polylearn'] = accuracy_score(y_test, y_pred)
    f1['polylearn'] = f1_score(y_test, y_pred)
    print("done")

    try:
        from fastFM import als