def check_fit(degree):
    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    est = FactorizationMachineRegressor(degree=degree, n_components=5,
                                        fit_linear=False, fit_lower=None,
                                        max_iter=15000, beta=1e-6, tol=1e-3,
                                        random_state=0)
    est.fit(X, y)
    y_pred = est.predict(X)
    err = mean_squared_error(y, y_pred)

    assert_less_equal(
        err, 1e-6,
        msg="Error {} too big for degree {}.".format(err, degree))
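# A minimal driver sketch for the parametrized check above. The yield-based
# generator style is an assumption (it matches nose-era scikit-learn test
# conventions); the degrees exercised are illustrative, not from the source.
def test_fit():
    for degree in (2, 3):
        yield check_fit, degree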
def check_same_as_slow(degree):
    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    reg = FactorizationMachineRegressor(degree=degree, n_components=5,
                                        fit_lower=None, fit_linear=False,
                                        beta=1, warm_start=False, tol=1e-3,
                                        max_iter=5, random_state=0)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        reg.fit(X, y)

    P_fit_slow = cd_direct_slow(X, y, lams=reg.lams_, degree=degree,
                                n_components=5, beta=1, n_iter=5,
                                tol=1e-3, random_state=0)

    assert_array_almost_equal(reg.P_[0, :, :], P_fit_slow, decimal=4)
def test_random_starts():
    noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2)
    noisy_y += 5. * rng.randn(noisy_y.shape[0])

    X_train, X_test = X[:10], X[10:]
    y_train, y_test = noisy_y[:10], noisy_y[10:]

    scores = []
    # init_lambdas='ones' is important to reduce variance here
    reg = FactorizationMachineRegressor(degree=2, n_components=n_components,
                                        beta=5, fit_lower=None,
                                        fit_linear=False, max_iter=2000,
                                        init_lambdas='ones', tol=0.001)
    for k in range(10):
        reg.set_params(random_state=k)
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        scores.append(mean_squared_error(y_test, y_pred))

    assert_less_equal(np.std(scores), 0.001)
def test_augment():
    """Test that augmenting the data increases the dimension as expected."""
    y = _poly_predict(X, P, lams, kernel="anova", degree=3)

    fm = FactorizationMachineRegressor(degree=3, fit_lower='augment',
                                       fit_linear=True, tol=0.1)
    fm.fit(X, y)
    # With an explicit linear term, augmenting adds a single column.
    assert_equal(n_features + 1, fm.P_.shape[2],
                 msg="Augmenting is wrong with explicit linear term.")

    fm.set_params(fit_linear=False)
    fm.fit(X, y)
    # Without it, a second augmented column absorbs the linear term too.
    assert_equal(n_features + 2, fm.P_.shape[2],
                 msg="Augmenting is wrong with augmented linear term.")
def check_same_as_slow(degree):
    # XXX: test fails under 32-bit Windows, presumably due to numerical
    # issues.
    if ctypes.sizeof(ctypes.c_voidp) < 8:
        raise SkipTest("Numerical inconsistencies on Win32")

    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    reg = FactorizationMachineRegressor(degree=degree, n_components=5,
                                        fit_lower=None, fit_linear=False,
                                        beta=1e-8, warm_start=False, tol=1e-3,
                                        max_iter=10, random_state=0)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        reg.fit(X, y)

    P_fit_slow = cd_direct_slow(X, y, lams=reg.lams_, degree=degree,
                                n_components=5, beta=1e-8, n_iter=10,
                                tol=1e-3, random_state=0)

    assert_array_almost_equal(reg.P_[0, :, :], P_fit_slow, decimal=4)
def check_improve(degree):
    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    est = FactorizationMachineRegressor(degree=degree, n_components=5,
                                        fit_lower=None, fit_linear=False,
                                        beta=0.0001, max_iter=5, tol=0,
                                        random_state=0)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        y_pred_5 = est.fit(X, y).predict(X)
        est.set_params(max_iter=10)
        y_pred_10 = est.fit(X, y).predict(X)

    assert_less_equal(mean_squared_error(y, y_pred_10),
                      mean_squared_error(y, y_pred_5),
                      msg="More iterations do not improve fit.")
def check_overfit(degree):
    noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=degree)
    noisy_y += 5. * rng.randn(noisy_y.shape[0])

    X_train, X_test = X[:10], X[10:]
    y_train, y_test = noisy_y[:10], noisy_y[10:]

    # weak regularization, should overfit
    est = FactorizationMachineRegressor(degree=degree, n_components=5,
                                        fit_linear=False, fit_lower=None,
                                        beta=1e-4, tol=0.01, random_state=0)
    y_train_pred_weak = est.fit(X_train, y_train).predict(X_train)
    y_test_pred_weak = est.predict(X_test)

    est.set_params(beta=10)  # high value of beta -> strong regularization
    y_train_pred_strong = est.fit(X_train, y_train).predict(X_train)
    y_test_pred_strong = est.predict(X_test)

    assert_less_equal(mean_squared_error(y_train, y_train_pred_weak),
                      mean_squared_error(y_train, y_train_pred_strong),
                      msg="Training error does not get worse with regul.")

    assert_less_equal(mean_squared_error(y_test, y_test_pred_strong),
                      mean_squared_error(y_test, y_test_pred_weak),
                      msg="Test error does not get better with regul.")
def check_warm_start(degree):
    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    # Result should be the same if:
    # (a) running 10 iterations
    clf_10 = FactorizationMachineRegressor(degree=degree, n_components=5,
                                           fit_lower=None, fit_linear=False,
                                           max_iter=10, warm_start=False,
                                           random_state=0)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        clf_10.fit(X, y)

    # (b) running 5 iterations and then 5 more
    clf_5_5 = FactorizationMachineRegressor(degree=degree, n_components=5,
                                            fit_lower=None, fit_linear=False,
                                            max_iter=5, warm_start=True,
                                            random_state=0)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        clf_5_5.fit(X, y)
        P_fit = clf_5_5.P_.copy()
        lams_fit = clf_5_5.lams_.copy()
        clf_5_5.fit(X, y)

    # (c) running 5 iterations when starting from the previous point.
    clf_5 = FactorizationMachineRegressor(degree=degree, n_components=5,
                                          fit_lower=None, fit_linear=False,
                                          max_iter=5, warm_start=True,
                                          random_state=0)
    clf_5.P_ = P_fit
    clf_5.lams_ = lams_fit
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        clf_5.fit(X, y)

    assert_array_almost_equal(clf_10.P_, clf_5_5.P_)
    assert_array_almost_equal(clf_10.P_, clf_5.P_)

    # Prediction results should also be the same when warm-starting across
    # different values of beta.
    # (Note: could not get this test to work for the exact P_.)
    noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2)
    noisy_y += rng.randn(noisy_y.shape[0])

    X_train, X_test = X[:10], X[10:]
    y_train, y_test = noisy_y[:10], noisy_y[10:]

    beta_low = 0.5
    beta = 0.1
    beta_hi = 1

    ref = FactorizationMachineRegressor(degree=degree, n_components=5,
                                        fit_linear=False, fit_lower=None,
                                        beta=beta, max_iter=20000,
                                        random_state=0)
    ref.fit(X_train, y_train)
    y_pred_ref = ref.predict(X_test)

    # (a) warm-starting from a model fit at beta_low, then refitting at beta
    from_low = FactorizationMachineRegressor(degree=degree, n_components=5,
                                             fit_lower=None, fit_linear=False,
                                             beta=beta_low, warm_start=True,
                                             random_state=0)
    from_low.fit(X_train, y_train)
    from_low.set_params(beta=beta)
    from_low.fit(X_train, y_train)
    y_pred_low = from_low.predict(X_test)

    # (b) warm-starting from a model fit at beta_hi, then refitting at beta
    from_hi = FactorizationMachineRegressor(degree=degree, n_components=5,
                                            fit_lower=None, fit_linear=False,
                                            beta=beta_hi, warm_start=True,
                                            random_state=0)
    from_hi.fit(X_train, y_train)
    from_hi.set_params(beta=beta)
    from_hi.fit(X_train, y_train)
    y_pred_hi = from_hi.predict(X_test)

    assert_array_almost_equal(y_pred_low, y_pred_ref, decimal=4)
    assert_array_almost_equal(y_pred_hi, y_pred_ref, decimal=4)
def test_convergence_warning():
    y = _poly_predict(X, P, lams, kernel="anova", degree=3)

    est = FactorizationMachineRegressor(degree=3, beta=1e-8, max_iter=1,
                                        random_state=0)
    assert_warns_message(UserWarning, "converge", est.fit, X, y)
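# The checks above rely on module-level fixtures. A minimal sketch of what
# they could look like; the shapes and values below are assumptions, only
# the names (X, P, lams, rng, n_features, n_components) come from the code
# above.
import numpy as np

n_samples, n_features, n_components = 20, 7, 3

rng = np.random.RandomState(1)
X = rng.randn(n_samples, n_features)
P = rng.randn(n_components, n_features)   # ground-truth basis vectors
lams = rng.randn(n_components)            # ground-truth component weights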
lr.fit(x_all[:len(date), :], y)
y_all = lr.predict(x_all)
y_all_predict = y_all[len(date):, :]

# Blend the two linear predictions with fixed weights.
linear_weight = [5, 5]
y_linear = np.zeros(len(predict_date))
for i in range(len(predict_date)):  # `xrange` in the Python 2 original
    y_linear[i] = float(predict_linear[i] * linear_weight[0] +
                        y_all_predict[i] * linear_weight[1]) / \
        float(sum(linear_weight))

if FM_model_switch == 1:
    FM_model = FactorizationMachineRegressor(n_components=4, alpha=0, beta=0,
                                             init_lambdas='random_signs',
                                             max_iter=1000, verbose=False)
    FM_model.fit(x_all[:len(date), :], y)
    predict_FM = FM_model.predict(x_all[len(date):, :])
else:
    predict_FM = np.zeros(len(predict_date))

# "bodong" = fluctuation, "guize" = rule (pinyin identifiers kept as-is).
if linear_bodong == 1:
    if linear_bodong_guize_FM == 0:
        calc_bodong_data = y_linear
    elif linear_bodong_guize_FM == 1:
        calc_bodong_data = predict_FM
    # Center the chosen predictions and add them as a fluctuation term.
    y_all_bodong = calc_bodong_data - np.mean(calc_bodong_data)
    y_4 += y_all_bodong[:, np.newaxis]
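# Side note (a hedged sketch, using only names already defined above): the
# element-wise loop is a plain weighted average, and with linear_weight of
# [5, 5] it reduces to the mean of the two prediction vectors. A vectorized
# equivalent would be:
#
#     y_linear = (linear_weight[0] * predict_linear +
#                 linear_weight[1] * y_all_predict.ravel()) / sum(linear_weight)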
print("y_train {0}".format(y_train.shape)) print("X_test {0}".format(X_test.shape)) print("X_test.format = {0}".format(X_test.format)) print("X_test.dtype = {0}".format(X_test.dtype)) print("y_test {0}".format(y_test.shape)) print() print("Training regressors") print("===================") f1, accuracy, train_time, test_time = {}, {}, {}, {} print("Training our solver... ", end="") fm = FactorizationMachineRegressor(n_components=20, fit_linear=True, fit_lower=False, alpha=5, beta=5, degree=2, random_state=0, max_iter=100) t0 = time() fm.fit(X_train, y_train) train_time['polylearn'] = time() - t0 t0 = time() y_pred = fm.predict(X_test) > 0 test_time['polylearn'] = time() - t0 accuracy['polylearn'] = accuracy_score(y_test, y_pred) f1['polylearn'] = f1_score(y_test, y_pred) print("done") try: from fastFM import als