def test_grid_search():
    """Check that grid search over ``alpha`` picks 0.1 and scores like a direct fit.

    Uses ``X`` from the enclosing module scope as the data to fit and score.
    """
    splitter = ShuffleSplit(n_iter=5, random_state=0)
    base_model = ExplicitMF(n_components=3, max_iter=10, random_state=0)
    candidates = {"alpha": [0.1, 1.0, 10]}

    search = GridSearchCV(base_model, candidates, splitter)
    search.fit(X)

    # The smallest alpha in the grid is expected to be selected.
    expected_alpha = 0.1
    assert_equal(search.best_estimator_.alpha, expected_alpha)
    assert_equal(search.best_params_, {"alpha": expected_alpha})

    # A model fitted directly with the selected alpha must reach the same
    # score as the search object itself.
    reference = ExplicitMF(
        alpha=expected_alpha, n_components=3, max_iter=10, random_state=0
    )
    reference.fit(X)
    reference_score = reference.score(X)
    assert_almost_equal(reference_score, search.score(X))
def test_grid_search():
    """Grid-search ``alpha`` and compare the result against a hand-built model.

    The data matrix ``X`` is taken from the enclosing module scope.
    """
    # Keyword arguments shared by the searched and the reference estimator.
    shared_kwargs = dict(n_components=3, max_iter=10, random_state=0)

    folds = ShuffleSplit(n_iter=5, random_state=0)
    estimator = ExplicitMF(**shared_kwargs)
    grid = {"alpha": [0.1, 1.0, 10]}

    grid_search = GridSearchCV(estimator, grid, folds)
    grid_search.fit(X)

    # Both the refitted estimator and the reported parameters must agree
    # on alpha == 0.1.
    assert_equal(grid_search.best_estimator_.alpha, 0.1)
    assert_equal(grid_search.best_params_, {"alpha": 0.1})

    # Fit the same configuration by hand and compare the two scores.
    manual = ExplicitMF(alpha=0.1, **shared_kwargs)
    manual.fit(X)
    manual_score = manual.score(X)
    searched_score = grid_search.score(X)
    assert_almost_equal(manual_score, searched_score)
def test_matrix_fact_cd():
    """Fit ExplicitMF on a synthetic product matrix and cross-check its outputs.

    Verifies that ``predict`` agrees with the product of the fitted factors
    ``P_`` and ``Q_``, and that ``score`` equals the RMSE computed by hand.
    """
    # Toy data: product of a 50x3 and a 3x20 random matrix.
    random_state = np.random.RandomState(0)
    left = random_state.rand(50, 3)
    right = random_state.rand(3, 20)
    data = np.dot(left, right)

    model = ExplicitMF(
        n_components=3,
        max_iter=10,
        alpha=1e-3,
        random_state=0,
        verbose=0,
    )
    model.fit(data)

    # Reconstruction from the learned factors vs. the estimator's own prediction.
    from_factors = np.dot(model.P_, model.Q_)
    from_predict = model.predict(data).toarray()
    assert_array_almost_equal(from_factors, from_predict)

    # Hand-computed root mean squared error vs. the estimator's score.
    residual = data - from_factors
    expected_rmse = np.sqrt(np.mean(residual ** 2))
    reported_rmse = model.score(data)
    assert_almost_equal(expected_rmse, reported_rmse)
def test_matrix_fact_cd():
    """Check ExplicitMF predictions and score on a small synthetic matrix.

    The prediction must match ``np.dot(P_, Q_)`` and the score must match a
    manually computed RMSE against the input.
    """
    # Build a 50x20 toy matrix as the product of two random factors.
    prng = np.random.RandomState(0)
    row_factors = prng.rand(50, 3)
    col_factors = prng.rand(3, 20)
    target = np.dot(row_factors, col_factors)

    estimator = ExplicitMF(n_components=3, max_iter=10, alpha=1e-3,
                           random_state=0, verbose=0)
    estimator.fit(target)

    # predict() returns something with .toarray(); densify it before comparing.
    approx = np.dot(estimator.P_, estimator.Q_)
    predicted = estimator.predict(target).toarray()
    assert_array_almost_equal(approx, predicted)

    # score() is expected to equal the RMSE of the factor reconstruction.
    squared_error = (target - approx) ** 2
    manual_rmse = np.sqrt(np.mean(squared_error))
    scored_rmse = estimator.score(target)
    assert_almost_equal(manual_rmse, scored_rmse)
# Script body: fit ExplicitMF on a MovieLens train split with a callback, then
# plot the callback's ``obj`` and ``rmse`` series against its ``times`` series.

# The dataset version is the first command-line argument, when one is given.
try:
    version = sys.argv[1]
except IndexError:
    # Only a missing argument should trigger the default; a bare ``except``
    # would also swallow KeyboardInterrupt/SystemExit and unrelated errors.
    version = "100k"

X = load_movielens(version)
# Single-argument parenthesized print produces the same output on
# Python 2 (print statement) and Python 3 (print function).
print(X.shape)

X_tr, X_te = train_test_split(X, train_size=0.75, random_state=0)
X_tr = X_tr.tocsr()
X_te = X_te.tocsr()

# NOTE(review): Callback is defined outside this fragment; presumably it fills
# in ``times``, ``obj`` and ``rmse`` while the model is being fitted - confirm.
cb = Callback(X_tr, X_te)
mf = ExplicitMF(n_components=30, max_iter=50, alpha=0.1, verbose=1, callback=cb)
mf.fit(X_tr)

# Figure 1: objective value over time (log-scaled x axis).
plt.figure()
plt.plot(cb.times, cb.obj)
plt.xlabel("CPU time")
plt.xscale("log")
plt.ylabel("Objective value")

# Figure 2: RMSE over time (log-scaled x axis).
plt.figure()
plt.plot(cb.times, cb.rmse)
plt.xlabel("CPU time")
plt.xscale("log")
plt.ylabel("RMSE")

plt.show()
# Script: fit ExplicitMF on a MovieLens train split, then report the elapsed
# wall-clock time and the score obtained on the held-out split.
import sys
import time

from spira.datasets import load_movielens
from spira.cross_validation import train_test_split
from spira.completion import ExplicitMF

# The dataset version is the first command-line argument, when one is given.
try:
    version = sys.argv[1]
except IndexError:
    # Only a missing argument should trigger the default; a bare ``except``
    # would also swallow KeyboardInterrupt/SystemExit and unrelated errors.
    version = "100k"

X = load_movielens(version)
# Single-argument parenthesized print produces the same output on
# Python 2 (print statement) and Python 3 (print function).
print(X.shape)

X_tr, X_te = train_test_split(X, train_size=0.75, random_state=0)

# The timer covers estimator construction and fitting only.
start = time.time()
mf = ExplicitMF(n_components=30, max_iter=10, alpha=1e-1, random_state=0,
                verbose=1)
mf.fit(X_tr)

# ``%s`` formatting keeps the "<label> <value>" output of the original
# two-argument print statements while remaining valid on Python 2 and 3.
print("Time %s" % (time.time() - start,))
print("RMSE %s" % (mf.score(X_te),))