def test_dict_completion_normalise(backend):
    """Detrended fit on dense data: predict() and score() must match the
    manual reconstruction from P_/Q_ plus the fitted row/column means."""
    # Rank-3 toy matrix with a fixed seed for reproducibility.
    rng = np.random.RandomState(0)
    left = rng.rand(50, 3)
    right = rng.rand(3, 20)
    data = np.dot(left, right)

    mf = DictCompleter(n_components=3, max_n_iter=100, alpha=1e-3,
                       random_state=0, backend=backend, verbose=0,
                       detrend=True)
    mf.fit(data)

    # Manual reconstruction: low-rank product plus both bias terms.
    recon = np.dot(mf.P_, mf.Q_)
    recon += mf.row_mean_[:, np.newaxis]
    recon += mf.col_mean_[np.newaxis, :]
    assert_array_almost_equal(recon, mf.predict(data).toarray())

    # score() must report the same RMSE we compute by hand.
    manual_rmse = np.sqrt(np.mean((data - recon) ** 2))
    assert_almost_equal(manual_rmse, mf.score(data))
def test_dict_completion_missing(backend):
    """With held-out entries, the fitted model must reconstruct the test
    values better than the bias-only centering baseline."""
    rng = np.random.RandomState(0)
    dense = np.dot(rng.rand(100, 4), rng.rand(4, 20))
    full = sp.csr_matrix(dense)

    # Split observed entries and make sure both parts are CSR.
    X_tr, X_te = train_test_split(full, train_size=0.95)
    X_tr = sp.csr_matrix(X_tr)
    X_te = sp.csr_matrix(X_te)

    mf = DictCompleter(n_components=4, max_n_iter=400, alpha=1,
                       random_state=0, backend=backend, detrend=True,
                       verbose=0)
    mf.fit(X_tr)

    # RMSE over the observed test entries only.
    n_obs = X_te.data.shape[0]
    predicted = mf.predict(X_te)
    rmse = sqrt(np.sum((X_te.data - predicted.data) ** 2) / n_obs)

    # Baseline: residual of pure row/column centering.
    centered, _, _ = csr_center_data(X_te)
    rmse_baseline = sqrt(np.sum((X_te.data - centered.data) ** 2) / n_obs)

    assert rmse < rmse_baseline
def test_dict_completion_missing(backend):
    """With held-out entries, the fitted model must beat a baseline that
    only subtracts the row/column biases computed by compute_biases."""
    rng = np.random.RandomState(0)
    dense = np.dot(rng.rand(100, 4), rng.rand(4, 20))
    full = sp.csr_matrix(dense)

    X_tr, X_te = train_test_split(full, train_size=0.95)
    X_tr = sp.csr_matrix(X_tr)
    X_te = sp.csr_matrix(X_te)

    mf = DictCompleter(n_components=4, max_n_iter=400, alpha=1,
                       random_state=0, backend=backend, detrend=True,
                       verbose=0)
    mf.fit(X_tr)

    n_obs = X_te.data.shape[0]
    X_pred = mf.predict(X_te)
    rmse = sqrt(np.sum((X_te.data - X_pred.data) ** 2) / n_obs)

    # Bias-only baseline: remove row means per CSR row slice, then the
    # column means indexed by the stored column indices.
    row_mean_, col_mean_ = compute_biases(X, beta=mf.beta, inplace=False)
    X_te_c = copy.deepcopy(X_te)
    bounds = zip(X_te_c.indptr[:-1], X_te_c.indptr[1:])
    for row, (start, stop) in enumerate(bounds):
        X_te_c.data[start:stop] -= row_mean_[row]
    X_te_c.data -= col_mean_.take(X_te_c.indices, mode='clip')
    rmse_c = sqrt(np.sum((X_te.data - X_te_c.data) ** 2) / n_obs)

    assert rmse < rmse_c
def test_dict_completion_missing(backend):
    """The completer fitted on 95% of the entries should predict the
    remaining entries better than bias (mean) removal alone."""
    rng = np.random.RandomState(0)
    X = sp.csr_matrix(np.dot(rng.rand(100, 4), rng.rand(4, 20)))

    X_tr, X_te = train_test_split(X, train_size=0.95)
    X_tr, X_te = sp.csr_matrix(X_tr), sp.csr_matrix(X_te)

    mf = DictCompleter(n_components=4, max_n_iter=400, alpha=1,
                       random_state=0, backend=backend, detrend=True,
                       verbose=0)
    mf.fit(X_tr)
    X_pred = mf.predict(X_te)

    residual = X_te.data - X_pred.data
    rmse = sqrt(np.sum(residual ** 2) / X_te.data.shape[0])

    # Baseline residual: subtract the global row and column biases.
    row_mean_, col_mean_ = compute_biases(X, beta=mf.beta, inplace=False)
    X_te_c = copy.deepcopy(X_te)
    indptr = X_te_c.indptr
    for i in range(X_te_c.shape[0]):
        X_te_c.data[indptr[i]:indptr[i + 1]] -= row_mean_[i]
    X_te_c.data -= col_mean_.take(X_te_c.indices, mode='clip')
    baseline = X_te.data - X_te_c.data
    rmse_c = sqrt(np.sum(baseline ** 2) / X_te.data.shape[0])

    assert rmse < rmse_c
def test_dict_completion_normalise(backend):
    """Check that, with detrend=True, predict() equals P_ @ Q_ plus the
    fitted biases, and score() equals the hand-computed RMSE."""
    rng = np.random.RandomState(0)
    X = np.dot(rng.rand(50, 3), rng.rand(3, 20))

    mf = DictCompleter(n_components=3, max_n_iter=100, alpha=1e-3,
                       random_state=0, backend=backend, verbose=0,
                       detrend=True)
    mf.fit(X)

    # Reconstruct by hand: factors plus column then row means.
    expected = np.dot(mf.P_, mf.Q_)
    expected = expected + mf.col_mean_[np.newaxis, :]
    expected = expected + mf.row_mean_[:, np.newaxis]

    predicted = mf.predict(X).toarray()
    assert_array_almost_equal(expected, predicted)

    assert_almost_equal(np.sqrt(np.mean((X - expected) ** 2)), mf.score(X))
def test_dict_completion(backend):
    """Without detrending, predict() must equal code_ @ components_ and
    score() must equal the RMSE of that reconstruction."""
    rng = np.random.RandomState(0)
    data = np.dot(rng.rand(50, 3), rng.rand(3, 20))

    mf = DictCompleter(n_components=3, max_n_iter=100, alpha=1e-3,
                       random_state=0, detrend=False, backend=backend,
                       verbose=0)
    mf.fit(data)

    recon = np.dot(mf.code_, mf.components_)
    assert_array_almost_equal(recon, mf.predict(data).toarray())

    manual_rmse = np.sqrt(np.mean((data - recon) ** 2))
    assert_almost_equal(manual_rmse, mf.score(data))
def test_dict_completion_missing(backend):
    """Model fitted on 95% of the entries must beat the centering baseline
    (csr_center_data) on the held-out entries."""
    rng = np.random.RandomState(0)
    X = sp.csr_matrix(np.dot(rng.rand(100, 4), rng.rand(4, 20)))

    X_tr, X_te = train_test_split(X, train_size=0.95)
    X_tr, X_te = sp.csr_matrix(X_tr), sp.csr_matrix(X_te)

    mf = DictCompleter(n_components=4, max_n_iter=400, alpha=1,
                       random_state=0, backend=backend, detrend=True,
                       verbose=0)
    mf.fit(X_tr)

    X_pred = mf.predict(X_te)
    rmse = sqrt(np.sum((X_te.data - X_pred.data) ** 2)
                / X_te.data.shape[0])

    X_te_c, _, _ = csr_center_data(X_te)
    rmse_c = sqrt(np.sum((X_te.data - X_te_c.data) ** 2)
                  / X_te.data.shape[0])

    assert rmse < rmse_c
def test_dict_completion(backend):
    """Plain (non-detrended) completion: the dense prediction equals the
    factor product, and score() reports its RMSE against the input."""
    rng = np.random.RandomState(0)
    factors_left = rng.rand(50, 3)
    factors_right = rng.rand(3, 20)
    X = np.dot(factors_left, factors_right)

    mf = DictCompleter(n_components=3, max_n_iter=100, alpha=1e-3,
                       random_state=0, detrend=False, backend=backend,
                       verbose=0)
    mf.fit(X)

    reconstruction = np.dot(mf.code_, mf.components_)
    dense_prediction = mf.predict(X).toarray()
    assert_array_almost_equal(reconstruction, dense_prediction)

    expected_rmse = np.sqrt(np.mean((X - reconstruction) ** 2))
    assert_almost_equal(expected_rmse, mf.score(X))
X_pred = mf.predict(self.X_te) rmse = np.sqrt(np.mean((X_pred.data - self.X_te.data)**2)) self.rmse.append(rmse) print('Test RMSE: ', rmse) self.test_time += time.clock() - test_time self.times.append(time.clock() - self.start_time - self.test_time) random_state = 0 mf = DictCompleter(n_components=30, alpha=.001, beta=0, verbose=3, batch_size=1000, detrend=True, offset=0, projection='partial', random_state=0, learning_rate=.9, n_epochs=5, backend='python') # Need to download from spira X = load_movielens('10m') X_tr, X_te = train_test_split(X, train_size=0.75, random_state=random_state) X_tr = X_tr.tocsr() X_te = X_te.tocsr() cb = Callback(X_tr, X_te) mf.set_params(callback=cb) t0 = time.time()
rmse = np.sqrt(np.mean((X_pred.data - self.X_te.data) ** 2)) self.rmse.append(rmse) print("Test RMSE: ", rmse) self.test_time += time.clock() - test_time self.times.append(time.clock() - self.start_time - self.test_time) random_state = 0 mf = DictCompleter( n_components=30, alpha=0.001, beta=0, verbose=3, batch_size=1000, detrend=True, offset=0, projection="partial", random_state=0, learning_rate=0.9, n_epochs=5, backend="python", ) # Need to download from spira X = load_movielens("10m") X_tr, X_te = train_test_split(X, train_size=0.75, random_state=random_state) X_tr = X_tr.tocsr() X_te = X_te.tocsr() cb = Callback(X_tr, X_te) mf.set_params(callback=cb)
rmse_tr = np.sqrt(np.mean((X_pred.data - self.X_tr.data) ** 2)) self.rmse.append(rmse) self.rmse_tr.append(rmse_tr) self.q.append(mf.Q_[1, :10].copy()) self.test_time += time.clock() - test_time self.times.append(time.clock() - self.start_time - self.test_time) random_state = 0 mf = DictCompleter(n_components=30, alpha=.8, verbose=5, batch_size=60, detrend=True, offset=0, impute=False, fit_intercept=True, random_state=0, learning_rate=0.8, max_n_iter=60000, backend='c') # Need to download from spira X = load_movielens('1m') X_tr, X_te = train_test_split(X, train_size=0.75, random_state=random_state) X_tr = X_tr.tocsr() X_te = X_te.tocsr() cb = Callback(X_tr, X_te) mf.set_params(callback=cb) mf.fit(X_tr)
self.rmse.append(rmse) self.rmse_tr.append(rmse_tr) self.q.append(mf.Q_[1, :10].copy()) self.test_time += time.clock() - test_time self.times.append(time.clock() - self.start_time - self.test_time) random_state = 0 mf = DictCompleter(n_components=30, alpha=.8, verbose=5, batch_size=60, detrend=True, offset=0, impute=False, fit_intercept=True, random_state=0, learning_rate=0.8, max_n_iter=60000, backend='c') # Need to download from spira X = load_movielens('1m') X_tr, X_te = train_test_split(X, train_size=0.75, random_state=random_state) X_tr = X_tr.tocsr() X_te = X_te.tocsr() cb = Callback(X_tr, X_te) mf.set_params(callback=cb) mf.fit(X_tr)
trace_dir = expanduser('~/output/modl/recsys_bias') estimator_grid = { 'cd': { 'estimator': ExplicitMF( n_components=30, detrend=True, ), 'name': 'Coordinate descent' }, 'dl': { 'estimator': DictCompleter(n_components=30, detrend=True, projection='full', fit_intercept=True, backend='c'), 'name': 'Proposed online masked MF' }, 'dl_partial': { 'estimator': DictCompleter(n_components=30, detrend=True, projection='partial', fit_intercept=True, backend='c'), 'name': 'Proposed algorithm' ' (with partial projection)'