예제 #1
0
def test_dict_completion_normalise(backend):
    """Check predict() and score() of a detrended DictCompleter on dense data.

    The prediction is rebuilt by hand as ``np.dot(P_, Q_)`` plus the fitted
    column and row means and must match ``predict``; the RMSE of that
    rebuild against the input must match ``score``.

    Parameters
    ----------
    backend
        Forwarded unchanged to ``DictCompleter(backend=...)``. Presumably
        supplied by the test runner (fixture or parametrization) -- confirm.
    """
    # Fully observed 50 x 20 toy matrix: product of two seeded random factors.
    prng = np.random.RandomState(0)
    left = prng.rand(50, 3)
    right = prng.rand(3, 20)
    X = np.dot(left, right)

    settings = dict(
        n_components=3,
        max_n_iter=100,
        alpha=1e-3,
        random_state=0,
        backend=backend,
        verbose=0,
        detrend=True,
    )
    mf = DictCompleter(**settings)
    mf.fit(X)

    # Manual reconstruction: factor product, then the column means broadcast
    # over rows and the row means broadcast over columns.
    expected = np.dot(mf.P_, mf.Q_)
    expected += mf.col_mean_[np.newaxis, :]
    expected += mf.row_mean_[:, np.newaxis]
    predicted = mf.predict(X).toarray()

    assert_array_almost_equal(expected, predicted)

    # score() is expected to equal the RMSE of the manual reconstruction.
    manual_rmse = np.sqrt(np.mean((X - expected) ** 2))
    assert_almost_equal(manual_rmse, mf.score(X))
예제 #2
0
def test_dict_completion_missing(backend):
    """Fit on a subset of entries and check held-out RMSE against a baseline.

    A toy matrix of rank at most 4 is split into train and test parts, a
    detrended ``DictCompleter`` is fitted on the train part, and its RMSE on
    the test entries must be smaller than the root-mean-square of what
    ``csr_center_data`` removes from those same entries.

    Parameters
    ----------
    backend
        Forwarded unchanged to ``DictCompleter(backend=...)``. Presumably
        supplied by the test runner (fixture or parametrization) -- confirm.
    """
    # Generate some toy data.
    rng = np.random.RandomState(0)
    U = rng.rand(100, 4)
    V = rng.rand(4, 20)
    X = np.dot(U, V)
    X = sp.csr_matrix(X)

    # Seed the split: without it the held-out set changes on every run and
    # the test is not reproducible.
    X_tr, X_te = train_test_split(X, train_size=0.95, random_state=0)
    X_tr = sp.csr_matrix(X_tr)
    X_te = sp.csr_matrix(X_te)

    mf = DictCompleter(
        n_components=4,
        max_n_iter=400,
        alpha=1,
        random_state=0,
        backend=backend,
        detrend=True,
        verbose=0,
    )

    mf.fit(X_tr)
    X_pred = mf.predict(X_te)

    # NOTE(review): comparing ``.data`` arrays assumes predict() returns a
    # sparse matrix with the same stored-entry order as X_te -- confirm.
    n_test = X_te.data.shape[0]
    rmse = sqrt(np.sum((X_te.data - X_pred.data) ** 2) / n_test)

    # Baseline: magnitude of the difference between raw and centered entries.
    X_te_c, _, _ = csr_center_data(X_te)
    rmse_c = sqrt(np.sum((X_te.data - X_te_c.data) ** 2) / n_test)
    assert rmse < rmse_c
예제 #3
0
def test_dict_completion_missing(backend):
    """Fit on a subset of entries and check held-out RMSE against a bias baseline.

    A toy matrix of rank at most 4 is split into train and test parts, a
    detrended ``DictCompleter`` is fitted on the train part, and its RMSE on
    the test entries must be smaller than the root-mean-square of the
    row-plus-column bias (from ``compute_biases``) at those same entries.

    Parameters
    ----------
    backend
        Forwarded unchanged to ``DictCompleter(backend=...)``. Presumably
        supplied by the test runner (fixture or parametrization) -- confirm.
    """
    # Generate some toy data.
    rng = np.random.RandomState(0)
    U = rng.rand(100, 4)
    V = rng.rand(4, 20)
    X = np.dot(U, V)
    X = sp.csr_matrix(X)

    # Seed the split: without it the held-out set changes on every run and
    # the test is not reproducible.
    X_tr, X_te = train_test_split(X, train_size=0.95, random_state=0)
    X_tr = sp.csr_matrix(X_tr)
    X_te = sp.csr_matrix(X_te)

    mf = DictCompleter(
        n_components=4,
        max_n_iter=400,
        alpha=1,
        random_state=0,
        backend=backend,
        detrend=True,
        verbose=0,
    )

    mf.fit(X_tr)
    X_pred = mf.predict(X_te)

    # NOTE(review): comparing ``.data`` arrays assumes predict() returns a
    # sparse matrix with the same stored-entry order as X_te -- confirm.
    n_test = X_te.data.shape[0]
    rmse = sqrt(np.sum((X_te.data - X_pred.data) ** 2) / n_test)

    row_mean_, col_mean_ = compute_biases(X, beta=mf.beta, inplace=False)
    X_te_c = copy.deepcopy(X_te)
    # Row bias: one CSR row slice at a time.
    for i in range(X_te_c.shape[0]):
        X_te_c.data[X_te_c.indptr[i]:X_te_c.indptr[i + 1]] -= row_mean_[i]
    # Column bias: a single whole-array update. It must stay outside the row
    # loop, otherwise it is subtracted once per row and inflates the baseline.
    X_te_c.data -= col_mean_.take(X_te_c.indices, mode='clip')

    rmse_c = sqrt(np.sum((X_te.data - X_te_c.data) ** 2) / n_test)
    assert rmse < rmse_c
예제 #4
0
def test_dict_completion_missing(backend):
    """Fit on a subset of entries and check held-out RMSE against a bias baseline.

    A toy matrix of rank at most 4 is split into train and test parts, a
    detrended ``DictCompleter`` is fitted on the train part, and its RMSE on
    the test entries must be smaller than the root-mean-square of the
    row-plus-column bias (from ``compute_biases``) at those same entries.

    Parameters
    ----------
    backend
        Forwarded unchanged to ``DictCompleter(backend=...)``. Presumably
        supplied by the test runner (fixture or parametrization) -- confirm.
    """
    # Generate some toy data.
    rng = np.random.RandomState(0)
    U = rng.rand(100, 4)
    V = rng.rand(4, 20)
    X = np.dot(U, V)
    X = sp.csr_matrix(X)

    # Seed the split: without it the held-out set changes on every run and
    # the test is not reproducible.
    X_tr, X_te = train_test_split(X, train_size=0.95, random_state=0)
    X_tr = sp.csr_matrix(X_tr)
    X_te = sp.csr_matrix(X_te)

    mf = DictCompleter(n_components=4, max_n_iter=400, alpha=1,
                       random_state=0,
                       backend=backend,
                       detrend=True,
                       verbose=0, )

    mf.fit(X_tr)
    X_pred = mf.predict(X_te)

    # NOTE(review): comparing ``.data`` arrays assumes predict() returns a
    # sparse matrix with the same stored-entry order as X_te -- confirm.
    n_test = X_te.data.shape[0]
    rmse = sqrt(np.sum((X_te.data - X_pred.data) ** 2) / n_test)

    row_mean_, col_mean_ = compute_biases(X, beta=mf.beta, inplace=False)
    X_te_c = copy.deepcopy(X_te)
    # Row bias: one CSR row slice at a time.
    for i in range(X_te_c.shape[0]):
        X_te_c.data[X_te_c.indptr[i]:X_te_c.indptr[i + 1]] -= row_mean_[i]
    # Column bias: a single whole-array update. It must stay outside the row
    # loop, otherwise it is subtracted once per row and inflates the baseline.
    X_te_c.data -= col_mean_.take(X_te_c.indices, mode='clip')

    rmse_c = sqrt(np.sum((X_te.data - X_te_c.data) ** 2) / n_test)
    assert rmse < rmse_c
예제 #5
0
def test_dict_completion_normalise(backend):
    """Verify a detrended DictCompleter against a hand-built reconstruction.

    After fitting on a dense toy matrix, ``np.dot(P_, Q_)`` plus the fitted
    column and row means must equal the dense form of ``predict(X)``, and
    the RMSE of that reconstruction must equal ``score(X)``.

    Parameters
    ----------
    backend
        Forwarded unchanged to ``DictCompleter(backend=...)``. Presumably
        supplied by the test runner (fixture or parametrization) -- confirm.
    """
    # Seeded toy data: (50 x 3) times (3 x 20).
    generator = np.random.RandomState(0)
    factor_a = generator.rand(50, 3)
    factor_b = generator.rand(3, 20)
    X = np.dot(factor_a, factor_b)

    mf = DictCompleter(
        n_components=3,
        max_n_iter=100,
        alpha=1e-3,
        random_state=0,
        backend=backend,
        verbose=0,
        detrend=True,
    )
    mf.fit(X)

    # Rebuild the prediction: factor product first, then add the column
    # means along axis 1 and the row means along axis 0.
    rebuilt = np.dot(mf.P_, mf.Q_)
    rebuilt += mf.col_mean_[np.newaxis, :]
    rebuilt += mf.row_mean_[:, np.newaxis]
    from_predict = mf.predict(X).toarray()

    assert_array_almost_equal(rebuilt, from_predict)

    squared_error = (X - rebuilt) ** 2
    rmse_by_hand = np.sqrt(np.mean(squared_error))
    rmse_from_score = mf.score(X)

    assert_almost_equal(rmse_by_hand, rmse_from_score)
예제 #6
0
def test_dict_completion(backend):
    """Check predict() and score() of a non-detrended DictCompleter.

    With ``detrend=False`` the product ``np.dot(code_, components_)`` must
    equal the dense form of ``predict(X)``, and the RMSE of that product
    against the input must equal ``score(X)``.

    Parameters
    ----------
    backend
        Forwarded unchanged to ``DictCompleter(backend=...)``. Presumably
        supplied by the test runner (fixture or parametrization) -- confirm.
    """
    # Fully observed 50 x 20 toy matrix: product of two seeded random factors.
    prng = np.random.RandomState(0)
    left = prng.rand(50, 3)
    right = prng.rand(3, 20)
    X = np.dot(left, right)

    settings = dict(
        n_components=3,
        max_n_iter=100,
        alpha=1e-3,
        random_state=0,
        detrend=False,
        backend=backend,
        verbose=0,
    )
    mf = DictCompleter(**settings)
    mf.fit(X)

    # No means to add back here: the factor product is the whole prediction.
    expected = np.dot(mf.code_, mf.components_)
    predicted = mf.predict(X).toarray()

    assert_array_almost_equal(expected, predicted)

    manual_rmse = np.sqrt(np.mean((X - expected) ** 2))
    assert_almost_equal(manual_rmse, mf.score(X))
예제 #7
0
def test_dict_completion_missing(backend):
    """Fit on a subset of entries and check held-out RMSE against a baseline.

    A toy matrix of rank at most 4 is split into train and test parts, a
    detrended ``DictCompleter`` is fitted on the train part, and its RMSE on
    the test entries must be smaller than the root-mean-square of what
    ``csr_center_data`` removes from those same entries.

    Parameters
    ----------
    backend
        Forwarded unchanged to ``DictCompleter(backend=...)``. Presumably
        supplied by the test runner (fixture or parametrization) -- confirm.
    """
    # Generate some toy data.
    rng = np.random.RandomState(0)
    U = rng.rand(100, 4)
    V = rng.rand(4, 20)
    X = np.dot(U, V)
    X = sp.csr_matrix(X)

    # Seed the split: without it the held-out set changes on every run and
    # the test is not reproducible.
    X_tr, X_te = train_test_split(X, train_size=0.95, random_state=0)
    X_tr = sp.csr_matrix(X_tr)
    X_te = sp.csr_matrix(X_te)

    mf = DictCompleter(n_components=4, max_n_iter=400, alpha=1,
                       random_state=0,
                       backend=backend,
                       detrend=True,
                       verbose=0, )

    mf.fit(X_tr)
    X_pred = mf.predict(X_te)

    # NOTE(review): comparing ``.data`` arrays assumes predict() returns a
    # sparse matrix with the same stored-entry order as X_te -- confirm.
    n_test = X_te.data.shape[0]
    rmse = sqrt(np.sum((X_te.data - X_pred.data) ** 2) / n_test)

    # Baseline: magnitude of the difference between raw and centered entries.
    X_te_c, _, _ = csr_center_data(X_te)
    rmse_c = sqrt(np.sum((X_te.data - X_te_c.data) ** 2) / n_test)
    assert rmse < rmse_c
예제 #8
0
def test_dict_completion(backend):
    """Verify a non-detrended DictCompleter against its own factors.

    After fitting on a dense toy matrix with ``detrend=False``,
    ``np.dot(code_, components_)`` must equal the dense form of
    ``predict(X)``, and its RMSE against the input must equal ``score(X)``.

    Parameters
    ----------
    backend
        Forwarded unchanged to ``DictCompleter(backend=...)``. Presumably
        supplied by the test runner (fixture or parametrization) -- confirm.
    """
    # Seeded toy data: (50 x 3) times (3 x 20).
    generator = np.random.RandomState(0)
    factor_a = generator.rand(50, 3)
    factor_b = generator.rand(3, 20)
    X = np.dot(factor_a, factor_b)

    mf = DictCompleter(
        n_components=3,
        max_n_iter=100,
        alpha=1e-3,
        random_state=0,
        detrend=False,
        backend=backend,
        verbose=0,
    )
    mf.fit(X)

    rebuilt = np.dot(mf.code_, mf.components_)
    from_predict = mf.predict(X).toarray()

    assert_array_almost_equal(rebuilt, from_predict)

    squared_error = (X - rebuilt) ** 2
    rmse_by_hand = np.sqrt(np.mean(squared_error))
    rmse_from_score = mf.score(X)

    assert_almost_equal(rmse_by_hand, rmse_from_score)
예제 #9
0
        X_pred = mf.predict(self.X_te)
        rmse = np.sqrt(np.mean((X_pred.data - self.X_te.data)**2))
        self.rmse.append(rmse)
        print('Test RMSE: ', rmse)
        self.test_time += time.clock() - test_time
        self.times.append(time.clock() - self.start_time - self.test_time)


random_state = 0

# Completer configuration; keyword arguments are listed alphabetically.
mf = DictCompleter(
    alpha=.001,
    backend='python',
    batch_size=1000,
    beta=0,
    detrend=True,
    learning_rate=.9,
    n_components=30,
    n_epochs=5,
    offset=0,
    projection='partial',
    random_state=0,
    verbose=3,
)

# Need to download from spira
X = load_movielens('10m')

# Split once, then convert both parts to CSR.
X_tr, X_te = (
    part.tocsr()
    for part in train_test_split(X, train_size=0.75,
                                 random_state=random_state)
)

# Attach the monitoring callback, built from both splits, to the estimator.
cb = Callback(X_tr, X_te)
mf.set_params(callback=cb)
t0 = time.time()
예제 #10
0
        rmse = np.sqrt(np.mean((X_pred.data - self.X_te.data) ** 2))
        self.rmse.append(rmse)
        print("Test RMSE: ", rmse)
        self.test_time += time.clock() - test_time
        self.times.append(time.clock() - self.start_time - self.test_time)


random_state = 0

# Completer configuration; keyword arguments are listed alphabetically.
mf = DictCompleter(
    alpha=0.001,
    backend="python",
    batch_size=1000,
    beta=0,
    detrend=True,
    learning_rate=0.9,
    n_components=30,
    n_epochs=5,
    offset=0,
    projection="partial",
    random_state=0,
    verbose=3,
)

# Need to download from spira
X = load_movielens("10m")

# Split once, then convert both parts to CSR.
X_tr, X_te = (
    part.tocsr()
    for part in train_test_split(X, train_size=0.75,
                                 random_state=random_state)
)

# Attach the monitoring callback, built from both splits, to the estimator.
cb = Callback(X_tr, X_te)
mf.set_params(callback=cb)
예제 #11
0
        rmse_tr = np.sqrt(np.mean((X_pred.data - self.X_tr.data) ** 2))

        self.rmse.append(rmse)
        self.rmse_tr.append(rmse_tr)
        self.q.append(mf.Q_[1, :10].copy())
        self.test_time += time.clock() - test_time
        self.times.append(time.clock() - self.start_time - self.test_time)


random_state = 0

# Completer configuration; keyword arguments are listed alphabetically.
mf = DictCompleter(
    alpha=.8,
    backend='c',
    batch_size=60,
    detrend=True,
    fit_intercept=True,
    impute=False,
    learning_rate=0.8,
    max_n_iter=60000,
    n_components=30,
    offset=0,
    random_state=0,
    verbose=5,
)

# Need to download from spira
X = load_movielens('1m')

# Split once, then convert both parts to CSR.
X_tr, X_te = (
    part.tocsr()
    for part in train_test_split(X, train_size=0.75,
                                 random_state=random_state)
)

# Attach the monitoring callback, built from both splits, then train.
cb = Callback(X_tr, X_te)
mf.set_params(callback=cb)
mf.fit(X_tr)
예제 #12
0
        self.rmse.append(rmse)
        self.rmse_tr.append(rmse_tr)
        self.q.append(mf.Q_[1, :10].copy())
        self.test_time += time.clock() - test_time
        self.times.append(time.clock() - self.start_time - self.test_time)


random_state = 0

# Completer configuration; keyword arguments are listed alphabetically.
mf = DictCompleter(
    alpha=.8,
    backend='c',
    batch_size=60,
    detrend=True,
    fit_intercept=True,
    impute=False,
    learning_rate=0.8,
    max_n_iter=60000,
    n_components=30,
    offset=0,
    random_state=0,
    verbose=5,
)

# Need to download from spira
X = load_movielens('1m')

# Split once, then convert both parts to CSR.
X_tr, X_te = (
    part.tocsr()
    for part in train_test_split(X, train_size=0.75,
                                 random_state=random_state)
)

# Attach the monitoring callback, built from both splits, then train.
cb = Callback(X_tr, X_te)
mf.set_params(callback=cb)
mf.fit(X_tr)
예제 #13
0
trace_dir = expanduser('~/output/modl/recsys_bias')

estimator_grid = {
    'cd': {
        'estimator': ExplicitMF(
            n_components=30,
            detrend=True,
        ),
        'name': 'Coordinate descent'
    },
    'dl': {
        'estimator':
        DictCompleter(n_components=30,
                      detrend=True,
                      projection='full',
                      fit_intercept=True,
                      backend='c'),
        'name':
        'Proposed online masked MF'
    },
    'dl_partial': {
        'estimator':
        DictCompleter(n_components=30,
                      detrend=True,
                      projection='partial',
                      fit_intercept=True,
                      backend='c'),
        'name':
        'Proposed algorithm'
        ' (with partial projection)'