Example #1
def fit_transform(self, X):
    """Fit the factorization to X and return the row-side (user) factors."""
    S = log_surplus_confidence_matrix(X, self.alpha, self.epsilon)
    transformed, self.components_ = factorize(S, self.k, self.lambda_reg,
                                              self.n_iter, self.init_std,
                                              self.verbose)
    return transformed
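This method follows the scikit-learn transformer convention: it returns the row-side factors and stores the item factors in `components_`. A minimal usage sketch, where the wrapper class name `WMFTransformer` and its constructor arguments are assumptions (only `fit_transform` appears above):

import scipy.sparse as sparse

# Hypothetical wrapper class exposing the fit_transform above.
model = WMFTransformer(alpha=2.0, epsilon=1e-6, k=40, lambda_reg=1e-5,
                       n_iter=20, init_std=0.01, verbose=True)

X = sparse.random(1000, 500, density=0.01, format='csr')  # toy interaction matrix
user_factors = model.fit_transform(X)   # returned row factors
item_factors = model.components_        # stored item factors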
Example #2
def train(self,
          data,
          num_factors=25,
          lambda_reg=1e-3,
          num_iterations=2,
          init_std=0.01,
          verbose=True):
    """Factorize the interaction data with bias terms and store W, H."""
    self.data = data
    self.W, self.H = wmf.factorize(
        data,
        num_factors=num_factors,
        lambda_reg=lambda_reg,
        num_iterations=num_iterations,
        init_std=init_std,
        verbose=verbose,
        dtype=np.float64,
        recompute_factors=wmf.recompute_factors_bias)
    self.trained = True
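Once `train` has run, scores come from the reconstruction `W @ H.T`; a sketch, assuming `wmf.factorize` returns user and item factor matrices of shapes (users × factors) and (items × factors), and where `model` is a hypothetical trained instance of the class above:

import numpy as np

u = 0                                    # some user index
scores = model.W[u] @ model.H.T          # one row of the reconstruction W @ H.T
top_items = np.argsort(-scores)[:10]     # ten highest-scoring items for user u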
Example #3
import numpy as np
import wmf
import batched_inv
import batched_inv_precompute
import solve_mp
import solve_gpu

np.random.seed(123)

B = np.load("test_matrix_large.pkl", allow_pickle=True)  # pickled matrix; NumPy >= 1.16.3 requires allow_pickle=True

# shuffle columns of B so the dense parts are evenly distributed
indices = np.arange(B.shape[1])
np.random.shuffle(indices)
B = B[:, indices]

S = wmf.log_surplus_confidence_matrix(B, alpha=2.0, epsilon=1e-6)


num_factors = 40 + 1  # 40 latent factors + 1 for the bias term (recompute_factors_bias*)
num_iterations = 1
batch_size = 10000

# solve = batched_inv.solve_sequential
# solve = solve_mp.solve_mp
solve = solve_gpu.solve_gpu


# U, V = wmf.factorize(S, num_factors=num_factors, lambda_reg=1e-5, num_iterations=num_iterations, init_std=0.01, verbose=True, dtype='float32',
#     recompute_factors=batched_inv.recompute_factors_bias_batched, batch_size=batch_size, solve=solve)


U, V = wmf.factorize(S, num_factors=num_factors, lambda_reg=1e-5, num_iterations=num_iterations, init_std=0.01, verbose=True, dtype='float32',
    recompute_factors=batched_inv_precompute.recompute_factors_bias_batched_precompute, batch_size=batch_size, solve=solve)
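The `solve` argument is pluggable: all three options above take a batch of per-row linear systems and return the stacked solutions. A minimal sketch of that interface, assuming it receives stacked matrices `A_i` and right-hand sides `b_i` (this is an assumption about the interface, not the actual `batched_inv.solve_sequential`):

import numpy as np

def solve_sequential_sketch(As, Bs):
    # Solve each A_i x_i = b_i in turn and stack the solutions.
    return np.stack([np.linalg.solve(A, b) for A, b in zip(As, Bs)])

# toy batch: 8 systems of size num_factors = 41
As = np.random.randn(8, 41, 41) + 41 * np.eye(41)  # keep the systems well-conditioned
Bs = np.random.randn(8, 41)
X = solve_sequential_sketch(As, Bs)                # shape (8, 41)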
Example #4
import numpy as np
import wmf
import batched_inv
import batched_inv_precompute
import solve_mp
import solve_gpu

np.random.seed(123)

B = np.load("test_matrix.pkl", allow_pickle=True)  # pickled matrix; NumPy >= 1.16.3 requires allow_pickle=True

S = wmf.log_surplus_confidence_matrix(B, alpha=2.0, epsilon=1e-6)


num_factors = 40 + 1  # 40 latent factors + 1 for the bias term (recompute_factors_bias*)
num_iterations = 1
batch_size = 10000

# solve = batched_inv.solve_sequential
# solve = solve_mp.solve_mp
solve = solve_gpu.solve_gpu


# U, V = wmf.factorize(S, num_factors=num_factors, lambda_reg=1e-5, num_iterations=num_iterations, init_std=0.01, verbose=True, dtype='float32',
#     recompute_factors=batched_inv.recompute_factors_bias_batched, batch_size=batch_size, solve=solve)


U, V = wmf.factorize(S, num_factors=num_factors, lambda_reg=1e-5, num_iterations=num_iterations, init_std=0.01, verbose=True, dtype='float32',
    recompute_factors=batched_inv_precompute.recompute_factors_bias_batched_precompute, batch_size=batch_size, solve=solve)
Example #5
import numpy as np
import wmf


B = np.load("test_matrix.pkl", allow_pickle=True)  # pickled matrix; NumPy >= 1.16.3 requires allow_pickle=True

S = wmf.log_surplus_confidence_matrix(B, alpha=2.0, epsilon=1e-6)

U, V = wmf.factorize(S, num_factors=41, lambda_reg=1e-5, num_iterations=2, init_std=0.01, verbose=True, dtype='float32', recompute_factors=wmf.recompute_factors_bias)
Example #6
import numpy as np
import scipy.sparse as sparse
import wmf


if __name__ == "__main__":
    song_user_sparse_matrix = sparse.load_npz('..\\Metadata\\song_user_matrix.npz')
    # dense_matrix = song_user_sparse_matrix.todense()
    confidence_matrix = wmf.log_surplus_confidence_matrix(song_user_sparse_matrix, alpha=40, epsilon=1e-8)
    song_latent_factors, user_latent_factors = wmf.factorize(confidence_matrix, num_factors=100)
    np.savez_compressed('..\\Metadata\\song_latent_factors.npz', song_latent_factors)
    np.savez_compressed('..\\Metadata\\user_latent_factors.npz', user_latent_factors)
    print(user_latent_factors.shape, song_latent_factors.shape)
    # print(user_latent_factors)
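One loading caveat: because the arrays are passed to `np.savez_compressed` positionally, they are stored under the default key `'arr_0'`. Reading them back (assuming the same paths as above):

import numpy as np

song_latent_factors = np.load('..\\Metadata\\song_latent_factors.npz')['arr_0']
user_latent_factors = np.load('..\\Metadata\\user_latent_factors.npz')['arr_0']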
Example #7
import numpy as np
import wmf

B = np.load("test_matrix.pkl", allow_pickle=True)  # pickled matrix; NumPy >= 1.16.3 requires allow_pickle=True

S = wmf.log_surplus_confidence_matrix(B, alpha=2.0, epsilon=1e-6)

U, V = wmf.factorize(S,
                     num_factors=41,
                     lambda_reg=1e-5,
                     num_iterations=2,
                     init_std=0.01,
                     verbose=True,
                     dtype='float32',
                     recompute_factors=wmf.recompute_factors_bias)
Example #8
def main(data, p, q, K, M, num_iterations, alpha, lambda_reg, init_std):
    # loc = "/Users/ekansh/repos/data/{}"
    # ds = "nyt/"
    # loss = 'kullback-leibler'
    # # loss = "frobenius"

    graph = (data[:, :2], data[:, 2])

    graph, _ = zero_index_sparse_graph(graph, axis=0)
    graph, _ = zero_index_sparse_graph(graph, axis=1)

    U = np.unique(graph[0][:, 0])
    nU = U.shape[0]
    I = np.unique(graph[0][:, 1])
    nI = I.shape[0]

    # Split data into train / lookup / test graphs
    tr_graph, lu_graph, ts_graph, tr_U, lu_I = pq_samp_split(graph, p, q)

    # Zero-based indexers for each user/item subset
    tr_U_zero_indexer = zero_index(tr_U, True)
    n_tr_U = tr_U.shape[0]
    tr_I = I
    n_tr_I = nI
    lu_U = ts_U = np.setdiff1d(U, tr_U, assume_unique=True)
    n_lu_U = n_ts_U = lu_U.shape[0]
    lu_U_zero_indexer = ts_U_zero_indexer = zero_index(lu_U, True)
    n_lu_I = lu_I.shape[0]
    lu_I_zero_indexer = zero_index(lu_I, True)
    ts_I = np.setdiff1d(I, lu_I, assume_unique=True)
    n_ts_I = ts_I.shape[0]
    ts_I_zero_indexer = zero_index(ts_I, True)

    ## Train WMF
    # K = 10
    # print("Using {} loss".format(loss))
    # model = NMF(n_components=K, init='random', random_state=0, beta_loss=loss, solver='mu', max_iter=1000)
    zi_tr_graph = zero_index_sparse_graph(tr_graph, axis=0, convert=tr_U_zero_indexer)
    zi_tr_graph_sparse = csr_matrix((zi_tr_graph[1], (zi_tr_graph[0][:, 0], zi_tr_graph[0][:, 1])), shape=(n_tr_U, n_tr_I))


    S_tr_sparse = wmf.log_surplus_confidence_matrix(zi_tr_graph_sparse, alpha=alpha, epsilon=TINY)
    tr_U_f, tr_I_f = wmf.factorize(S_tr_sparse, num_factors=K, lambda_reg=lambda_reg, num_iterations=num_iterations, 
                                                init_std=init_std, verbose=True, dtype='float32', 
                                                recompute_factors=wmf.recompute_factors_bias)
    tr_I_f = tr_I_f.T
    # Train on the lookup graph with the item factors fixed
    zi_lu_graph = zero_index_sparse_graph(lu_graph, axis=1, convert=lu_I_zero_indexer)
    zi_lu_graph = zero_index_sparse_graph(zi_lu_graph, axis=0, convert=lu_U_zero_indexer)
    zi_lu_graph_sparse = csr_matrix((zi_lu_graph[1], (zi_lu_graph[0][:, 0], zi_lu_graph[0][:, 1])), shape=(n_lu_U, n_lu_I))

    S_lu_sparse = wmf.log_surplus_confidence_matrix(zi_lu_graph_sparse, alpha=alpha, epsilon=TINY)
    lu_U_f, _ = wmf.factorize(S_lu_sparse, num_factors=K, lambda_reg=lambda_reg, num_iterations=num_iterations,
                              init_std=init_std, verbose=True, dtype='float32',
                              recompute_factors=wmf.recompute_factors_bias, V=tr_I_f[:, lu_I].T)

    ts_U_f = lu_U_f
    ts_I_f = tr_I_f[:, ts_I]
    predictions = np.matmul(ts_U_f, ts_I_f)

    zi_ts_graph = zero_index_sparse_graph(ts_graph, axis=0, convert=ts_U_zero_indexer)
    zi_ts_graph = zero_index_sparse_graph(zi_ts_graph, axis=1, convert=ts_I_zero_indexer)

    topk = torch.topk(torch.tensor(predictions), n_ts_I)

    nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0])
    precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0], m=M)

    print("nDCG score is {}".format(np.mean(nDCG_score)))
    print("Precision at {} is {}".format(M, np.mean(precision_score)))
Example #9
nI = I.shape[0]

# Split data
train, test = edge_samp_split(graph, 0.8)
train_sparse = csr_matrix((train[1], (train[0][:, 0], train[0][:, 1])), shape=(nU, nI))



## Train WMF
K = 10
print("Using {} loss".format(loss))
# model = NMF(n_components=K, init='random', random_state=0, beta_loss=loss, solver='mu', max_iter=1000)
S = wmf.log_surplus_confidence_matrix(train_sparse, alpha=2.0, epsilon=1e-6)

user_features, item_features = wmf.factorize(S, num_factors=K, lambda_reg=1e-5, num_iterations=20, 
                                            init_std=0.01, verbose=True, dtype='float32', 
                                            recompute_factors=wmf.recompute_factors_bias)

ts_U = np.unique(test[0][:, 0])
zi_test, test_convert = zero_index_sparse_graph(test)
mask_edges = train[0][np.in1d(train[0][:, 0], ts_U)]
mask_edges[:, 0] = test_convert[mask_edges[:, 0]]

test_user_features = user_features[ts_U]
predictions = np.matmul(test_user_features, item_features.T)

# A scatter update might be faster; loop over edges for correctness
for edge in mask_edges:
    predictions[tuple(edge)] = 0.

# Recommend top_k
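The example stops at the recommendation step. A sketch of the missing top-k selection in plain NumPy, assuming `predictions` is the dense user-by-item score matrix built above and `k` is a hypothetical cutoff:

import numpy as np

k = 20  # hypothetical number of recommendations per user

# argpartition finds the k best columns per row without a full sort;
# the final argsort orders only those k candidates.
top_k_unordered = np.argpartition(-predictions, k, axis=1)[:, :k]
rows = np.arange(predictions.shape[0])[:, None]
order = np.argsort(-predictions[rows, top_k_unordered], axis=1)
top_k = top_k_unordered[rows, order]  # (num_test_users, k) item indices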
Example #10
def main(data, p, K, M, num_iterations, alpha, lambda_reg, init_std):
    # loc = "/Users/ekansh/repos/data/{}"
    # ds = "nyt/"
    # loss = 'kullback-leibler'
    # # loss = "frobenius"

    graph = (data[:, :2], data[:, 2])

    graph, _ = zero_index_sparse_graph(graph, axis=0)
    graph, _ = zero_index_sparse_graph(graph, axis=1)

    U = np.unique(graph[0][:, 0])
    nU = U.shape[0]

    I = np.unique(graph[0][:, 1])
    nI = I.shape[0]

    # Split data
    train, test = edge_samp_split(graph, p)
    train_sparse = csr_matrix((train[1], (train[0][:, 0], train[0][:, 1])),
                              shape=(nU, nI))

    ## Train WMF
    # K = 10
    # print("Using {} loss".format(loss))
    # model = NMF(n_components=K, init='random', random_state=0, beta_loss=loss, solver='mu', max_iter=1000)
    S = wmf.log_surplus_confidence_matrix(train_sparse,
                                          alpha=alpha,
                                          epsilon=TINY)

    user_features, item_features = wmf.factorize(
        S,
        num_factors=K,
        lambda_reg=lambda_reg,
        num_iterations=num_iterations,
        init_std=init_std,
        verbose=True,
        dtype='float32',
        recompute_factors=wmf.recompute_factors_bias)

    ts_U = np.unique(test[0][:, 0])
    zi_test, test_convert = zero_index_sparse_graph(test)
    mask_edges = train[0][np.in1d(train[0][:, 0], ts_U)]
    mask_edges[:, 0] = test_convert[mask_edges[:, 0]]

    test_user_features = user_features[ts_U]
    predictions = np.matmul(test_user_features, item_features.T)

    # A scatter update might be faster; loop over edges for correctness
    for edge in mask_edges:
        predictions[tuple(edge)] = 0.

    # Recommend top_k

    topk = torch.topk(torch.tensor(predictions), I.shape[0])[1].numpy()

    # Evaluate: more metrics?
    nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk, zi_test[0])

    precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk, zi_test[0],
                                     M)

    print("nDCG score is {}".format(np.mean(nDCG_score)))
    print("Precision at {} is {}".format(M, np.mean(precision_score)))
Example #11
        np.random.shuffle(indx)
        temp = np.asarray(user_dict[i])
        R_test[i, temp[indx[:l // 2], 0]] = temp[indx[:l // 2], 1]
        R_train[i, temp[indx[l // 2:], 0]] = temp[indx[l // 2:], 1]
        num_train += len(indx[l // 2:])
        num_test += len(indx[:l // 2])

    return R_train.tocsr()


#path = 'ratings.csv'
#R = get_data(path)

R = load_data()
R = R[:-1000000, :-100000]     # drop the last users/items to shrink the problem
R.data = np.ones_like(R.data)  # binarize ratings into implicit feedback
S = wmf.log_surplus_confidence_matrix(R, alpha=20.0, epsilon=1e-6)

num_iters = 10
num_factors = 50

U, V = wmf.factorize(S,
                     num_factors,
                     R=R,
                     num_iterations=num_iters,
                     verbose=True)

print('rmse', rmse(R, U, V))
np.save('U_wmf_2', U)
np.save('V_wmf_2', V)
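`rmse` is not defined in this snippet; a minimal sketch of such a helper, assuming `U` and `V` are (users × factors) and (items × factors) matrices and that only the observed entries of the sparse matrix `R` are scored:

import numpy as np

def rmse(R, U, V):
    # Root-mean-squared error over the nonzero entries of sparse R.
    R = R.tocoo()
    preds = np.sum(U[R.row] * V[R.col], axis=1)  # rows of U dotted with rows of V
    return np.sqrt(np.mean((R.data - preds) ** 2))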