import pickle

import numpy as np

from helpers import nDCG, precision_at_m
# zero_index, zero_index_sparse_graph, and pq_samp_split are project-local
# helpers; the module they live in is not shown here.

# Load data
loc = "/Users/ekansh/repos/data/{}"
ds = "nyt/"
print("Dataset is : {}".format(ds))
loss = 'kullback-leibler'
# loss = "frobenius"
with open(loc.format(ds) + 'data.pkl', 'rb') as f:
    data = pickle.load(f)

graph = (data[:, :2], data[:, 2])

graph, _ = zero_index_sparse_graph(graph, axis=0)
graph, _ = zero_index_sparse_graph(graph, axis=1)
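
# A minimal sketch of what zero_index_sparse_graph is assumed to do (hypothetical
# stand-in; the real helper is project-local): remap the raw ids along one axis
# of the (edges, values) pair to contiguous 0..n-1 and return the original-id ->
# new-id lookup as well. (Later examples pass a precomputed convert=..., in which
# case the helper evidently returns only the remapped graph.)
def _zero_index_sparse_graph(graph, axis=0):
    edges, values = graph
    ids = np.unique(edges[:, axis])
    convert = np.full(ids.max() + 1, -1, dtype=np.int64)  # original id -> new id
    convert[ids] = np.arange(ids.shape[0])
    edges = edges.copy()
    edges[:, axis] = convert[edges[:, axis]]
    return (edges, values), convert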

# Data Split
tr_graph, lu_graph, ts_graph, tr_U, lu_I = pq_samp_split(graph)
U = np.unique(graph[0][:, 0])
nU = U.shape[0]

# Map the original training-user ids to contiguous zero-based indices
# (used when assembling the training matrix).
tr_U_zero_indexer = zero_index(tr_U, True)
I = np.unique(graph[0][:, 1])
nI = I.shape[0]
n_tr_U = tr_U.shape[0]
tr_I = I
n_tr_I = nI
lu_U = ts_U = np.setdiff1d(U, tr_U, assume_unique=True)
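
# zero_index(ids, True) is assumed to build the same kind of original-id ->
# contiguous-index lookup on its own (the boolean flag's meaning is a guess):
def _zero_index(ids):
    convert = np.full(ids.max() + 1, -1, dtype=np.int64)  # -1 marks absent ids
    convert[ids] = np.arange(ids.shape[0])
    return convert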
# Example 2
import pickle

import numpy as np
from scipy.sparse import csr_matrix

import wmf
# zero_index_sparse_graph and edge_samp_split are project-local helpers.

# Load data
loc = "/Users/ekansh/repos/data/{}"
ds = "nyt/"
loss = 'kullback-leibler'
# loss = "frobenius"

print("Dataset is : {}".format(ds))
with open(loc.format(ds) + 'data.pkl') as f:
    data = pickle.load(f)


graph = (data[:, :2], data[:, 2])

graph, _ = zero_index_sparse_graph(graph, axis=0)
graph, _ = zero_index_sparse_graph(graph, axis=1)

U = np.unique(graph[0][:, 0])
nU = U.shape[0]

I = np.unique(graph[0][:, 1])
nI = I.shape[0]

# Split data
train, test = edge_samp_split(graph, 0.8)
train_sparse = csr_matrix((train[1], (train[0][:, 0], train[0][:, 1])), shape=(nU, nI))
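# Note: csr_matrix((vals, (rows, cols)), shape=...) takes COO-style triplets
# and sums duplicate (row, col) entries while converting to CSR. Tiny demo:
#   csr_matrix(([1., 2.], ([0, 0], [1, 1])), shape=(2, 3)).toarray()
#   -> array([[0., 3., 0.],
#             [0., 0., 0.]])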



## Train NMF
# Example 3
# (assumes module-level imports of numpy as np, torch, wmf,
#  scipy.sparse.csr_matrix, the project-local graph helpers, and a small
#  positive constant TINY used as epsilon below)
def main(data, p, q, K, M, num_iterations, alpha, lambda_reg, init_std):
    # loc = "/Users/ekansh/repos/data/{}"
    # ds = "nyt/"
    # loss = 'kullback-leibler'
    # # loss = "frobenius"

    graph = (data[:, :2], data[:, 2])

    graph, _ = zero_index_sparse_graph(graph, axis=0)
    graph, _ = zero_index_sparse_graph(graph, axis=1)

    U = np.unique(graph[0][:, 0])
    nU = U.shape[0]

    I = np.unique(graph[0][:, 1])
    nI = I.shape[0]

    # Split data: tr_graph covers the training users over all items; lu_graph
    # covers the held-out users on the "lookup" items (used to infer their
    # factors); ts_graph covers the same held-out users on the remaining items.
    tr_graph, lu_graph, ts_graph, tr_U, lu_I = pq_samp_split(graph, p, q)

    # Index bookkeeping for the three splits.
    # Training: the sampled users, over every item.
    tr_U_zero_indexer = zero_index(tr_U, True)
    n_tr_U = tr_U.shape[0]
    tr_I = I
    n_tr_I = nI
    # Lookup and test share the users held out of training; lookup uses the
    # sampled items, test uses the remaining items.
    lu_U = ts_U = np.setdiff1d(U, tr_U, assume_unique=True)
    n_lu_U = n_ts_U = lu_U.shape[0]
    lu_U_zero_indexer = ts_U_zero_indexer = zero_index(lu_U, True)
    n_lu_I = lu_I.shape[0]
    lu_I_zero_indexer = zero_index(lu_I, True)
    ts_I = np.setdiff1d(I, lu_I, assume_unique=True)
    n_ts_I = ts_I.shape[0]
    ts_I_zero_indexer = zero_index(ts_I, True)

    ## 1. Train WMF on tr_graph (the commented-out NMF setup is an alternative)
    # K = 10
    # print("Using {} loss".format(loss))
    # model = NMF(n_components=K, init='random', random_state=0, beta_loss=loss, solver='mu', max_iter=1000)
    zi_tr_graph = zero_index_sparse_graph(tr_graph, axis=0, convert=tr_U_zero_indexer)
    zi_tr_graph_sparse = csr_matrix((zi_tr_graph[1], (zi_tr_graph[0][:, 0], zi_tr_graph[0][:, 1])), shape=(n_tr_U, n_tr_I))


    S_tr_sparse = wmf.log_surplus_confidence_matrix(zi_tr_graph_sparse, alpha=alpha, epsilon=TINY)
    tr_U_f, tr_I_f = wmf.factorize(S_tr_sparse, num_factors=K, lambda_reg=lambda_reg, num_iterations=num_iterations,
                                   init_std=init_std, verbose=True, dtype='float32',
                                   recompute_factors=wmf.recompute_factors_bias)
    tr_I_f = tr_I_f.T
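    # factorize returns (user_factors, item_factors); tr_I_f is transposed so
    # its columns index items, which makes the per-split column slices below
    # (tr_I_f[:, lu_I], tr_I_f[:, ts_I]) straightforward.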
    ## 2. Train on lu_graph with the item factors fixed, to infer factors for
    ##    the held-out users.
    zi_lu_graph = zero_index_sparse_graph(lu_graph, axis=1, convert=lu_I_zero_indexer)
    zi_lu_graph = zero_index_sparse_graph(zi_lu_graph, axis=0, convert=lu_U_zero_indexer)
    zi_lu_graph_sparse = csr_matrix((zi_lu_graph[1], (zi_lu_graph[0][:, 0], zi_lu_graph[0][:, 1])), shape=(n_lu_U, n_lu_I))

    S_lu_sparse = wmf.log_surplus_confidence_matrix(zi_lu_graph_sparse, alpha=alpha, epsilon=TINY)
    lu_U_f, _ = wmf.factorize(S_lu_sparse, num_factors=K, lambda_reg=lambda_reg, num_iterations=num_iterations,
                              init_std=init_std, verbose=True, dtype='float32',
                              recompute_factors=wmf.recompute_factors_bias, V=tr_I_f[:, lu_I].T)
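    # Passing V fixes the item factors to the trained embeddings (restricted to
    # the lookup items), so this factorization only solves for the held-out
    # users' factors, keeping them in the same latent space as training.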

    ## 3. Score the held-out users against the held-out (test) items.
    ts_U_f = lu_U_f
    ts_I_f = tr_I_f[:, ts_I]
    predictions = np.matmul(ts_U_f, ts_I_f)

    zi_ts_graph = zero_index_sparse_graph(ts_graph, axis=0, convert=ts_U_zero_indexer)
    zi_ts_graph = zero_index_sparse_graph(zi_ts_graph, axis=1, convert=ts_I_zero_indexer)
    topk = torch.topk(torch.tensor(predictions), n_ts_I)
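    # torch.topk returns (values, indices); asking for all n_ts_I columns makes
    # topk[1] a full descending ranking of the test items for every user.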

    nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0])
    precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0], m=M)

    print("nDCG Score for is {}".format(np.mean(nDCG_score)))
    print("Precision at {} is {}".format(M, np.mean(precision_score)))
    pass
# Example 4
# (same assumed module-level imports as Example 3)
def main(data, p, K, M, num_iterations, alpha, lambda_reg, init_std):
    # loc = "/Users/ekansh/repos/data/{}"
    # ds = "nyt/"
    # loss = 'kullback-leibler'
    # # loss = "frobenius"

    graph = (data[:, :2], data[:, 2])

    graph, _ = zero_index_sparse_graph(graph, axis=0)
    graph, _ = zero_index_sparse_graph(graph, axis=1)

    U = np.unique(graph[0][:, 0])
    nU = U.shape[0]

    I = np.unique(graph[0][:, 1])
    nI = I.shape[0]

    # Split data
    train, test = edge_samp_split(graph, p)
    train_sparse = csr_matrix((train[1], (train[0][:, 0], train[0][:, 1])),
                              shape=(nU, nI))

    ## Train WMF (the commented-out NMF setup is an alternative)
    # K = 10
    # print("Using {} loss".format(loss))
    # model = NMF(n_components=K, init='random', random_state=0, beta_loss=loss, solver='mu', max_iter=1000)
    S = wmf.log_surplus_confidence_matrix(train_sparse,
                                          alpha=alpha,
                                          epsilon=TINY)
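
    # log_surplus_confidence_matrix applies the implicit-feedback confidence
    # transform of Hu, Koren & Volinsky (2008), minus the constant 1: each
    # observed count r becomes alpha * log(1 + r / epsilon), zeros stay zero.
    # A minimal sketch of the transform, assuming a scipy CSR input:
    #   S = train_sparse.copy().astype('float32')
    #   S.data = alpha * np.log(1.0 + S.data / TINY)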

    user_features, item_features = wmf.factorize(
        S,
        num_factors=K,
        lambda_reg=lambda_reg,
        num_iterations=num_iterations,
        init_std=init_std,
        verbose=True,
        dtype='float32',
        recompute_factors=wmf.recompute_factors_bias)
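
    # factorize is assumed to run alternating least squares: user factors are
    # recomputed with item factors held fixed and vice versa, num_iterations
    # times; recompute_factors_bias additionally fits a per-user/item bias term.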

    # Mask the training edges of the test users so already-seen items are not
    # recommended back to them.
    ts_U = np.unique(test[0][:, 0])
    zi_test, test_convert = zero_index_sparse_graph(test)
    mask_edges = train[0][np.in1d(train[0][:, 0], ts_U)]
    mask_edges[:, 0] = test_convert[mask_edges[:, 0]]

    test_user_features = user_features[ts_U]
    predictions = np.matmul(test_user_features, item_features.T)

    # A vectorized scatter update would be faster; loop edge-by-edge for clarity.
    for edge in mask_edges:
        predictions[tuple(edge)] = 0.
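    # Equivalent vectorized scatter update (same effect in a single call):
    #   predictions[mask_edges[:, 0], mask_edges[:, 1]] = 0.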

    # Recommend top_k

    topk = torch.topk(torch.tensor(predictions), I.shape[0])[1].numpy()

    # Evaluate: More metrics?
    nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk, zi_test[0])

    # m=20

    precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk, zi_test[0],
                                     M)
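
    # A minimal sketch of precision_at_m, assuming it scores each user's ranked
    # list against that user's held-out edges (hypothetical signature; the real
    # helper lives in helpers):
    #   def _precision_at_m(user, ranked_items, true_edges, m):
    #       relevant = set(true_edges[true_edges[:, 0] == user][:, 1])
    #       return np.mean([i in relevant for i in ranked_items[:m]])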

    print("nDCG Score for is {}".format(np.mean(nDCG_score)))
    print("Precision at {} is {}".format(m, np.mean(precision_score)))
    pass