def wrsampleae(matrix_train, matrix_valid, matrix_unif_train, iteration=100, lam=0.01, rank=50, seed=0,
               batch_size=256, gpu_on=True, **unused):
    progress = WorkSplitter()

    progress.section("WRSampleAE: Set the random seed")
    np.random.seed(seed)
    tf.set_random_seed(seed)

    progress.section("WRSampleAE: Training")
    m, n = matrix_train.shape

    # Flag entries observed in the biased log S_c before folding in S_t
    marks = sparse.csr_matrix((matrix_train != 0).astype(np.float64))

    matrix_train += matrix_unif_train

    model = WRSampleAE(n, rank, m, lamb=lam, batch_size=batch_size, gpu_on=gpu_on)
    metric_names = ['NLL', 'AUC']

    RQ, X, xBias, Y, yBias = model.train_model(matrix_train, marks, matrix_valid, iteration, metric_names)

    model.sess.close()
    tf.reset_default_graph()

    return RQ, X, xBias, Y, yBias
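A quick toy check of the marks/union construction above, as a minimal sketch using only numpy and scipy: entries of the biased log S_c are flagged with 1 before the uniform log S_t is folded in, so the model can tell the two sources apart.

import numpy as np
from scipy import sparse

S_c = sparse.csr_matrix(np.array([[3., 0.], [0., 5.]]))   # biased log
S_t = sparse.csr_matrix(np.array([[0., 1.], [0., 0.]]))   # uniform log
marks = sparse.csr_matrix((S_c != 0).astype(np.float64))  # 1 where S_c has data
combined = S_c + S_t
print(marks.toarray())     # [[1. 0.] [0. 1.]]
print(combined.toarray())  # [[3. 1.] [0. 5.]]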
Example #2
def plrec(matrix_train, iteration=4, lamb=80, rank=200, seed=1, **unused):
    """
    Function used to achieve generalized projected lrec w/o item-attribute embedding
    :param matrix_train: user-item matrix with shape m*n
    :param iteration: number of power iterations in randomized svd
    :param lamb: parameter of penalty
    :param rank: latent dimension size
    :param seed: the seed of the pseudo random number generator to use when shuffling the data
    :return: prediction in sparse matrix
    """
    progress = WorkSplitter()

    progress.subsection("Randomized SVD")
    start_time = time.time()
    P, sigma, Qt = randomized_svd(matrix_train,
                                  n_components=rank,
                                  n_iter=iteration,
                                  random_state=seed)

    RQ = matrix_train.dot(sparse.csc_matrix(Qt.T*np.sqrt(sigma)))

    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    progress.subsection("Closed-Form Linear Optimization")
    start_time = time.time()
    pre_inv = RQ.T.dot(RQ) + lamb * sparse.identity(rank, dtype=np.float32)
    inverse = sparse.linalg.inv(pre_inv.tocsc())
    Y = inverse.dot(RQ.T).dot(matrix_train)
    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    return np.array(RQ.todense()), np.array(Y.todense()), None
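The closed form above is ordinary ridge regression of the training matrix onto the projected user factors RQ. Below is a self-contained sketch of the same pipeline on random data (sklearn and scipy only; all sizes and parameter values are illustrative):

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import inv as sparse_inv
from sklearn.utils.extmath import randomized_svd

R = sparse.random(100, 50, density=0.1, format='csr', random_state=1)

P, sigma, Qt = randomized_svd(R, n_components=10, n_iter=4, random_state=1)
RQ = R.dot(sparse.csc_matrix(Qt.T * np.sqrt(sigma)))        # project users

lamb = 80
pre_inv = RQ.T.dot(RQ) + lamb * sparse.identity(10, dtype=np.float32)
Y = sparse_inv(pre_inv.tocsc()).dot(RQ.T).dot(R)            # ridge solution
scores = np.asarray(RQ.todense()).dot(np.asarray(Y.todense()))  # m x n predictions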
Example #3
def pure_svd(matrix_train,
             embeded_matrix=np.empty((0)),
             iteration=4,
             rank=200,
             fb=False,
             seed=1,
             **unused):
    """
    PureSVD algorithm

    :param matrix_train: rating matrix
    :param embeded_matrix: item or user embedding matrix(side info)
    :param iteration: number of random SVD iterations
    :param rank: SVD top K eigenvalue ranks
    :param fb: facebook package or sklearn package. boolean
    :param seed: Random initialization seed
    :param unused: args that not applicable for this algorithm
    :return:
    """
    progress = WorkSplitter()
    matrix_input = matrix_train
    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))

    progress.subsection("Randomized SVD")
    start_time = time.time()
    if fb:
        P, sigma, Qt = pca(matrix_input, k=rank, n_iter=iteration, raw=True)
    else:
        P, sigma, Qt = randomized_svd(matrix_input,
                                      n_components=rank,
                                      n_iter=iteration,
                                      power_iteration_normalizer='QR',
                                      random_state=seed)

    RQ = matrix_input.dot(sparse.csc_matrix(Qt).T)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    return np.array(RQ.todense()), Qt, None
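The PureSVD scores these factors feed into are just a low-rank reconstruction of the rating matrix. A minimal runnable sketch with illustrative sizes:

import numpy as np
from scipy import sparse
from sklearn.utils.extmath import randomized_svd

R = sparse.random(80, 40, density=0.15, format='csr', random_state=1)
P, sigma, Qt = randomized_svd(R, n_components=8, n_iter=4,
                              power_iteration_normalizer='QR', random_state=1)
RQ = R.dot(sparse.csc_matrix(Qt).T)        # user factors, as in pure_svd
scores = np.asarray(RQ.todense()).dot(Qt)  # rank-8 reconstruction of R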
Example #4
def nceplrec(matrix_train,
             embeded_matrix=np.empty((0)),
             iteration=4,
             lam=80,
             rank=200,
             seed=1,
             root=1.1,
             gpu_on=True,
             **unused):
    """
    Function used to achieve generalized projected lrec w/o item-attribute embedding
    :param matrix_train: user-item matrix with shape m*n
    :param embeded_matrix: item-attribute matrix with length n (each row represents one item)
    :param lam: parameter of penalty
    :param k_factor: ratio of the latent dimension/number of items
    :return: prediction in sparse matrix
    """
    progress = WorkSplitter()
    matrix_input = matrix_train
    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))

    progress.subsection("Create PMI matrix")
    if gpu_on:
        pmi_matrix = get_pmi_matrix_gpu(matrix_input, root)
    else:
        pmi_matrix = get_pmi_matrix(matrix_input, root)

    progress.subsection("Randomized SVD")
    start_time = time.time()
    P, sigma, Qt = randomized_svd(pmi_matrix,
                                  n_components=rank,
                                  n_iter=iteration,
                                  random_state=seed)
    # Plain
    # RQ = matrix_input.dot(sparse.csc_matrix(Qt).T)

    # sqrt sigma injection
    RQ = matrix_input.dot(sparse.csc_matrix(Qt.T * np.sqrt(sigma)))

    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    progress.subsection("Closed-Form Linear Optimization")
    start_time = time.time()
    pre_inv = RQ.T.dot(RQ) + lam * sparse.identity(rank, dtype=np.float32)
    inverse = inv(pre_inv)
    Y = inverse.dot(RQ.T).dot(matrix_input)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
    return np.array(RQ.todense()), np.array(Y.todense()), None
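get_pmi_matrix is defined elsewhere in the repo. As rough orientation only, a shifted positive PMI over the interaction counts would look like the sketch below; this is an assumption about the general shape, not the repo's exact weighting, and in particular how root tempers item popularity may differ.

import numpy as np
from scipy import sparse

def sppmi_sketch(X, root=1.0, shift=1.0):
    X = sparse.csr_matrix(X, dtype=np.float32)
    total = X.sum()
    row = np.asarray(X.sum(axis=1)).flatten()   # user activity
    col = np.asarray(X.sum(axis=0)).flatten()   # item popularity
    coo = X.tocoo()
    # PMI(u, i) = log(total * x_ui / (row_u * col_i ** root))
    pmi = np.log(total * coo.data / (row[coo.row] * col[coo.col] ** root))
    data = np.maximum(pmi - np.log(shift), 0)   # keep positive entries only
    return sparse.csr_matrix((data, (coo.row, coo.col)), shape=X.shape)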
Example #5
def main(args):
    progress = WorkSplitter()

    progress.section("Load Raw Data")
    rating_matrix, timestamp_matrix = load_netflix(path=args.folder, shape=args.shape)
    progress.section("Split CSR Matrices")
    rtrain, rvalid, rtest, nonzero_index, rtime = time_ordered_split(rating_matrix=rating_matrix,
                                                                     timestamp_matrix=timestamp_matrix,
                                                                     ratio=args.ratio, implicit=args.implicit)
    progress.section("Save NPZ")
    save_numpy(rtrain, args.path, "Rtrain")
    save_numpy(rvalid, args.path, "Rvalid")
    save_numpy(rtest, args.path, "Rtest")
    save_numpy(rtime, args.path, "Rtime")
    save_array(nonzero_index, args.path, "Index")
Example #6
def main(args):
    # Progress bar
    progress = WorkSplitter()

    progress.section("Load Data")
    if args.emb_type == 'bert':
        emb_size = 768
    elif args.emb_type == 'xlmr':
        emb_size = 1024
    else:
        raise NotImplementedError('either use bert or xlmr embeddings')

    # Load Data
    start_time = time.time()
    print("WARNING: Embedding size is set to", emb_size)
    data = Data(args, args.path, args.train, args.valid, emb_size, is_lb=True)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    # Build model
    progress.section("Build Model")

    if args.network_architecture == 'embedding_net':
        model = EmbeddingNet(data.n_token, data.n_feature, emb_size,
                             [1024, 2000, 1000, 500, 100], corruption=args.corruption)
    elif args.network_architecture == 'embedding_highway_net':
        model = EmbeddingHighWayNet(data.n_token, data.n_feature, emb_size,
                                    [1024, 2000, 1000, 500, 100])
    else:
        raise NotImplementedError('either use embedding_net or embedding_highway_net')
    model.cuda()

    model.load_state_dict(torch.load(args.checkpoint))
    print(model)
    lb_loader = data.instance_a_lb_loader(args.batch)

    lbs = {'user_lb': list(), 'tweet_lb': list()}
    preds = []
    model.eval()
    with torch.no_grad():
        lb_iterator = tqdm(lb_loader, desc="lb")
        for _, batch in enumerate(lb_iterator):
            token, feature, tweet_lb, user_lb, embedding = (batch[0].float().cuda(),
                                                            batch[1].float().cuda(),
                                                            batch[2], batch[3],
                                                            batch[4].float().cuda())
            pred = torch.sigmoid(model(token, feature, embedding)).detach().cpu().numpy()

            if "Valid" in args.valid:
                lbs['tweet_lb'] += tweet_lb
            else:
                lbs['tweet_lb'] += tweet_lb[0]
            lbs['user_lb'] += user_lb[0]
            preds.append(pred)

        final_csv = pd.DataFrame(lbs)
        preds = np.float64(np.vstack(preds))
        if not os.path.exists(args.spath):
            os.makedirs(args.spath)

        print("Generating CSVs...")
        for i, engage in enumerate(["reply", "retweet", "comment", "like"]):
            final_csv[engage] = preds[:,i]
            final_csv[['tweet_lb', 'user_lb', engage]].to_csv(
                os.path.join(args.spath, engage + '.csv'), index=False, header=False)
Example #7
def sensitivity(train, validation, params):
    progress = WorkSplitter()
    progress.section("PMI-PLRec Default")
    RQ, Yt, _ = params['models']['NCE-PLRec'](train,
                                              embeded_matrix=np.empty((0)),
                                              iteration=params['iter'],
                                              rank=params['rank'],
                                              lam=params['lambda'],
                                              root=1.0)
    Y = Yt.T

    default_prediction = predict(matrix_U=RQ,
                                 matrix_V=Y,
                                 topK=params['topK'][-1],
                                 matrix_Train=train,
                                 gpu=True)

    default_result = evaluate(default_prediction, validation, params['metric'],
                              params['topK'])
    print("-")
    print("Rank: {0}".format(params['rank']))
    print("Lambda: {0}".format(params['lambda']))
    print("SVD Iteration: {0}".format(params['iter']))
    print("Evaluation Ranking Topk: {0}".format(params['topK']))
    for key in default_result.keys():
        print("{0}: {1}".format(key, default_result[key]))

    sensitivity_results = dict()
    for root in tqdm(params['root']):
        progress.section("PMI-PLRec, Root: " + str(root))
        RQ, Yt, _ = params['models']['NCE-PLRec'](train,
                                                  embeded_matrix=np.empty((0)),
                                                  iteration=params['iter'],
                                                  rank=params['rank'],
                                                  lam=params['lambda'],
                                                  root=root)
        Y = Yt.T

        prediction = predict(matrix_U=RQ,
                             matrix_V=Y,
                             topK=params['topK'][-1],
                             matrix_Train=train,
                             gpu=True)

        result = evaluate(prediction, validation, params['metric'],
                          params['topK'])

        sensitivity_results[root] = result
        print("-")
        print("Root: {0}".format(root))
        print("Rank: {0}".format(params['rank']))
        print("Lambda: {0}".format(params['lambda']))
        print("SVD Iteration: {0}".format(params['iter']))
        print("Evaluation Ranking Topk: {0}".format(params['topK']))
        for key in result.keys():
            print("{0}: {1}".format(key, result[key]))

    return default_result, sensitivity_results
Example #8
def hintae(matrix_train,
           matrix_valid,
           iteration=100,
           lam=0.01,
           rank=50,
           rank2=50,
           seed=0,
           batch_size=256,
           confidence=0.9,
           dataset=None,
           gpu_on=True,
           **unused):
    progress = WorkSplitter()

    progress.section("HintAE: Set the random seed")
    np.random.seed(seed)
    tf.set_random_seed(seed)

    progress.section("HintAE: Load the variables trained on S_t")

    X = np.load('latent/' + dataset + 'unif_X_DeepAutoRec_200.npy')
    xBias = np.load('latent/' + dataset + 'unif_xB_DeepAutoRec_200.npy')
    Y = np.load('latent/' + dataset + 'unif_Y_DeepAutoRec_200.npy')
    yBias = np.load('latent/' + dataset + 'unif_yB_DeepAutoRec_200.npy')

    progress.section("HintAE: Training")
    m, n = matrix_train.shape
    model = HintAE(n,
                   rank,
                   rank2,
                   lamb=lam,
                   batch_size=batch_size,
                   gpu_on=gpu_on,
                   init_X=X,
                   init_Y=Y,
                   init_xBias=xBias,
                   init_yBias=yBias,
                   confidence=confidence)
    metric_names = ['NLL', 'AUC']
    RQ, X, xBias, Y, yBias, Z, zBias, K, kBias = model.train_model(
        matrix_train, matrix_valid, iteration, metric_names)

    model.sess.close()
    tf.reset_default_graph()

    return RQ, X, xBias, Y, yBias, Z, zBias, K, kBias
Example #9
def initfeatureembedmf(matrix_train,
                       matrix_valid,
                       iteration=100,
                       lam=0.01,
                       rank=50,
                       seed=0,
                       batch_size=500,
                       gpu_on=True,
                       way='both',
                       dataset=None,
                       **unused):
    progress = WorkSplitter()

    progress.section("InitFeatureEmbedMF: Set the random seed")
    np.random.seed(seed)
    tf.set_random_seed(seed)

    progress.section("InitFeatureEmbedMF: Load the variables trained on S_t")

    RQ = np.load('latent/' + dataset + 'unif_U_BiasedMF_10.npy')
    Y = np.load('latent/' + dataset + 'unif_V_BiasedMF_10.npy')
    uBias = np.load('latent/' + dataset + 'unif_uB_BiasedMF_10.npy')
    iBias = np.load('latent/' + dataset + 'unif_iB_BiasedMF_10.npy')

    progress.section("InitFeatureEmbedMF: Training")
    m, n = matrix_train.shape
    model = InitFeatureEmbedMF(m,
                               n,
                               rank,
                               lamb=lam,
                               batch_size=batch_size,
                               gpu_on=gpu_on,
                               init_U=RQ,
                               init_V=Y,
                               init_uBias=uBias,
                               init_iBias=iBias,
                               way=way)
    metric_names = ['NLL', 'AUC']
    RQ, Y, user_bias, item_bias = model.train_model(matrix_train, matrix_valid,
                                                    iteration, metric_names)

    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, user_bias, item_bias
Example #10
def pop(matrix_train, **unused):
    """
    Function used to achieve generalized projected lrec w/o item-attribute embedding
    :param matrix_train: user-item matrix with shape m*n
    :param embeded_matrix: item-attribute matrix with length n (each row represents one item)
    :param lam: parameter of penalty
    :param k_factor: ratio of the latent dimension/number of items
    :return: prediction in sparse matrix
    """
    progress = WorkSplitter()
    m, n = matrix_train.shape
    item_popularity = np.array(np.sum(matrix_train, axis=0)).flatten()

    RQ = np.ones((m, 1))
    Y = item_popularity.reshape((1, n))
    return RQ, Y, None
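A toy run of pop (assumes this module's WorkSplitter import; everything else is numpy/scipy), showing that every user receives the same popularity ranking:

import numpy as np
from scipy import sparse

R = sparse.csr_matrix(np.array([[1., 0., 1.],
                                [1., 1., 0.],
                                [0., 1., 0.]]))
RQ, Y, _ = pop(R)           # RQ: (3, 1) ones, Y: (1, 3) item counts
scores = RQ.dot(Y)          # identical row of scores for every user
print(np.argsort(-scores, axis=1))  # same item order in each row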
Example #11
def bpr(matrix_train, embeded_matrix=np.empty((0)), iteration=100, lam=80, rank=200, seed=1, **unused):
    progress = WorkSplitter()
    matrix_input = matrix_train
    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))

    m, n = matrix_input.shape
    model = BPR(m, n, rank, lamb=lam, batch_size=500)
    model.train_model(matrix_input, iteration)

    RQ = model.get_RQ()
    Y = model.get_Y().T
    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, None
Example #12
def cml(matrix_train, embeded_matrix=np.empty((0)), iteration=100, lam=80, rank=200, seed=1, **unused):
    progress = WorkSplitter()
    matrix_input = matrix_train
    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))

    m, n = matrix_input.shape
    model = CollaborativeMetricLearning(num_users=m, num_items=n, embed_dim=rank, cov_loss_weight=lam)

    model.train_model(matrix_input, iteration)

    RQ = model.get_RQ()
    Y = model.get_Y().T
    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, None
Example #13
def main(args):
    progress = WorkSplitter()
    progress.section("Load Raw Data")
    #rating_matrix = load_pandas(path=args.path, name=args.name, shape=args.shape)
    rating_matrix = load_yahoo(path=args.path,
                               name=args.name,
                               shape=args.shape)
    #timestamp_matrix = load_pandas(path=args.path, value_name='timestamp', name=args.name, shape=args.shape)
    progress.section("Split CSR Matrices")
    # rtrain, rvalid, rtest, nonzero_index = time_ordered_split(rating_matrix=rating_matrix, ratio=args.ratio, implicit=args.implicit)
    rtrain, rvalid, rtest, nonzero_index = split_seed_randomly(
        rating_matrix=rating_matrix, ratio=args.ratio, implicit=args.implicit)
    print("Done splitting Yahoo dataset")
    progress.section("Save NPZ")
    save_numpy(rtrain, args.path, "Rtrain")
    save_numpy(rvalid, args.path, "Rvalid")
    save_numpy(rtest, args.path, "Rtest")
    save_array(nonzero_index, args.path, "Index")
    print("Done saving data for yahoo after splitting")
Example #14
def cdae(matrix_train, embeded_matrix=np.empty((0)), iteration=100, lam=80, rank=200, corruption=0.8, seed=1, **unused):
    progress = WorkSplitter()
    matrix_input = matrix_train
    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))

    m, n = matrix_input.shape
    model = CDAE(m, n, rank, 100, lamb=lam)

    model.train_model(matrix_input, corruption, iteration)

    RQ = model.get_RQ(matrix_input)
    Y = model.get_Y()
    Bias = model.get_Bias()
    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, Bias
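The corruption argument is the denoising-autoencoder trick: a fraction of each user's observed interactions is randomly zeroed before encoding. A minimal sketch of that masking on a dense batch, as an assumption about CDAE's internals:

import numpy as np

def corrupt(batch, corruption, rng=None):
    """Zero each entry independently with probability `corruption`."""
    if rng is None:
        rng = np.random.default_rng(0)
    mask = rng.random(batch.shape) >= corruption  # keep with prob 1 - corruption
    return batch * mask

batch = np.ones((2, 5), dtype=np.float32)
print(corrupt(batch, corruption=0.8))  # roughly 80% of entries zeroed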
Example #15
def acf(matrix_train,
        embeded_matrix=np.empty((0)),
        epoch=300,
        iteration=100,
        lamb=80,
        rank=100,
        key_dim=3,
        batch_size=32,
        optimizer="Adam",
        learning_rate=0.001,
        seed=1,
        root=1,
        fb=False,
        **unused):

    print(epoch, lamb, rank)
    progress = WorkSplitter()
    matrix_input = matrix_train
    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))
    progress.subsection("Create PMI matrix")
    pmi_matrix = get_pmi_matrix(matrix_input, root)
    progress.subsection("Randomized SVD")
    start_time = time.time()
    if fb:
        P, sigma, Qt = pca(pmi_matrix, k=rank, n_iter=iteration, raw=True)
    else:
        P, sigma, Qt = randomized_svd(pmi_matrix,
                                      n_components=rank,
                                      n_iter=iteration,
                                      power_iteration_normalizer='QR',
                                      random_state=seed)
    Q = Qt.T * np.sqrt(sigma)
    m, n = matrix_input.shape
    model = ACF(m,
                n,
                rank,
                key_dim,
                lamb=lamb,
                batch_size=batch_size,
                learning_rate=learning_rate,
                optimizer=Optimizer[optimizer],
                item_embeddings=Q)
    model.train_model(matrix_input, epoch)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    RQ = model.get_RQ()
    Y = model.get_Y().T
    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, None
Example #16
def autorec(matrix_train, embeded_matrix=np.empty((0)), iteration=100, lam=80, rank=200,
            optimizer='RMSProp', seed=1, **unused):
    progress = WorkSplitter()
    matrix_input = matrix_train
    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))

    m, n = matrix_input.shape
    model = AutoRec(n, rank, 100, lamb=lam, optimizer=Regularizer[optimizer])

    model.train_model(matrix_input, iteration)

    RQ = model.get_RQ(matrix_input)
    Y = model.get_Y()
    Bias = model.get_Bias()
    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, Bias
Example #17
def vae_cf(matrix_train, embedded_matrix=np.empty((0)),
           iteration=100, lam=80, rank=200, corruption=0.5, optimizer="RMSProp", seed=1, **unused):
    progress = WorkSplitter()
    matrix_input = matrix_train
    if embedded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embedded_matrix.T))

    m, n = matrix_input.shape
    model = VAE(n, rank, 100, lamb=lam, observation_distribution="Multinomial", optimizer=Regularizer[optimizer])

    model.train_model(matrix_input, corruption, iteration)

    RQ = model.get_RQ(matrix_input)
    Y = model.get_Y()
    Bias = model.get_Bias()
    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, Bias
Example #18
def refinelabelmf(matrix_train, matrix_valid, iteration=100, lam=0.01, confidence=0.9, rank=50,
                  seed=0, batch_size=500, gpu_on=True, dataset=None, **unused):
    progress = WorkSplitter()

    progress.section("RefineLabelMF: Set the random seed")
    np.random.seed(seed)
    tf.set_random_seed(seed)

    progress.section("RefineLabelMF: Load the variables trained on S_c/S_t")

    norm_RQ = np.load('latent/' + dataset + 'U_BiasedMF_10.npy')
    norm_Y = np.load('latent/' + dataset + 'V_BiasedMF_10.npy')
    norm_uBias = np.load('latent/' + dataset + 'uB_BiasedMF_10.npy')
    norm_iBias = np.load('latent/' + dataset + 'iB_BiasedMF_10.npy')

    unif_RQ = np.load('latent/' + dataset + 'unif_U_BiasedMF_10.npy')
    unif_Y = np.load('latent/' + dataset + 'unif_V_BiasedMF_10.npy')
    unif_uBias = np.load('latent/' + dataset + 'unif_uB_BiasedMF_10.npy')
    unif_iBias = np.load('latent/' + dataset + 'unif_iB_BiasedMF_10.npy')

    progress.section("RefineLabelMF: Training")
    m, n = matrix_train.shape
    model = RefineLabelMF(m, n, rank, lamb=lam, confidence=confidence, batch_size=batch_size, gpu_on=gpu_on,
                          norm_init_U=norm_RQ, norm_init_V=norm_Y, norm_init_uBias=norm_uBias, norm_init_iBias=norm_iBias,
                          unif_init_U=unif_RQ, unif_init_V=unif_Y, unif_init_uBias=unif_uBias, unif_init_iBias=unif_iBias)
    metric_names = ['NLL', 'AUC']
    RQ, Y, user_bias, item_bias, refined_label, user_item_pairs, prediction = model.train_model(matrix_train,
                                                                                                matrix_valid,
                                                                                                iteration,
                                                                                                metric_names)

    # if gpu_on:
    #     np.savetxt('Matlab/refinelabelmf_prediction.txt', cp.asnumpy(prediction))
    # else:
    #     np.savetxt('Matlab/refinelabelmf_prediction.txt', prediction)

    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, user_bias, item_bias
Example #19
def cml_normalized(matrix_train, time_stamp_matrix=None, embeded_matrix=np.empty((0)),
                   iteration=100, lam=80, rank=200, seed=1, **unused):
    progress = WorkSplitter()
    matrix_input = matrix_train

    from utils.io import load_numpy
    if time_stamp_matrix is None:
        time_stamp_matrix = load_numpy(path='datax/', name='Rtime.npz')
    orders = get_orders(time_stamp_matrix.multiply(matrix_train))

    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))

    m, n = matrix_input.shape
    model = NormalizedCollaborativeMetricLearning(num_users=m, num_items=n, embed_dim=rank, cov_loss_weight=lam)

    model.train_model(matrix_input, orders, iteration)

    RQ = model.get_RQ()
    Y = model.get_Y().T
    model.sess.close()
    tf.reset_default_graph()
    return RQ, Y, None
Example #20
def als(matrix_train,
        embeded_matrix=np.empty((0)),
        iteration=4,
        lam=80,
        rank=200,
        alpha=100,
        seed=1,
        **unused):
    """
    :param matrix_train: rating matrix
    :param embeded_matrix: item or user embedding matrix(side info)
    :param iteration: number of alternative solving
    :param lam: regularization parameter
    :param rank: SVD top K eigenvalue ranks
    :param alpha: re-weighting parameter
    :param gpu: GPU computation or CPU computation. GPU usually does 2X speed of CPU
    :param seed: Random initialization seed
    :return:
    """

    progress = WorkSplitter()
    progress.subsection("Alternative Item-wised Optimization")
    matrix_input = matrix_train
    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))

    m, n = matrix_input.shape

    matrix_coo = matrix_input.tocoo()

    cold_rows, cold_cols = get_cold(matrix_coo, m, n)

    np.random.seed(seed)
    U = torch.tensor(
        np.random.normal(0, 0.01, size=(m, rank)).astype(np.float32)).float()
    V = torch.tensor(
        np.random.normal(0, 0.01, size=(n, rank)).astype(np.float32)).float()

    U[cold_rows] = 0
    V[cold_cols] = 0

    for i in range(iteration):
        progress.subsubsection("Iteration: {0}".format(i))
        solve(matrix_input.T, U, V, lam=lam, rank=rank, alpha=alpha)
        solve(matrix_input, V, U, lam=lam, rank=rank, alpha=alpha)

    return U.numpy(), V.numpy().T, None
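solve is imported from elsewhere in the repo; a plausible reading, given the alpha re-weighting parameter, is the implicit-feedback update of Hu et al. (2008). A hedged numpy sketch of one such row update (the repo's torch version may differ in details):

import numpy as np
from scipy import sparse

def solve_rows(R, X, Y, lam, alpha):
    """Update each row of X given fixed Y, for R ~ X @ Y.T with
    confidence c_ui = 1 + alpha * r_ui (weighted ridge per row)."""
    rank = Y.shape[1]
    YtY = Y.T.dot(Y)
    R = sparse.csr_matrix(R)
    for u in range(R.shape[0]):
        r_u = R[u].toarray().ravel()
        c_u = 1.0 + alpha * r_u                               # confidences
        A = YtY + Y.T.dot((c_u - 1.0)[:, None] * Y) + lam * np.eye(rank)
        b = Y.T.dot(c_u * (r_u > 0))
        X[u] = np.linalg.solve(A, b)
    return X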
Example #21
def biasedmf(matrix_train,
             matrix_valid,
             matrix_unif_train,
             iteration=100,
             lam=0.01,
             rank=50,
             seed=0,
             batch_size=500,
             way=None,
             gpu_on=True,
             **unused):
    progress = WorkSplitter()

    progress.section("BiasedMF: Set the random seed")
    np.random.seed(seed)
    tf.set_random_seed(seed)

    progress.section("BiasedMF: Training")
    m, n = matrix_train.shape
    model = BiasedMF(m,
                     n,
                     rank,
                     lamb=lam,
                     batch_size=batch_size,
                     gpu_on=gpu_on)
    metric_names = ['NLL', 'AUC']

    if way == 'unif':
        RQ, Y, user_bias, item_bias, _ = model.train_model(
            matrix_unif_train, matrix_valid, iteration, metric_names)
    elif way == 'combine':
        matrix_train += matrix_unif_train
        RQ, Y, user_bias, item_bias, _ = model.train_model(
            matrix_train, matrix_valid, iteration, metric_names)
    else:
        RQ, Y, user_bias, item_bias, prediction = model.train_model(
            matrix_train, matrix_valid, iteration, metric_names)

    # if gpu_on:
    #     np.savetxt('Matlab/biasedmf_prediction.txt', cp.asnumpy(prediction))
    # else:
    #     np.savetxt('Matlab/biasedmf_prediction.txt', prediction)

    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, user_bias, item_bias
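For orientation, a hedged usage sketch of the way switch (assumes this module's imports and toy sparse logs; sizes are illustrative): way=None trains on the biased log S_c alone, 'unif' on the uniform log S_t alone, and 'combine' on their sum.

from scipy import sparse

S_c = sparse.random(100, 50, density=0.05, format='csr', random_state=0)   # biased log
S_t = sparse.random(100, 50, density=0.01, format='csr', random_state=1)   # uniform log
S_va = sparse.random(100, 50, density=0.01, format='csr', random_state=2)  # validation

RQ, Y, user_bias, item_bias = biasedmf(S_c, S_va, S_t,
                                       iteration=5, rank=10, way='combine')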
Example #22
def ce_vae(matrix_train,
           matrix_train_keyphrase,
           embeded_matrix=np.empty((0)),
           epoch=100,
           lamb_l2=80.0,
           lamb_keyphrase=1.0,
           lamb_latent=5.0,
           lamb_rating=1.0,
           beta=0.2,
           learning_rate=0.0001,
           rank=200,
           corruption=0.5,
           optimizer="RMSProp",
           seed=1,
           **unused):
    progress = WorkSplitter()
    matrix_input = matrix_train
    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))

    matrix_input_keyphrase = matrix_train_keyphrase

    model = CE_VAE(observation_dim=matrix_input.shape[1],
                   keyphrase_dim=matrix_input_keyphrase.shape[1],
                   latent_dim=rank,
                   batch_size=128,
                   lamb_l2=lamb_l2,
                   lamb_keyphrase=lamb_keyphrase,
                   lamb_latent=lamb_latent,
                   lamb_rating=lamb_rating,
                   beta=beta,
                   learning_rate=learning_rate,
                   observation_distribution="Gaussian",
                   optimizer=Optimizer[optimizer])

    model.train_model(matrix_input, matrix_input_keyphrase, corruption, epoch)

    return model
Example #23
def uncertainty(Rtrain, df_input, rank):
    progress = WorkSplitter()
    m, n = Rtrain.shape

    valid_models = vaes.keys()

    results = []

    for run in range(1):

        for idx, row in df_input.iterrows():
            row = row.to_dict()

            if row['model'] not in valid_models:
                continue

            progress.section(json.dumps(row))

            if 'optimizer' not in row.keys():
                row['optimizer'] = 'RMSProp'

            model = vaes[row['model']](n,
                                       rank,
                                       batch_size=100,
                                       lamb=row['lambda'],
                                       optimizer=Regularizer[row['optimizer']])

            model.train_model(Rtrain,
                              corruption=row['corruption'],
                              epoch=row['iter'])
            data_batches = model.get_batches(Rtrain, batch_size=100)
            progress.subsection("Predict")
            for batch in tqdm(data_batches):
                batch_size = batch.shape[0]
                _, stds = model.uncertainty(batch.todense())
                num_rated = np.squeeze(np.asarray(np.sum(batch, axis=1)))
                std = np.mean(stds, axis=1)
                results.append(
                    pd.DataFrame({
                        'model': [row['model']] * batch_size,
                        'numRated': num_rated,
                        'std': std
                    }))

    return pd.concat(results)
Example #24
def wrsamplemf(matrix_train,
               matrix_valid,
               matrix_unif_train,
               iteration=100,
               lam=0.01,
               rank=50,
               seed=0,
               batch_size=500,
               gpu_on=True,
               **unused):
    progress = WorkSplitter()

    progress.section("WRSampleMF: Set the random seed")
    np.random.seed(seed)
    tf.set_random_seed(seed)

    progress.section("WRSampleMF: Training")
    m, n = matrix_train.shape

    # Flag entries observed in the biased log S_c before folding in S_t
    marks = sparse.csr_matrix((matrix_train != 0).astype(np.float64))

    matrix_train += matrix_unif_train
    num_samples = len(matrix_train.nonzero()[0])

    model = WRSampleMF(m,
                       n,
                       rank,
                       num_samples,
                       lamb=lam,
                       batch_size=batch_size,
                       gpu_on=gpu_on)
    metric_names = ['NLL', 'AUC']

    RQ, Y, user_bias, item_bias, confidence, user_item_pairs, prediction = model.train_model(
        matrix_train, marks, matrix_valid, iteration, metric_names)

    # np.savetxt('Matlab/wrsamplemf_samples.txt', user_item_pairs)
    # np.savetxt('Matlab/wrsamplemf_weights.txt', confidence)

    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, user_bias, item_bias
Example #25
def main(args):
    # Progress bar
    progress = WorkSplitter()

    # Load Data
    progress.section("Load Data")
    start_time = time.time()
    data = Data(args.path, args.train, args.valid, is_lb=True)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    # Build model
    progress.section("Build Model")

    model = FeatureNet(data.n_token, data.n_feature, [1024, 2000, 1000, 500, 100])
    model.cuda()

    model.load_state_dict(torch.load(args.checkpoint))
    print(model)
    lb_loader = data.instance_a_lb_loader(args.batch)

    lbs = {'user_lb': list(), 'tweet_lb': list()}
    preds = []
    model = model.eval()
    with torch.no_grad():
        lb_iterator = tqdm(lb_loader, desc="lb")
        for _, batch in enumerate(lb_iterator):
            token, feature, tweet_lb, user_lb = (batch[0].float().cuda(),
                                                 batch[1].float().cuda(),
                                                 batch[2], batch[3])
            pred = torch.sigmoid(model(token, feature)).detach().cpu().numpy()
            lbs['tweet_lb'] += tweet_lb[0]
            lbs['user_lb'] += user_lb[0]
            preds.append(pred)

        final_csv = pd.DataFrame(lbs)
        preds = np.float64(np.vstack(preds))
        if not os.path.exists(args.spath):
            os.makedirs(args.spath)

        print("Generating CSVs...")
        for i, engage in enumerate(["reply", "retweet", "comment", "like"]):
            final_csv[engage] = preds[:,i]
            final_csv[['tweet_lb', 'user_lb', engage]].to_csv(
                os.path.join(args.spath, engage + '.csv'), index=False, header=False)
Example #26
def lookup(train, validation, params, measure='Cosine', gpu_on=True):
    progress = WorkSplitter()
    rows = []

    num_user = train.shape[0]

    for algorithm in params['models']:

        RQ = np.load('latent/U_{0}_{1}.npy'.format(algorithm, params['rank']))
        Y = np.load('latent/V_{0}_{1}.npy'.format(algorithm, params['rank']))
        if os.path.isfile('latent/B_{0}_{1}.npy'.format(
                algorithm, params['rank'])):
            Bias = np.load('latent/B_{0}_{1}.npy'.format(
                algorithm, params['rank']))
        else:
            Bias = None

        progress.subsection("Prediction")

        prediction = predict(matrix_U=RQ,
                             matrix_V=Y,
                             measure=measure,
                             bias=Bias,
                             topK=params['topK'][-1],
                             matrix_Train=train,
                             gpu=gpu_on)

        progress.subsection("Evaluation")

        result = evaluate(prediction, validation, params['metric'],
                          params['topK'])

        result_dict = {'model': algorithm}

        for name in result.keys():
            result_dict[name] = [
                round(result[name][0], 4),
                round(result[name][1], 4)
            ]

        rows.append(result_dict)
    return pd.DataFrame(rows)
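lookup expects factors saved beforehand under a fixed naming scheme in latent/. A small sketch of producing compatible files; the 'PLRec' name and sizes are illustrative placeholders:

import os
import numpy as np

os.makedirs('latent', exist_ok=True)
RQ = np.random.rand(100, 50)   # user factors
Y = np.random.rand(200, 50)    # item factors
np.save('latent/U_PLRec_50.npy', RQ)
np.save('latent/V_PLRec_50.npy', Y)
# Optional bias vector, picked up if present:
# np.save('latent/B_PLRec_50.npy', bias)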
Example #27
def unionsamplemf(matrix_train,
                  matrix_valid,
                  matrix_unif_train,
                  iteration=100,
                  lam=0.01,
                  rank=50,
                  seed=0,
                  batch_size=500,
                  confidence=0.9,
                  gpu_on=True,
                  **unused):
    progress = WorkSplitter()

    progress.section("UnionSampleMF: Set the random seed")
    np.random.seed(seed)
    tf.set_random_seed(seed)

    progress.section("UnionSampleMF: Training")
    m, n = matrix_train.shape
    model = UnionSampleMF(m,
                          n,
                          rank,
                          lamb=lam,
                          batch_size=batch_size,
                          gpu_on=gpu_on,
                          confidence=confidence)
    metric_names = ['NLL', 'AUC']

    # Flag entries observed in the biased log S_c before folding in S_t
    marks = sparse.csr_matrix((matrix_train != 0).astype(np.float64))

    matrix_train += matrix_unif_train
    RQ, Y, user_bias, item_bias = model.train_model(matrix_train, marks,
                                                    matrix_valid, iteration,
                                                    metric_names)

    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, user_bias, item_bias
Example #28
def restrictedbatchsamplemf(matrix_train,
                            matrix_valid,
                            matrix_unif_train,
                            iteration=100,
                            lam=0.01,
                            rank=50,
                            seed=0,
                            batch_size=500,
                            gpu_on=True,
                            step=3,
                            way=None,
                            **unused):
    progress = WorkSplitter()

    progress.section("RestrictedBatchSampleMF: Set the random seed")
    np.random.seed(seed)
    tf.set_random_seed(seed)

    progress.section("RestrictedBatchSampleMF: Training")
    m, n = matrix_train.shape

    model = BatchSampleMF(m,
                          n,
                          rank,
                          lamb=lam,
                          batch_size=batch_size,
                          step=step,
                          gpu_on=gpu_on,
                          way=way)
    metric_names = ['NLL', 'AUC']

    RQ, Y, user_bias, item_bias = model.train_model(matrix_train,
                                                    matrix_unif_train,
                                                    matrix_valid, iteration,
                                                    metric_names)

    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, user_bias, item_bias
Example #29
def autorec(matrix_train,
            matrix_valid,
            matrix_unif_train,
            iteration=100,
            lam=0.01,
            rank=50,
            seed=0,
            batch_size=256,
            way=None,
            gpu_on=True,
            **unused):
    progress = WorkSplitter()

    progress.section("AutoRec: Set the random seed")
    np.random.seed(seed)
    tf.set_random_seed(seed)

    progress.section("AutoRec: Training")
    m, n = matrix_train.shape
    model = AutoRec(n, rank, lamb=lam, batch_size=batch_size, gpu_on=gpu_on)
    metric_names = ['NLL', 'AUC']

    if way == 'unif':
        RQ, X, xBias, Y, yBias = model.train_model(matrix_unif_train,
                                                   matrix_valid, iteration,
                                                   metric_names)
    elif way == 'combine':
        matrix_train += matrix_unif_train
        RQ, X, xBias, Y, yBias = model.train_model(matrix_train, matrix_valid,
                                                   iteration, metric_names)
    else:
        RQ, X, xBias, Y, yBias = model.train_model(matrix_train, matrix_valid,
                                                   iteration, metric_names)

    model.sess.close()
    tf.reset_default_graph()

    return RQ, X, xBias, Y, yBias
Example #30
def causalsamplemf(matrix_train, matrix_valid, matrix_unif_train, iteration=100, lam=0.01, lam2=0.01, rank=50,
                   seed=0, batch_size=500, gpu_on=True, **unused):
    progress = WorkSplitter()

    progress.section("CausalSampleMF: Set the random seed")
    np.random.seed(seed)
    tf.set_random_seed(seed)

    progress.section("CausalSampleMF: Training")
    m, n = matrix_train.shape

    # Create new item IDs for S_t (i.e., [n, 2n))
    unif_user_item_matrix = lil_matrix(matrix_unif_train)
    unif_user_item_pairs = np.asarray(unif_user_item_matrix.nonzero()).T
    unif_label = np.asarray(matrix_unif_train[unif_user_item_pairs[:, 0], unif_user_item_pairs[:, 1]]).T
    unif_user_item_pairs[:, 1] += n

    # Create new csr matrix including union of S_c and S_t
    norm_user_item_matrix = lil_matrix(matrix_train)
    norm_user_item_pairs = np.asarray(norm_user_item_matrix.nonzero()).T
    norm_label = np.asarray(matrix_train[norm_user_item_pairs[:, 0], norm_user_item_pairs[:, 1]]).T

    user_item_pairs = np.vstack((unif_user_item_pairs, norm_user_item_pairs))
    labels = np.vstack((unif_label, norm_label))
    matrix_train = sparse.csr_matrix(
        (labels[:, 0], (user_item_pairs[:, 0], user_item_pairs[:, 1])),
        shape=(m, n * 2), dtype='float32')

    model = CausalSampleMF(m, n, rank, lamb=lam, lamb2=lam2, batch_size=batch_size, gpu_on=gpu_on)
    metric_names = ['NLL', 'AUC']
    RQ, Y, user_bias, item_bias = model.train_model(matrix_train, matrix_valid, iteration, metric_names)

    model.sess.close()
    tf.reset_default_graph()

    return RQ, Y, user_bias, item_bias
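A toy illustration of the item-ID offset used above (numpy/scipy only): items observed in S_t are re-indexed into [n, 2n), so both logs live in one (m, 2n) matrix while items keep separate embeddings per source.

import numpy as np
from scipy import sparse
from scipy.sparse import lil_matrix

m, n = 3, 4
S_c = sparse.random(m, n, density=0.4, format='csr', random_state=0)  # biased log
S_t = sparse.random(m, n, density=0.2, format='csr', random_state=1)  # uniform log

unif_pairs = np.asarray(lil_matrix(S_t).nonzero()).T
unif_pairs[:, 1] += n                              # shift S_t item IDs
norm_pairs = np.asarray(lil_matrix(S_c).nonzero()).T
pairs = np.vstack((unif_pairs, norm_pairs))
data = np.ones(len(pairs), dtype='float32')
union = sparse.csr_matrix((data, (pairs[:, 0], pairs[:, 1])), shape=(m, 2 * n))
print(union.shape)  # (3, 8)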