Esempio n. 1
0
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate a CML model on one cross-validation fold.

    Loads the fold's train and test rating files, binarizes both with the
    module-level ``binarize_threshold``, trains a ``CML`` model and prints
    the metric scores it reports.

    Hyper-parameters (``negSample``, ``batch_size``, ``topN``,
    ``split_method``, ``eval_metrics``, ``reg_cov``, ``margin``,
    ``use_rank_weight``, ``clip_norm``, ``n_factors``) are read from
    module-level names.

    Args:
        fold: Zero-based fold index; file names, log labels and the fold id
            handed to ``train`` all use ``fold + 1``.
        n_users: Number of users, forwarded to ``loadSparseR`` and ``CML``.
        n_items: Number of items, forwarded to ``loadSparseR`` and ``CML``.
        dataset_dir: Path prefix that is concatenated directly with the file
            name (so it needs a trailing ``/``). Its second-to-last
            ``/``-separated component is used as the dataset label in logs.

    Returns:
        Whatever ``CML.train`` returns (presumably one score per entry of
        ``eval_metrics`` -- confirm against ``CML``).
    """
    fold_no = fold + 1
    file_prefix = dataset_dir + 'ratings__' + str(fold_no)

    traFilePath = file_prefix + '_tra.txt'
    trasR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, traFilePath),
                    binarize_threshold))

    # Dataset label, matrix shape, stored entries and entries per row.
    label = dataset_dir.split('/')[-2] + '@%d:' % fold_no
    print(label, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    tstFilePath = file_prefix + '_tst.txt'
    tstsR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tstFilePath),
                    binarize_threshold))

    sampler = Sampler(trasR, n_neg=negSample, batch_size=batch_size)

    cml = CML(n_users, n_items, topN, split_method, eval_metrics, reg_cov,
              margin, use_rank_weight, clip_norm, n_factors, batch_size)
    try:
        scores = cml.train(fold_no, trasR, tstsR, sampler)

        metric_names = ','.join(['%s' % metric for metric in eval_metrics])
        score_text = ','.join(['%.6f' % (score) for score in scores])
        print(label, metric_names + '@%d=' % (topN) + score_text)
    finally:
        # Always close the model, even when training or reporting raises.
        cml.close()

    return scores
Esempio n. 2
0
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate a GBPRMF model on one cross-validation fold.

    Loads the fold's train and test rating files, binarizes both with the
    module-level ``binarize_threshold``, trains a ``GBPRMF`` model and prints
    the hyper-parameters and metric scores.

    Hyper-parameters (``gsize``, ``negSample``, ``batch_size``, ``topN``,
    ``rho``, ``split_method``, ``eval_metrics``, ``reg``, ``n_factors``) are
    read from module-level names.

    Args:
        fold: Zero-based fold index; file names, log labels and the fold id
            handed to ``train`` all use ``fold + 1``.
        n_users: Number of users, forwarded to ``loadSparseR`` and ``GBPRMF``.
        n_items: Number of items, forwarded to ``loadSparseR`` and ``GBPRMF``.
        dataset_dir: Path prefix that is concatenated directly with the file
            name (so it needs a trailing ``/``). Its second-to-last
            ``/``-separated component is used as the dataset label in logs.

    Returns:
        Whatever ``GBPRMF.train`` returns (presumably one score per entry of
        ``eval_metrics`` -- confirm against ``GBPRMF``).
    """
    fold_no = fold + 1
    file_prefix = dataset_dir + 'ratings__' + str(fold_no)

    traFilePath = file_prefix + '_tra.txt'
    trasR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, traFilePath),
                    binarize_threshold))

    # Dataset label, matrix shape, stored entries and entries per row.
    label = dataset_dir.split('/')[-2] + '@%d:' % fold_no
    print(label, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    tstFilePath = file_prefix + '_tst.txt'
    tstsR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tstFilePath),
                    binarize_threshold))

    sampler = Sampler(trasR, gsize, negSample, batch_size)

    gbprmf = GBPRMF(n_users, n_items, topN, rho, gsize, split_method,
                    eval_metrics, reg, n_factors, batch_size)
    try:
        scores = gbprmf.train(fold_no, trasR, tstsR, sampler)

        print(label, 'gsize=', gsize, 'rho=', rho, 'reg=', reg)

        metric_names = ','.join(['%s' % metric for metric in eval_metrics])
        score_text = ','.join(['%.6f' % (score) for score in scores])
        print(label, metric_names + '@%d=' % (topN) + score_text)
    finally:
        # Always close the model, even when training or reporting raises.
        gbprmf.close()

    return scores
Esempio n. 3
0
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate an SVD rating model on one cross-validation fold.

    Loads the fold's train and test rating files (not binarized), converts
    each to an array of ``(user, item, rating)`` triads, trains an ``SVD``
    model and prints the metric scores.

    Hyper-parameters (``batch_size``, ``eval_metrics``, ``range_of_ratings``,
    ``reg``, ``n_factors``) are read from module-level names.

    Args:
        fold: Zero-based fold index; file names, the fold id handed to
            ``train`` and the final log line all use ``fold + 1``.
        n_users: Number of users, forwarded to ``loadSparseR`` and ``SVD``.
        n_items: Number of items, forwarded to ``loadSparseR`` and ``SVD``.
        dataset_dir: Path prefix that is concatenated directly with the file
            name (so it needs a trailing ``/``). Its second-to-last
            ``/``-separated component is used as the dataset label in logs.

    Returns:
        Whatever ``SVD.train`` returns (presumably one score per entry of
        ``eval_metrics`` -- confirm against ``SVD``).
    """
    fold_no = fold + 1

    traFilePath = dataset_dir + 'ratings__' + str(fold_no) + '_tra.txt'
    trasR = loadSparseR(n_users, n_items, traFilePath)

    # Dataset label, matrix shape, stored entries and entries per row.
    print(
        dataset_dir.split('/')[-2] + ':', trasR.shape, trasR.nnz,
        '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    # One (user, item, rating) row per non-zero training entry.
    tra_tuple = np.array([(user, item, trasR[user, item])
                          for user, item in np.asarray(trasR.nonzero()).T])

    tstFilePath = dataset_dir + 'ratings__' + str(fold_no) + '_tst.txt'
    tstsR = loadSparseR(n_users, n_items, tstFilePath)
    # One (user, item, rating) row per non-zero test entry.
    tst_tuple = np.array([(user, item, tstsR[user, item])
                          for user, item in np.asarray(tstsR.nonzero()).T])

    sampler = Sampler(trasR=trasR, negRatio=.0, batch_size=batch_size)
    svd = SVD(n_users, n_items, eval_metrics, range_of_ratings, reg, n_factors,
              batch_size)
    scores = svd.train(fold_no, tra_tuple, tst_tuple, sampler)

    # Report the one-based fold number so the log agrees with the file names
    # and with the fold id passed to train().
    print('fold=%d:' % fold_no,
          ','.join(['%s' % metric for metric in eval_metrics]), '=',
          ','.join(['%.6f' % (score) for score in scores]))

    return scores
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate a PRIGP model on one cross-validation fold.

    Loads the fold's train and test rating files, binarizes both with the
    module-level ``binarize_threshold``, trains a ``PRIGP`` model and prints
    the hyper-parameters and metric scores.

    Hyper-parameters (``topK``, ``topN``, ``split_method``, ``eval_metrics``,
    ``alpha``, ``reg``, ``n_factors``, ``batch_size``) are read from
    module-level names.

    Args:
        fold: Zero-based fold index; file names, log labels and the fold id
            handed to ``train`` all use ``fold + 1``.
        n_users: Number of users, forwarded to ``loadSparseR`` and ``PRIGP``.
        n_items: Number of items, forwarded to ``loadSparseR`` and ``PRIGP``.
        dataset_dir: Path prefix that is concatenated directly with the file
            name (so it needs a trailing ``/``). Its second-to-last
            ``/``-separated component is used as the dataset label in logs.

    Returns:
        Whatever ``PRIGP.train`` returns (presumably one score per entry of
        ``eval_metrics`` -- confirm against ``PRIGP``).
    """
    fold_no = fold + 1
    file_prefix = dataset_dir + 'ratings__' + str(fold_no)

    traFilePath = file_prefix + '_tra.txt'
    trasR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, traFilePath),
                    binarize_threshold))

    # Dataset label, matrix shape, stored entries and entries per row.
    label = dataset_dir.split('/')[-2] + '@%d:' % fold_no
    print(label, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    tstFilePath = file_prefix + '_tst.txt'
    tstsR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tstFilePath),
                    binarize_threshold))

    prigp = PRIGP(n_users, n_items, topK, topN, split_method, eval_metrics,
                  alpha, reg, n_factors, batch_size)
    try:
        scores = prigp.train(fold_no, trasR, tstsR)

        print('topK=', topK, 'alpha=', alpha, 'reg=', reg)

        metric_names = ','.join(['%s' % metric for metric in eval_metrics])
        score_text = ','.join(['%.6f' % (score) for score in scores])
        print(label, metric_names + '@%d=' % (topN) + score_text)
    finally:
        # Always close the model, even when training or reporting raises.
        prigp.close()

    return scores
def worker(fold, n_users, n_items, dataset_dir):
    """Run the UserItemCF recommender on one cross-validation fold.

    Reads the fold's train and test rating files, binarizes them with the
    module-level ``binarize_threshold``, runs ``UserItemCF.train`` and prints
    the metric scores. ``topK``, ``theta``, ``topN``, ``split_method`` and
    ``eval_metrics`` come from module-level names.

    Args:
        fold: Zero-based fold index (``fold + 1`` is used in file names,
            logs and the call to ``train``).
        n_users: Number of users.
        n_items: Number of items.
        dataset_dir: Path prefix joined by plain concatenation with the file
            name; its second-to-last ``/``-separated part labels the logs.

    Returns:
        The value returned by ``UserItemCF.train``.
    """
    fold_no = fold + 1
    path_stem = dataset_dir + 'ratings__' + str(fold_no)

    def _load_binarized(path):
        # Load a rating file and binarize it into a LIL matrix.
        return lil_matrix(
            matBinarize(loadSparseR(n_users, n_items, path),
                        binarize_threshold))

    trasR = _load_binarized(path_stem + '_tra.txt')

    label = dataset_dir.split('/')[-2] + '@%d:' % fold_no
    density = '%.2f' % (trasR.nnz / float(trasR.shape[0]))
    print(label, trasR.shape, trasR.nnz, density)

    tstsR = _load_binarized(path_stem + '_tst.txt')

    uicf = UserItemCF(n_users, n_items, topK, theta, topN, split_method,
                      eval_metrics)
    scores = uicf.train(fold_no, trasR, tstsR)

    metric_names = ','.join(['%s' % name for name in eval_metrics])
    score_text = ','.join(['%.6f' % (value) for value in scores])
    print(label, metric_names + '@%d=' % (topN) + score_text)

    return scores
Esempio n. 6
0
def worker(fold, n_A, n_B, dataset_dir, A_name, B_name):
    """Run SocialCollab on one fold of a two-sided (A/B) dataset.

    Loads the A->B and B->A train/test matrices for the fold, keeps only the
    test pairs present in both directions, trains ``SocialCollab`` and
    combines its two per-side score lists into bilateral scores weighted by
    how many distinct A and B entities appear in the matched test pairs.
    ``topN``, ``split_method`` and ``eval_metrics`` are module-level names.

    Args:
        fold: Zero-based fold index (``fold + 1`` is used in file names,
            logs and the call to ``train``).
        n_A: Number of entities on side A.
        n_B: Number of entities on side B.
        dataset_dir: Path prefix joined by plain concatenation with the file
            name; its second-to-last ``/``-separated part labels the logs.
        A_name: Name of side A as it appears in the file names.
        B_name: Name of side B as it appears in the file names.

    Returns:
        List of bilateral scores, one per entry of ``eval_metrics``.
    """
    fold_no = fold + 1

    def _load(n_rows, n_cols, src, dst, split):
        # File name pattern: <src>2<dst>_<fold>_<split>.txt
        path = (dataset_dir + src + '2' + dst + '_' + str(fold_no) + '_' +
                split + '.txt')
        return lil_matrix(loadSparseR(n_rows, n_cols, path))

    A2B_trasR = _load(n_A, n_B, A_name, B_name, 'tra')
    B2A_trasR = _load(n_B, n_A, B_name, A_name, 'tra')

    # Combined matrix is only used for the statistics line printed below.
    side = n_A + n_B
    trasR = lil_matrix((side, side))
    trasR[:n_A, :n_B] = A2B_trasR
    trasR[n_A:, n_B:] = B2A_trasR
    label = dataset_dir.split('/')[-2]
    print(label + '@%d:' % fold_no, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    A2B_tstsR = _load(n_A, n_B, A_name, B_name, 'tst')
    B2A_tstsR = _load(n_B, n_A, B_name, A_name, 'tst')

    # A test pair counts as a match only when both directions are positive.
    A2B_match_tstsR = A2B_tstsR.multiply(B2A_tstsR.T) > 0

    sc = SocialCollab(topN, split_method, eval_metrics)
    A2B_scores, B2A_scores = sc.train(fold_no, A2B_trasR, B2A_trasR,
                                      A2B_match_tstsR)

    # Bilateral scores: weight each side by its number of distinct entities
    # among the matched test pairs.
    match_rows, match_cols = A2B_match_tstsR.nonzero()
    tst_Anum = len(set(match_rows))
    tst_Bnum = len(set(match_cols))
    total = float(tst_Anum + tst_Bnum)
    bi_scores = []
    for i in range(len(eval_metrics)):
        bi_scores.append(tst_Anum / total * A2B_scores[i] +
                         tst_Bnum / total * B2A_scores[i])

    # Header line with the metric names, then the bilateral results.
    metric_names = ','.join(['%s' % name for name in eval_metrics])
    print(metric_names + '@%d:' % (topN))
    score_text = ','.join(['%.6f' % (value) for value in bi_scores])
    print(label + '@%d: bi@%d=' % (fold_no, topN) + score_text)
    return bi_scores
Esempio n. 7
0
def split_tra_tst(usernum, itemnum, inFilePath, cv_fold):
    """Shuffle a rating file and write ``cv_fold`` train/test splits beside it.

    The ratings are loaded with ``loadSparseR``, flattened to
    ``(user, item, rating)`` rows and shuffled in place with
    ``np.random.shuffle`` (so the split depends on NumPy's global random
    state). For fold ``k`` (1-based) two files are written next to the input
    via ``saveTriads``: ``<stem>_<k>_tst<ext>`` and ``<stem>_<k>_tra<ext>``,
    where ``<stem>`` and ``<ext>`` are the input file name split at its
    extension.

    Each test file holds ``len(ratings) // cv_fold`` rows. Rows left over
    when the count is not divisible by ``cv_fold`` never appear in a test
    file; they are part of every training file.

    Args:
        usernum: Number of users, forwarded to ``loadSparseR``.
        itemnum: Number of items, forwarded to ``loadSparseR``.
        inFilePath: Path of the rating file to split.
        cv_fold: Number of folds to generate.

    Raises:
        ZeroDivisionError: If ``cv_fold`` is 0.
    """
    import os

    sR = loadSparseR(usernum, itemnum, inFilePath)
    useritemrating_tuples = np.array([
        (user, item, sR[user, item])
        for user, item in np.asarray(sR.nonzero()).T
    ])
    np.random.shuffle(useritemrating_tuples)

    fold_ins_num = len(useritemrating_tuples) // cv_fold

    # Split the path with os.path so that a bare file name stays in the
    # current directory (not "/"), and only the real extension is touched
    # even when the name contains several dots or none at all.
    outdir, filename = os.path.split(inFilePath)
    stem, ext = os.path.splitext(filename)

    for cv_fold_ind in range(cv_fold):
        start = cv_fold_ind * fold_ins_num
        stop = start + fold_ins_num
        fold_tag = '_' + str(cv_fold_ind + 1)

        tst_outFilePath = os.path.join(outdir, stem + fold_tag + '_tst' + ext)
        tst = useritemrating_tuples[start:stop, :]
        saveTriads(tst, tst_outFilePath, isRatingInt=False)

        # Training set is everything outside this fold's test slice.
        tra_outFilePath = os.path.join(outdir, stem + fold_tag + '_tra' + ext)
        tra = np.concatenate([
            useritemrating_tuples[:start, :],
            useritemrating_tuples[stop:, :]
        ])
        saveTriads(tra, tra_outFilePath, isRatingInt=False)
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate a WRMF model on one cross-validation fold.

    Reads the fold's train and test rating files, binarizes them with the
    module-level ``binarize_threshold``, trains ``WRMF`` and prints the
    hyper-parameters and metric scores. ``negRatio``, ``batch_size``,
    ``topN``, ``split_method``, ``eval_metrics``, ``weight``, ``reg`` and
    ``n_factors`` come from module-level names.

    Args:
        fold: Zero-based fold index (``fold + 1`` is used in file names,
            logs and the call to ``train``).
        n_users: Number of users.
        n_items: Number of items.
        dataset_dir: Path prefix joined by plain concatenation with the file
            name; its second-to-last ``/``-separated part labels the logs.

    Returns:
        The value returned by ``WRMF.train``.
    """
    fold_no = fold + 1
    path_stem = dataset_dir + 'ratings__' + str(fold_no)

    def _load_binarized(path):
        # Load a rating file and binarize it into a LIL matrix.
        return lil_matrix(
            matBinarize(loadSparseR(n_users, n_items, path),
                        binarize_threshold))

    trasR = _load_binarized(path_stem + '_tra.txt')

    label = dataset_dir.split('/')[-2] + '@%d:' % fold_no
    density = '%.2f' % (trasR.nnz / float(trasR.shape[0]))
    print(label, trasR.shape, trasR.nnz, density)

    tstsR = _load_binarized(path_stem + '_tst.txt')

    sampler = Sampler(trasR, negRatio, batch_size)
    wrmf = WRMF(n_users, n_items, topN, split_method, eval_metrics, weight,
                reg, n_factors, batch_size)
    scores = wrmf.train(fold_no, trasR, tstsR, sampler)

    print(label, 'weight=', weight, 'reg=', reg)

    metric_names = ','.join(['%s' % name for name in eval_metrics])
    score_text = ','.join(['%.6f' % (value) for value in scores])
    print('fold=%d:' % fold_no, metric_names, '=', score_text)

    return scores
Esempio n. 9
0
def worker(fold, n_A, n_B, dataset_dir, A_name, B_name):
    """Train and evaluate an RRK model on one fold of a two-sided dataset.

    Loads the A->B and B->A train/test matrices for the fold, trains ``RRK``
    on them and prints the unilateral and bilateral scores it returns.
    ``topN``, ``split_method``, ``eval_metrics``, ``reg``, ``n_factors`` and
    ``batch_size`` are module-level names.

    Args:
        fold: Zero-based fold index (``fold + 1`` is used in file names,
            logs and the call to ``train``).
        n_A: Number of entities on side A.
        n_B: Number of entities on side B.
        dataset_dir: Path prefix joined by plain concatenation with the file
            name; its second-to-last ``/``-separated part labels the logs.
        A_name: Name of side A as it appears in the file names.
        B_name: Name of side B as it appears in the file names.

    Returns:
        ``[uni_scores, bi_scores]`` as returned by ``RRK.train``.
    """
    fold_no = fold + 1

    def _load(n_rows, n_cols, src, dst, split):
        # File name pattern: <src>2<dst>_<fold>_<split>.txt
        path = (dataset_dir + src + '2' + dst + '_' + str(fold_no) + '_' +
                split + '.txt')
        return lil_matrix(loadSparseR(n_rows, n_cols, path))

    A2B_trasR = _load(n_A, n_B, A_name, B_name, 'tra')
    B2A_trasR = _load(n_B, n_A, B_name, A_name, 'tra')

    # Combined matrix is only used for the statistics line printed below.
    side = n_A + n_B
    trasR = lil_matrix((side, side))
    trasR[:n_A, :n_B] = A2B_trasR
    trasR[n_A:, n_B:] = B2A_trasR
    label = dataset_dir.split('/')[-2]
    print(label + '@%d:' % fold_no, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    A2B_tstsR = _load(n_A, n_B, A_name, B_name, 'tst')
    B2A_tstsR = _load(n_B, n_A, B_name, A_name, 'tst')

    rrtcf = RRK(n_A, n_B, topN, split_method, eval_metrics, reg, n_factors,
                batch_size)
    uni_scores, bi_scores = rrtcf.train(fold_no, A2B_trasR, A2B_tstsR,
                                        B2A_trasR, B2A_tstsR)

    # Header line with the metric names.
    metric_names = ','.join(['%s' % name for name in eval_metrics])
    print(metric_names + '@%d:' % (topN))

    # Unilateral and bilateral results on a single line.
    print(
        label + '@%d: uni@%d=' % (fold_no, topN) +
        ','.join(['%.6f' % (value) for value in uni_scores]) +
        '  bi@%d=' % (topN) +
        ','.join(['%.6f' % (value) for value in bi_scores]))

    return [uni_scores, bi_scores]