def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate a CML model on one cross-validation fold.

    Loads this fold's binarized train/test rating matrices, trains CML
    with a negative sampler, prints per-fold statistics and the final
    metric scores, and returns the scores.
    """
    fold_id = fold + 1
    tag = dataset_dir.split('/')[-2] + '@%d:' % fold_id

    # Training ratings, binarized at the module-level threshold.
    traFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tra.txt'
    trasR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, traFilePath),
                    binarize_threshold))
    print(tag, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    # Held-out test ratings for this fold, binarized the same way.
    tstFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tst.txt'
    tstsR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tstFilePath),
                    binarize_threshold))

    sampler = Sampler(trasR, n_neg=negSample, batch_size=batch_size)
    cml = CML(n_users, n_items, topN, split_method, eval_metrics, reg_cov,
              margin, use_rank_weight, clip_norm, n_factors, batch_size)
    scores = cml.train(fold_id, trasR, tstsR, sampler)

    metric_names = ','.join('%s' % eval_metric for eval_metric in eval_metrics)
    score_text = ','.join('%.6f' % score for score in scores)
    print(tag, metric_names + '@%d=' % topN + score_text)

    cml.close()
    return scores
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate a GBPRMF model on one cross-validation fold.

    Loads this fold's binarized train/test rating matrices, trains
    group-BPR matrix factorization with a group-aware sampler, prints
    the hyperparameters and metric scores, and returns the scores.
    """
    fold_id = fold + 1
    tag = dataset_dir.split('/')[-2] + '@%d:' % fold_id

    # Training ratings, binarized at the module-level threshold.
    traFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tra.txt'
    trasR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, traFilePath),
                    binarize_threshold))
    print(tag, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    # Held-out test ratings for this fold.
    tstFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tst.txt'
    tstsR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tstFilePath),
                    binarize_threshold))

    sampler = Sampler(trasR, gsize, negSample, batch_size)
    gbprmf = GBPRMF(n_users, n_items, topN, rho, gsize, split_method,
                    eval_metrics, reg, n_factors, batch_size)
    scores = gbprmf.train(fold_id, trasR, tstsR, sampler)

    print(tag, 'gsize=', gsize, 'rho=', rho, 'reg=', reg)
    metric_names = ','.join('%s' % eval_metric for eval_metric in eval_metrics)
    score_text = ','.join('%.6f' % score for score in scores)
    print(tag, metric_names + '@%d=' % topN + score_text)

    gbprmf.close()
    return scores
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate an SVD rating predictor on one CV fold.

    Loads this fold's train/test ratings, materializes them as
    (user, item, rating) triads, trains SVD, prints the metric scores,
    and returns them.

    Fix: the result line previously printed the 0-based ``fold`` while
    the sibling workers label folds 1-based; it now prints ``fold + 1``
    for consistent logs.
    """
    fold_id = fold + 1

    traFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tra.txt'
    trasR = loadSparseR(n_users, n_items, traFilePath)
    print(
        dataset_dir.split('/')[-2] + ':', trasR.shape, trasR.nnz,
        '%.2f' % (trasR.nnz / float(trasR.shape[0])))
    # Flatten the sparse training matrix into (user, item, rating) triads.
    tra_tuple = np.array([(user, item, trasR[user, item])
                          for user, item in np.asarray(trasR.nonzero()).T])

    tstFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tst.txt'
    tstsR = loadSparseR(n_users, n_items, tstFilePath)
    tst_tuple = np.array([(user, item, tstsR[user, item])
                          for user, item in np.asarray(tstsR.nonzero()).T])

    # negRatio=0: rating prediction draws no negative samples.
    sampler = Sampler(trasR=trasR, negRatio=.0, batch_size=batch_size)
    svd = SVD(n_users, n_items, eval_metrics, range_of_ratings, reg,
              n_factors, batch_size)
    scores = svd.train(fold_id, tra_tuple, tst_tuple, sampler)

    print('fold=%d:' % fold_id,
          ','.join(['%s' % eval_metric for eval_metric in eval_metrics]),
          '=', ','.join(['%.6f' % (score) for score in scores]))
    return scores
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate a PRIGP model on one cross-validation fold.

    Loads this fold's binarized train/test rating matrices, trains
    PRIGP, prints its hyperparameters and metric scores, and returns
    the scores.
    """
    fold_id = fold + 1
    tag = dataset_dir.split('/')[-2] + '@%d:' % fold_id

    # Training ratings, binarized at the module-level threshold.
    traFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tra.txt'
    trasR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, traFilePath),
                    binarize_threshold))
    print(tag, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    # Held-out test ratings for this fold.
    tstFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tst.txt'
    tstsR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tstFilePath),
                    binarize_threshold))

    prigp = PRIGP(n_users, n_items, topK, topN, split_method, eval_metrics,
                  alpha, reg, n_factors, batch_size)
    scores = prigp.train(fold_id, trasR, tstsR)

    print('topK=', topK, 'alpha=', alpha, 'reg=', reg)
    metric_names = ','.join('%s' % eval_metric for eval_metric in eval_metrics)
    score_text = ','.join('%.6f' % score for score in scores)
    print(tag, metric_names + '@%d=' % topN + score_text)

    prigp.close()
    return scores
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate a user-item CF model on one CV fold.

    Loads this fold's binarized train/test rating matrices, runs the
    neighborhood-based UserItemCF model, prints the metric scores, and
    returns them.
    """
    fold_id = fold + 1
    tag = dataset_dir.split('/')[-2] + '@%d:' % fold_id

    # Training ratings, binarized at the module-level threshold.
    traFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tra.txt'
    trasR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, traFilePath),
                    binarize_threshold))
    print(tag, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    # Held-out test ratings for this fold.
    tstFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tst.txt'
    tstsR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tstFilePath),
                    binarize_threshold))

    uicf = UserItemCF(n_users, n_items, topK, theta, topN, split_method,
                      eval_metrics)
    scores = uicf.train(fold_id, trasR, tstsR)

    metric_names = ','.join('%s' % eval_metric for eval_metric in eval_metrics)
    score_text = ','.join('%.6f' % score for score in scores)
    print(tag, metric_names + '@%d=' % topN + score_text)
    return scores
def worker(fold, n_A, n_B, dataset_dir, A_name, B_name):
    """Train and evaluate SocialCollab on one fold of bilateral data.

    Loads the A->B and B->A training matrices, evaluates on the test
    pairs that are matched in BOTH directions, combines the two sides'
    scores into a weighted bilateral score, prints it, and returns it.
    """
    fold_id = fold + 1
    prefix = dataset_dir.split('/')[-2]

    def load(src, dst, n_rows, n_cols, suffix):
        # One directional rating matrix of this fold (e.g. A->B train).
        path = dataset_dir + src + '2' + dst + '_' + str(fold_id) + suffix
        return lil_matrix(loadSparseR(n_rows, n_cols, path))

    A2B_trasR = load(A_name, B_name, n_A, n_B, '_tra.txt')
    B2A_trasR = load(B_name, A_name, n_B, n_A, '_tra.txt')

    # Pack both directions into one block matrix just for the stats print.
    trasR = lil_matrix((n_A + n_B, n_A + n_B))
    trasR[:n_A, :n_B] = A2B_trasR
    trasR[n_A:, n_B:] = B2A_trasR
    print(prefix + '@%d:' % fold_id, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    A2B_tstsR = load(A_name, B_name, n_A, n_B, '_tst.txt')
    B2A_tstsR = load(B_name, A_name, n_B, n_A, '_tst.txt')
    # A test pair only counts when both directions hold a rating.
    A2B_match_tstsR = A2B_tstsR.multiply(B2A_tstsR.T) > 0

    sc = SocialCollab(topN, split_method, eval_metrics)
    A2B_scores, B2A_scores = sc.train(fold_id, A2B_trasR, B2A_trasR,
                                      A2B_match_tstsR)

    # Weight each side's scores by how many of its users occur in the
    # matched test pairs.
    match_rows, match_cols = A2B_match_tstsR.nonzero()
    tst_Anum = len(set(match_rows))
    tst_Bnum = len(set(match_cols))
    # NOTE(review): divides by tst_Anum + tst_Bnum — zero if the fold has
    # no matched test pairs; confirm upstream guarantees at least one.
    total = float(tst_Anum + tst_Bnum)
    bi_scores = [
        tst_Anum / total * A2B_scores[i] + tst_Bnum / total * B2A_scores[i]
        for i in range(len(eval_metrics))
    ]

    # print results
    print(','.join('%s' % eval_metric for eval_metric in eval_metrics) +
          '@%d:' % topN)
    # bilateral
    print(prefix + '@%d: bi@%d=' % (fold_id, topN) +
          ','.join('%.6f' % score for score in bi_scores))
    return bi_scores
def split_tra_tst(usernum, itemnum, inFilePath, cv_fold):
    """Split a ratings file into cv_fold train/test partitions on disk.

    Reads the sparse rating matrix from *inFilePath*, shuffles the
    (user, item, rating) triads, and for each fold k (1-based in the
    filenames) writes ``<name>_k_tst.<ext>`` (the k-th slice) and
    ``<name>_k_tra.<ext>`` (all remaining triads) next to the input file.

    Fix: ``len(triads) % cv_fold`` remainder instances previously fell
    outside every test slice (so they were trained on in all folds but
    never evaluated); the last fold's test slice now extends to the end
    of the shuffled data, so every instance is tested exactly once.
    """
    sR = loadSparseR(usernum, itemnum, inFilePath)
    # Flatten the sparse matrix into (user, item, rating) triads.
    useritemrating_tuples = np.array([
        (user, item, sR[user, item])
        for user, item in np.asarray(sR.nonzero()).T
    ])
    np.random.shuffle(useritemrating_tuples)

    n_total = len(useritemrating_tuples)
    fold_ins_num = int(n_total / cv_fold)
    outdir = '/'.join(inFilePath.split('/')[:-1]) + '/'
    filename = inFilePath.split('/')[-1]

    for cv_fold_ind in range(cv_fold):
        start = cv_fold_ind * fold_ins_num
        # Last fold absorbs the remainder so no instance is left untested.
        end = ((cv_fold_ind + 1) * fold_ins_num
               if cv_fold_ind < cv_fold - 1 else n_total)

        # NOTE(review): replace() rewrites every '.' in the filename —
        # assumes a single extension dot (e.g. 'ratings.txt'); confirm.
        tst_outFilePath = outdir + filename.replace(
            '.', '_' + str(cv_fold_ind + 1) + '_tst.')
        tst = useritemrating_tuples[start:end, :]
        saveTriads(tst, tst_outFilePath, isRatingInt=False)

        tra_outFilePath = outdir + filename.replace(
            '.', '_' + str(cv_fold_ind + 1) + '_tra.')
        tra = np.concatenate([
            useritemrating_tuples[:start, :],
            useritemrating_tuples[end:, :]
        ])
        saveTriads(tra, tra_outFilePath, isRatingInt=False)
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate a WRMF model on one cross-validation fold.

    Loads this fold's binarized train/test rating matrices, trains
    weighted regularized MF with a negative-ratio sampler, prints the
    hyperparameters and metric scores, and returns the scores.
    """
    fold_id = fold + 1
    tag = dataset_dir.split('/')[-2] + '@%d:' % fold_id

    # Training ratings, binarized at the module-level threshold.
    traFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tra.txt'
    trasR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, traFilePath),
                    binarize_threshold))
    print(tag, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    # Held-out test ratings for this fold.
    tstFilePath = dataset_dir + 'ratings__' + str(fold_id) + '_tst.txt'
    tstsR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tstFilePath),
                    binarize_threshold))

    sampler = Sampler(trasR, negRatio, batch_size)
    wrmf = WRMF(n_users, n_items, topN, split_method, eval_metrics, weight,
                reg, n_factors, batch_size)
    scores = wrmf.train(fold_id, trasR, tstsR, sampler)

    print(tag, 'weight=', weight, 'reg=', reg)
    print('fold=%d:' % fold_id,
          ','.join('%s' % eval_metric for eval_metric in eval_metrics),
          '=', ','.join('%.6f' % score for score in scores))
    # NOTE(review): unlike the other workers, wrmf is not closed here —
    # confirm whether WRMF exposes close() and whether it should be called.
    return scores
def worker(fold, n_A, n_B, dataset_dir, A_name, B_name):
    """Train and evaluate RRK on one fold of bilateral data.

    Loads A->B and B->A train/test matrices, trains RRK, and prints and
    returns both the unilateral and bilateral scores.
    """
    fold_id = fold + 1
    prefix = dataset_dir.split('/')[-2]

    def load(src, dst, n_rows, n_cols, suffix):
        # One directional rating matrix of this fold (e.g. A->B train).
        path = dataset_dir + src + '2' + dst + '_' + str(fold_id) + suffix
        return lil_matrix(loadSparseR(n_rows, n_cols, path))

    A2B_trasR = load(A_name, B_name, n_A, n_B, '_tra.txt')
    B2A_trasR = load(B_name, A_name, n_B, n_A, '_tra.txt')

    # Pack both directions into one block matrix just for the stats print.
    trasR = lil_matrix((n_A + n_B, n_A + n_B))
    trasR[:n_A, :n_B] = A2B_trasR
    trasR[n_A:, n_B:] = B2A_trasR
    print(prefix + '@%d:' % fold_id, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    A2B_tstsR = load(A_name, B_name, n_A, n_B, '_tst.txt')
    B2A_tstsR = load(B_name, A_name, n_B, n_A, '_tst.txt')

    rrtcf = RRK(n_A, n_B, topN, split_method, eval_metrics, reg, n_factors,
                batch_size)
    uni_scores, bi_scores = rrtcf.train(fold_id, A2B_trasR, A2B_tstsR,
                                        B2A_trasR, B2A_tstsR)

    # print results
    print(','.join('%s' % eval_metric for eval_metric in eval_metrics) +
          '@%d:' % topN)
    # unilateral and bilateral
    print(prefix + '@%d: uni@%d=' % (fold_id, topN) +
          ','.join('%.6f' % score for score in uni_scores) +
          ' bi@%d=' % topN +
          ','.join('%.6f' % score for score in bi_scores))
    return [uni_scores, bi_scores]