def cal_rec(p, cut):
    """Write the top-`cut` recommended item indices for every user.

    Loads the factor matrices ``final-U.dat`` and ``final-V.dat`` from the
    directory ``'cdl' + str(p)``, scores every (user, item) pair as the
    product ``U * V.T`` and, for each user row, writes one line to
    ``<dir>/rec-list.dat`` of the form ``<hits>:<idx> <idx> ...`` where
    ``<hits>`` is how many of the recommended indices have a positive entry
    in the matrix returned by ``read_user('cf-test-1-users.dat')``.

    Args:
        p: Experiment identifier; selects the ``cdl<p>`` directory.
        cut: Maximum number of item indices to keep per user.
    """
    R_true = read_user('cf-test-1-users.dat')
    dir_save = 'cdl' + str(p)
    # atleast_2d keeps a single-row file two-dimensional, as np.mat did.
    U = np.atleast_2d(np.loadtxt(dir_save + '/final-U.dat'))
    V = np.atleast_2d(np.loadtxt(dir_save + '/final-V.dat'))
    R = U.dot(V.T)
    num_u = R.shape[0]
    # The context manager closes the file even if an iteration raises.
    with open(dir_save + '/rec-list.dat', 'w') as fp:
        for i in range(num_u):
            if i != 0 and i % 100 == 0:
                # Parenthesised single argument prints identically on
                # Python 2 and Python 3.
                print('Iter ' + str(i))
            l_score = R[i, :].tolist()
            # Stable sort: items with equal scores keep ascending index order.
            pl = sorted(enumerate(l_score), key=lambda d: d[1], reverse=True)
            # Slicing before unpacking avoids indexing an empty zip() result
            # when there are no items.
            l_rec = [idx for idx, _ in pl[:cut]]
            s_rec = set(l_rec)
            # Column indices of the positive entries in this user's row.
            # np.asarray(...).ravel() flattens the result without relying on
            # the matrix-only ``.A1`` attribute.
            # NOTE(review): indexing with [1] assumes R_true[i, :] is still
            # two-dimensional (e.g. an np.matrix) - confirm against read_user.
            true_cols = np.asarray(np.where(R_true[i, :] > 0)[1]).ravel()
            s_true = set(true_cols.tolist())
            cnt_hit = len(s_rec.intersection(s_true))
            fp.write('%d:' % cnt_hit)
            fp.write(' '.join(map(str, l_rec)))
            fp.write('\n')
cnt_hit = len(s_rec.intersection(s_true)) fp.write('%d:' % cnt_hit) fp.write(' '.join(map(str, l_rec))) fp.write('\n') fp.close() if __name__ == '__main__': # give the same p as given in cdl.py p = 1 M_low = 50 M_high = 300 cal_rec(p, M_high) dir_save = 'cdl%d' % p R_test = read_user('cf-test-1-users.dat') fp = open('rec-list.dat') lines = fp.readlines() total = 0 correct = 0 users = 0 total_items_liked = 0 num_users = len(range(R_test.shape[0])) # recall@M is calculated for M = 50 to 300 recall_levels = M_high-M_low + 1 recallArray = np.zeros(shape=(num_users,recall_levels)) for user_id in range(num_users):
cnt_hit = len(s_rec.intersection(s_true)) fp.write('%d:' % cnt_hit) fp.write(' '.join(map(str, l_rec))) fp.write('\n') fp.close() if __name__ == '__main__': # give the same p as given in cdl.py p = 1 M_low = 50 M_high = 300 cal_rec(p, M_high) dir_save = 'cdl%d' % p R_test = read_user('data/cf-test-1-users.dat') fp = open(dir_save + '/rec-list.dat') lines = fp.readlines() total = 0 correct = 0 users = 0 total_items_liked = 0 num_users = len(range(R_test.shape[0])) # recall@M is calculated for M = 50 to 300 recall_levels = M_high - M_low + 1 recallArray = np.zeros(shape=(num_users, recall_levels)) for user_id in range(num_users):
lv = 1e-2 # lambda_v/lambda_n in CDL dir_save = 'cdl%d' % p if not os.path.isdir(dir_save): os.system('mkdir %s' % dir_save) fp = open(dir_save + '/cdl.log', 'w') print 'p%d: lambda_v/lambda_u/ratio/K: %f/%f/%f/%d' % (p, lambda_v, lambda_u, lv, K) fp.write('p%d: lambda_v/lambda_u/ratio/K: %f/%f/%f/%d\n' % \ (p,lambda_v,lambda_u,lv,K)) fp.close() if is_dummy: X = data.get_dummy_mult() R = data.read_dummy_user() else: X = data.get_mult() R = data.read_user() # set to INFO to see less information during training logging.basicConfig(level=logging.DEBUG) #ae_model = AutoEncoderModel(mx.gpu(0), [784,500,500,2000,10], pt_dropout=0.2, # internal_act='relu', output_act='relu') ae_model = AutoEncoderModel(mx.cpu(2), [X.shape[1], 100, K], pt_dropout=0.2, internal_act='relu', output_act='relu') train_X = X #ae_model.layerwise_pretrain(train_X, 256, 50000, 'sgd', l_rate=0.1, decay=0.0, # lr_scheduler=mx.misc.FactorScheduler(20000,0.1)) #V = np.zeros((train_X.shape[0],10)) V = np.random.rand(train_X.shape[0], K) / 10
s_true = set(np.where(R_true[i, :] > 0)[1]) cnt_hit = len(s_rec.intersection(s_true)) fp.write("%d:" % cnt_hit) fp.write(" ".join(map(str, l_rec))) fp.write("\n") fp.close() if __name__ == "__main__": # give the same p as given in cdl.py p = 4 cal_rec(p, 300) dir_save = "cdl%d" % p R_test = read_user("cf-test-1-users.dat") fp = open(dir_save + "/rec-list.dat") lines = fp.readlines() total = 0 correct = 0 users = 0 total_items_liked = 0 num_users = len(range(R_test.shape[0])) # recall@M is calculated for M = 50 to 300 M_low = 50 M_high = 300 recall_levels = M_high - M_low + 1 recallArray = np.zeros(shape=(num_users, recall_levels))
np.random.seed(1234) # set seed lv = 1e-2 # lambda_v/lambda_n in CDL dir_save = 'cdl%d' % p if not os.path.isdir(dir_save): os.system('mkdir %s' % dir_save) fp = open(dir_save+'/cdl.log','w') print 'p%d: lambda_v/lambda_u/ratio/K: %f/%f/%f/%d' % (p,lambda_v,lambda_u,lv,K) fp.write('p%d: lambda_v/lambda_u/ratio/K: %f/%f/%f/%d\n' % \ (p,lambda_v,lambda_u,lv,K)) fp.close() if is_dummy: X = data.get_dummy_mult() R = data.read_dummy_user() else: X = data.get_mult() R = data.read_user() # set to INFO to see less information during training logging.basicConfig(level=logging.DEBUG) #ae_model = AutoEncoderModel(mx.gpu(0), [784,500,500,2000,10], pt_dropout=0.2, # internal_act='relu', output_act='relu') #mx.cpu() no param needed for cpu. ae_model = AutoEncoderModel(mx.cpu(), [X.shape[1],100,K], pt_dropout=0.2, internal_act='relu', output_act='relu') train_X = X #ae_model.layerwise_pretrain(train_X, 256, 50000, 'sgd', l_rate=0.1, decay=0.0, # lr_scheduler=mx.misc.FactorScheduler(20000,0.1)) #V = np.zeros((train_X.shape[0],10)) V = np.random.rand(train_X.shape[0],K)/10
import csv

import numpy as np

from data import read_user

p = 2
user_id = 1

# read predicted results
dir_save = 'cdl%d' % p

# Build a lookup from zero-based article index to the value in column 3 of
# raw-data.csv (printed below as the article title). Row 0 is skipped as a
# header, hence the ``i - 1`` key.
d_id_title = dict()
# 'rb' is the mode the Python 2 csv module expects; the with-block makes sure
# the handle is closed, which the bare open() call never did.
with open('raw-data.csv', 'rb') as f_csv:
    for i, row in enumerate(csv.reader(f_csv)):
        if i == 0:
            continue
        d_id_title[i - 1] = row[3]

R_test = read_user('cf-test-1-users.dat')
R_train = read_user('cf-train-1-users.dat')

# Only the lines are needed, so release the file handle immediately.
with open(dir_save + '/rec-list.dat') as fp:
    lines = fp.readlines()

# Column indices with a positive entry in the chosen user's row.
# np.asarray(...).ravel() flattens the index vector without relying on the
# matrix-only ``.A1`` attribute.
# NOTE(review): indexing with [1] assumes read_user returns a 2-D matrix whose
# row slice stays two-dimensional - confirm against data.read_user.
s_test = set(np.asarray(np.where(R_test[user_id, :] > 0)[1]).ravel().tolist())
l_train = np.asarray(np.where(R_train[user_id, :] > 0)[1]).ravel().tolist()

# Each line looks like "<hits>:<idx> <idx> ...". split() with no argument
# yields an empty list for an empty recommendation list instead of [''],
# which int() would reject.
l_pred = [int(tok) for tok in lines[user_id].strip().split(':')[1].split()]

print('##### Articles in the Training Sets #####')
for i in l_train:
    print(d_id_title[i])

print('\n##### Articles Recommended (Correct Ones Marked by Stars) #####')
for i in l_pred:
    if i in s_test:
        print('* ' + d_id_title[i])
    else:
        print(d_id_title[i])
import csv

import numpy as np

from data import read_user

p = 2
user_id = 1

# read predicted results
dir_save = 'cdl%d' % p

# Map zero-based article index -> column 3 of raw-data.csv (printed below as
# the article title). The first row is treated as a header and skipped, so
# data row i is stored under key i - 1.
d_id_title = dict()
# Binary mode is what the Python 2 csv module expects; using a with-block
# guarantees the handle is closed (the original never closed it).
with open('raw-data.csv', 'rb') as csv_file:
    for i, row in enumerate(csv.reader(csv_file)):
        if i == 0:
            continue
        d_id_title[i - 1] = row[3]

R_test = read_user('cf-test-1-users.dat')
R_train = read_user('cf-train-1-users.dat')

# Read every line up front and close the file right away.
with open(dir_save + '/rec-list.dat') as fp:
    lines = fp.readlines()

# Positive-entry column indices for the selected user. Flattening through
# np.asarray(...).ravel() avoids depending on the matrix-only ``.A1``.
# NOTE(review): the [1] index assumes the row slice is still 2-D (np.matrix
# behaviour) - confirm against data.read_user.
s_test = set(np.asarray(np.where(R_test[user_id, :] > 0)[1]).ravel().tolist())
l_train = np.asarray(np.where(R_train[user_id, :] > 0)[1]).ravel().tolist()

# Line format: "<hits>:<idx> <idx> ...". Argument-less split() returns []
# for an empty list of recommendations rather than [''], so int() never sees
# an empty string.
l_pred = [int(tok) for tok in lines[user_id].strip().split(':')[1].split()]

print('##### Articles in the Training Sets #####')
for i in l_train:
    print(d_id_title[i])

print('\n##### Articles Recommended (Correct Ones Marked by Stars) #####')
for i in l_pred:
    if i in s_test:
        print('* ' + d_id_title[i])
    else:
        print(d_id_title[i])
cnt_hit = len(s_rec.intersection(s_true)) fp.write('%d:' % cnt_hit) fp.write(' '.join(map(str, l_rec))) fp.write('\n') fp.close() if __name__ == '__main__': # give the same p as given in cdl.py p = 1 M_low = 50 M_high = 300 cal_rec(p, M_high) dir_save = 'cdl%d' % p R_test = read_user('data/test_P1_3.dat') fp = open(dir_save + '/rec-list.dat') lines = fp.readlines() total = 0 correct = 0 users = 0 total_items_liked = 0 num_users = len(range(R_test.shape[0])) # recall@M is calculated for M = 50 to 300 recall_levels = M_high - M_low + 1 recallArray = np.zeros(shape=(num_users, recall_levels)) for user_id in range(num_users):
def _collect_interactions(matrix, max_negatives=0):
    """Return ``[user, item, label]`` triples taken from a sparse matrix.

    Every entry equal to 1 yields a triple with label 1. For each user, the
    first ``max_negatives`` entries (in ascending item order) that are not 1
    yield a triple with label 0.
    """
    triples = []
    for user in range(matrix.shape[0]):
        # Densify one row at a time: scalar lookups such as matrix[user, item]
        # on a CSR matrix are far slower than walking a dense row.
        row = matrix[user].toarray().ravel()
        negatives = 0
        for item, value in enumerate(row):
            if value == 1:
                triples.append([user, item, 1])
            elif negatives < max_negatives:
                triples.append([user, item, 0])
                negatives += 1
    return triples


def main():
    """Train the model on the dummy data set and report evaluation metrics.

    Steps, in order:
      1. Load the item feature matrix and the train/test user-item matrices
         (50 users x 1929 items) from ``data/dummy``.
      2. Build float32 ``[user, item, label]`` arrays: the training array
         holds all positives plus up to 20 label-0 entries per user; the test
         array holds positives only.
      3. Pretrain and fine-tune a ``CollaborativeDeepLearning`` model.
      4. Print the test RMSE, the AUC and recall at M = 50, 100, ..., 300,
         then plot recall against M.
    """
    logging.info('reading data')
    item_mat = data.get_mult()
    trainM = sparse.csr_matrix(
        data.read_user(f_in='data/dummy/cf-train-10-users.dat',
                       num_u=50, num_v=1929))
    testM = sparse.csr_matrix(
        data.read_user(f_in='data/dummy/cf-test-10-users.dat',
                       num_u=50, num_v=1929))

    train = np.array(
        _collect_interactions(trainM, max_negatives=20)).astype('float32')
    # No label-0 samples are added to the test set.
    test = np.array(_collect_interactions(testM)).astype('float32')

    num_item_feat = item_mat.shape[1]

    model = CollaborativeDeepLearning(item_mat, [num_item_feat, 50, 10])
    model.pretrain(lamda_w=0.001, encoder_noise=0.3, epochs=10)
    # 'fineture' is the method name exposed by the model class.
    model.fineture(train, test, lamda_u=0.01, lamda_v=0.1, lamda_n=0.1,
                   lr=0.01, epochs=500)
    testing_rmse = model.getRMSE(test)
    print('Testing RMSE = {}'.format(testing_rmse))

    import metrics
    print('AUC %s' % metrics.full_auc(model.cdl_model, testM))

    import matplotlib.pyplot as plt
    M_low = 50
    M_high = 300
    # Recall is sampled every 50 items between M_low and M_high inclusive,
    # i.e. at 50, 100, 150, 200, 250 and 300.
    recall_levels = list(range(M_low, M_high + 1, 50))
    recallArray = np.zeros(len(recall_levels))
    for x, n in enumerate(recall_levels):
        test_recall = metrics.recall_at_k(model.cdl_model, testM, k=n)
        recallArray[x] = test_recall
        print('Recall: %.2f.' % (test_recall))
    plt.plot(recall_levels, recallArray)
    plt.ylabel("Recall")
    plt.xlabel("M")
    plt.title("Proposed: Recall@M")
    plt.show()