# Set-up stage of a CDL run: seed the RNG, create the run directory, log the
# hyper-parameters, load the data and build the autoencoder model.
# `p`, `lambda_v`, `lambda_u`, `K` and `is_dummy` must already be defined.

np.random.seed(1234)  # set seed
lv = 1e-2  # lambda_v/lambda_n in CDL

dir_save = 'cdl%d' % p
if not os.path.isdir(dir_save):
    # Create the directory in-process rather than shelling out to `mkdir`,
    # so a failure raises here instead of being silently ignored.
    os.makedirs(dir_save)

# Build the hyper-parameter summary once; it goes to stdout and to the log.
_run_summary = 'p%d: lambda_v/lambda_u/ratio/K: %f/%f/%f/%d' % (
    p, lambda_v, lambda_u, lv, K)
# Single-argument call form: valid as a Python 2 statement and a Python 3 call.
print(_run_summary)
# `with` guarantees the log file is closed even if the write fails.
with open(os.path.join(dir_save, 'cdl.log'), 'w') as fp:
    fp.write(_run_summary + '\n')

if is_dummy:
    X = data.get_dummy_mult()
    R = data.read_dummy_user()
else:
    X = data.get_mult()
    R = data.read_user()

# set to INFO to see less information during training
logging.basicConfig(level=logging.DEBUG)
# ae_model = AutoEncoderModel(mx.gpu(0), [784,500,500,2000,10], pt_dropout=0.2,
#     internal_act='relu', output_act='relu')

# mx.cpu() no param needed for cpu.
# Layer sizes: input width taken from X, one hidden layer of 100, then K.
ae_model = AutoEncoderModel(mx.cpu(), [X.shape[1], 100, K], pt_dropout=0.2,
                            internal_act='relu', output_act='relu')

train_X = X
# ae_model.layerwise_pretrain(train_X, 256, 50000, 'sgd', l_rate=0.1, decay=0.0,
#     lr_scheduler=mx.misc.FactorScheduler(20000,0.1))
# V = np.zeros((train_X.shape[0],10))
# Set-up stage of a CDL run: seed the RNG, create the run directory, log the
# hyper-parameters, load the data and build the autoencoder model.
# `p`, `lambda_v`, `lambda_u`, `K` and `is_dummy` must already be defined.

np.random.seed(1234)  # set seed
lv = 1e-2  # lambda_v/lambda_n in CDL

dir_save = 'cdl%d' % p
if not os.path.isdir(dir_save):
    # Create the directory in-process rather than shelling out to `mkdir`,
    # so a failure raises here instead of being silently ignored.
    os.makedirs(dir_save)

# Build the hyper-parameter summary once; it goes to stdout and to the log.
_run_summary = 'p%d: lambda_v/lambda_u/ratio/K: %f/%f/%f/%d' % (
    p, lambda_v, lambda_u, lv, K)
# Single-argument call form: valid as a Python 2 statement and a Python 3 call.
print(_run_summary)
# `with` guarantees the log file is closed even if the write fails.
with open(os.path.join(dir_save, 'cdl.log'), 'w') as fp:
    fp.write(_run_summary + '\n')

if is_dummy:
    X = data.get_dummy_mult()
    R = data.read_dummy_user()
else:
    X = data.get_mult()
    R = data.read_user()

# set to INFO to see less information during training
logging.basicConfig(level=logging.DEBUG)
# ae_model = AutoEncoderModel(mx.gpu(0), [784,500,500,2000,10], pt_dropout=0.2,
#     internal_act='relu', output_act='relu')

# Layer sizes: input width taken from X, one hidden layer of 100, then K.
ae_model = AutoEncoderModel(mx.cpu(2), [X.shape[1], 100, K], pt_dropout=0.2,
                            internal_act='relu', output_act='relu')

train_X = X
# ae_model.layerwise_pretrain(train_X, 256, 50000, 'sgd', l_rate=0.1, decay=0.0,
#     lr_scheduler=mx.misc.FactorScheduler(20000,0.1))
# V = np.zeros((train_X.shape[0],10))
def _rating_triples(matrix, max_negatives):
    """Flatten a user-by-item matrix into ``[user, item, label]`` rows.

    Items are visited in column order for each user. Every cell equal to 1
    yields a row with label 1. Every other cell yields a row with label 0
    until ``max_negatives`` such rows have been emitted for that user;
    pass 0 to keep positives only.

    Args:
        matrix: 2-D scipy sparse matrix supporting row indexing and
            ``toarray()``.
        max_negatives: Per-user cap on label-0 rows.

    Returns:
        A float32 NumPy array built from the collected rows.
    """
    triples = []
    for user in range(matrix.shape[0]):
        # Densify one row at a time so the inner loop avoids a separate
        # sparse lookup for every (user, item) cell.
        row = matrix[user].toarray().ravel()
        negatives = 0
        for item, value in enumerate(row):
            if value == 1:
                triples.append([user, item, 1])
            elif negatives < max_negatives:
                triples.append([user, item, 0])
                negatives += 1
    return np.array(triples).astype('float32')


def main():
    """Train a CollaborativeDeepLearning model on the dummy data and report.

    Loads the item matrix and the dummy train/test user files, builds
    (user, item, label) triples, pretrains and fine-tunes the model, then
    prints the test RMSE, AUC and recall at several cut-offs and plots
    recall against the cut-off M.
    """
    logging.info('reading data')
    item_mat = data.get_mult()
    trainM = sparse.csr_matrix(
        data.read_user(f_in='data/dummy/cf-train-10-users.dat',
                       num_u=50, num_v=1929))
    testM = sparse.csr_matrix(
        data.read_user(f_in='data/dummy/cf-test-10-users.dat',
                       num_u=50, num_v=1929))

    # Training set: all positives plus the first 20 non-positive items per
    # user. Test set: positives only.
    train = _rating_triples(trainM, max_negatives=20)
    test = _rating_triples(testM, max_negatives=0)

    num_item_feat = item_mat.shape[1]
    model = CollaborativeDeepLearning(item_mat, [num_item_feat, 50, 10])
    model.pretrain(lamda_w=0.001, encoder_noise=0.3, epochs=10)
    # `fineture` is the method name as called in the original code.
    model.fineture(train, test, lamda_u=0.01, lamda_v=0.1, lamda_n=0.1,
                   lr=0.01, epochs=500)

    testing_rmse = model.getRMSE(test)
    print('Testing RMSE = {}'.format(testing_rmse))

    import metrics
    print('AUC %s' % metrics.full_auc(model.cdl_model, testM))

    import matplotlib.pyplot as plt
    recall_cutoffs = [50, 100, 150, 200, 250, 300]
    recall_values = np.zeros(len(recall_cutoffs))
    for idx, n in enumerate(recall_cutoffs):
        test_recall = metrics.recall_at_k(model.cdl_model, testM, k=n)
        recall_values[idx] = test_recall
        print('Recall: %.2f.' % (test_recall))

    plt.plot(recall_cutoffs, recall_values)
    plt.ylabel("Recall")
    plt.xlabel("M")
    plt.title("Proposed: Recall@M")
    plt.show()