def __init__(self, sess, num_neg=0, top_K=None, num_ranking_neg=0,
             gmf_num_factors=16, gmf_regs_emb=None, mlp_num_factors=16,
             mlp_layers=None, mlp_regs_emb=None, mlp_regs_layer=None,
             lr=0.001, epochs=100, batch_size=128, T=10**3, verbose=False):
    '''Constructor: parse the hyper-parameters and store them on the model.

    Parameters
    ----------
    sess : tf.Session
        Open TensorFlow session used to run the graph.
    num_neg : int
        Number of negative samples per positive training instance.
    top_K : list of int, optional
        Cutoffs for top-K ranking metrics; defaults to [5, 10].
    num_ranking_neg : int
        Number of negative candidates sampled for ranking evaluation.
    gmf_num_factors : int
        Embedding dimensionality of the GMF component.
    gmf_regs_emb : [float, float], optional
        L2 coefficients for the GMF user/item embeddings; defaults to [0, 0].
    mlp_num_factors : int
        Embedding dimensionality of the MLP component.
    mlp_layers : list of int, optional
        Hidden-layer widths of the MLP tower; defaults to [20, 10].
    mlp_regs_emb : [float, float], optional
        L2 coefficients for the MLP user/item embeddings; defaults to [0, 0].
    mlp_regs_layer : list of float, optional
        Per-layer L2 coefficients for the MLP weights; defaults to [0, 0].
    lr : float
        Learning rate.
    epochs : int
        Number of training epochs.
    batch_size : int
        Mini-batch size.
    T : int
        Progress is printed every T batches when `verbose` is set.
    verbose : bool
        Whether to print training progress.
    '''
    # Resolve None sentinels here instead of using mutable list defaults,
    # which would be one shared list object across every instance.
    top_K = [5, 10] if top_K is None else top_K
    gmf_regs_emb = [0, 0] if gmf_regs_emb is None else gmf_regs_emb
    mlp_layers = [20, 10] if mlp_layers is None else mlp_layers
    mlp_regs_emb = [0, 0] if mlp_regs_emb is None else mlp_regs_emb
    mlp_regs_layer = [0, 0] if mlp_regs_layer is None else mlp_regs_layer

    self.session = sess
    self.num_neg = num_neg
    self.topk = top_K
    self.num_ranking_neg = num_ranking_neg
    self.gmf_num_factors = gmf_num_factors
    self.gmf_regs_user, self.gmf_regs_item = gmf_regs_emb
    self.mlp_num_factors = mlp_num_factors
    self.mlp_layers = mlp_layers
    self.mlp_regs_layer = mlp_regs_layer
    self.mlp_regs_user, self.mlp_regs_item = mlp_regs_emb
    self.lr = lr
    self.epochs = epochs
    self.batch_size = batch_size
    self.skip_step = T
    self.verbose = verbose
    # Log the constructor arguments (helper reads this frame's locals).
    gtl.print_paras(inspect.currentframe())
def __init__(self, sess, top_K=10, num_ranking_neg=0, num_factors=32,
             regs_ui=None, lr=0.001, epochs=100, batch_size=128,
             T=10**3, verbose=False):
    '''Constructor: parse the hyper-parameters and store them on the model.

    Parameters
    ----------
    sess : tf.Session
        Open TensorFlow session used to run the graph.
    top_K : int
        Cutoff for top-K ranking metrics.
    num_ranking_neg : int
        Number of negative candidates sampled for ranking evaluation.
    num_factors : int
        Latent-factor dimensionality.
    regs_ui : [float, float], optional
        L2 coefficients for the user/item embeddings; defaults to [0, 0].
    lr : float
        Learning rate.
    epochs : int
        Number of training epochs.
    batch_size : int
        Mini-batch size.
    T : int
        Progress is printed every T batches when `verbose` is set.
    verbose : bool
        Whether to print training progress.
    '''
    # None sentinel avoids a mutable default list shared across instances.
    regs_ui = [0, 0] if regs_ui is None else regs_ui

    self.session = sess
    self.topK = top_K
    self.num_ranking_neg = num_ranking_neg
    self.num_factors = num_factors
    # Exactly one negative item is drawn per positive pair (fixed here).
    self.num_neg = 1
    self.regs_user, self.regs_item = regs_ui
    self.lr = lr
    self.epochs = epochs
    self.batch_size = batch_size
    self.skip_step = T
    self.verbose = verbose
    # Log the constructor arguments (helper reads this frame's locals).
    gtl.print_paras(inspect.currentframe())
def train_one_epoch(self, epoch):
    '''Run one training epoch over shuffled (user, item, label) triples.

    Accumulates the loss, MAE, and RMS over all mini-batches and, when
    `self.verbose` is set, prints running averages every `self.skip_step`
    batches plus an epoch-level summary at the end.
    '''
    users, items, ratings = gtl.shuffle_list(self.train_uid, self.train_iid,
                                             self.train_labels)
    batches_done = 0
    loss_sum, mae_sum, rms_sum = 0, 0, 0
    for b in range(self.num_batch):
        lo, hi = b * self.batch_size, (b + 1) * self.batch_size
        feed = {
            self.uid: users[lo:hi],
            self.iid: items[lo:hi],
            self.labels: ratings[lo:hi],
        }
        _, batch_loss, batch_mae, batch_rms = self.session.run(
            [self.opt, self.loss, self.mae, self.rms], feed_dict=feed)
        batches_done += 1
        loss_sum += batch_loss
        mae_sum += batch_mae
        rms_sum += batch_rms
        if self.verbose and batches_done % self.skip_step == 0:
            print("Epoch {0} Batch {1}: [Loss] = {2} [MAE] = {3}".format(
                epoch, batches_done, loss_sum / batches_done,
                mae_sum / batches_done))
    if self.verbose:
        print("Epoch {0}: [Loss] {1}".format(epoch, loss_sum / batches_done))
        print("Epoch {0}: [MAE] {1} and [RMS] {2}".format(
            epoch, mae_sum / batches_done, rms_sum / batches_done))
def train_one_epoch(self, epoch):
    '''Train for one epoch on (user, positive-item) pairs with one sampled
    negative item per pair (pairwise objective).

    The labels returned by the shuffle are intentionally discarded: the
    feed only uses the user, positive item, and sampled negative item.
    (The original body sliced a `batch_labels` list every batch without
    ever feeding it; that dead work is removed here.)
    '''
    uid, iid, _ = gtl.shuffle_list(self.train_uid, self.train_iid,
                                   self.train_labels)
    # Sample one negative item per (shuffled) user for the whole epoch;
    # random.choice on the precomputed per-user negative list is fast.
    neg_iid = [random.choice(self.neg_dict[u]) for u in uid]
    n_batches, total_loss = 0, 0
    for i in range(self.num_batch):
        lo, hi = i * self.batch_size, (i + 1) * self.batch_size
        _, l = self.session.run(
            [self.opt, self.loss],
            feed_dict={
                self.uid: uid[lo:hi],
                self.iid: iid[lo:hi],
                self.neg_iid: neg_iid[lo:hi]
            })
        n_batches += 1
        total_loss += l
        if self.verbose and n_batches % self.skip_step == 0:
            print("Epoch {0} Batch {1}: [Loss] = {2}".format(
                epoch, n_batches, total_loss / n_batches))
    if self.verbose:
        print("Epoch {0}: [Loss] {1}".format(epoch, total_loss / n_batches))
def __init__(self, sess, top_K, num_factors=32, reg=0.0, reg_adv=0.0,
             noise_type='random', is_adv=False, eps=0.5, lr=0.001,
             is_prec=False, save_T=50, epochs=100, batch_size=128,
             T=10**3, verbose=False):
    '''Store the model's hyper-parameters on the instance.

    Parameters
    ----------
    sess : tf.Session
        Open TensorFlow session used to run the graph.
    top_K : list of int
        Cutoffs for top-K ranking metrics.
    num_factors : int
        Latent-factor dimensionality.
    reg : float
        L2 regularization coefficient on the embeddings.
    reg_adv : float
        Weight of the adversarial regularization term.
    noise_type : str
        Kind of perturbation applied when adversarial training is on.
    is_adv : bool
        Whether adversarial training is enabled.
    eps : float
        Magnitude bound of the adversarial perturbation.
    lr : float
        Learning rate.
    is_prec : bool
        Whether to evaluate with precision-style metrics.
    save_T : int
        Checkpoint/evaluation period in epochs.
    epochs : int
        Number of training epochs.
    batch_size : int
        Mini-batch size.
    T : int
        Progress is printed every T batches when `verbose` is set.
    verbose : bool
        Whether to print training progress.
    '''
    self.session = sess
    # Capacity and regularization.
    self.num_factors = num_factors
    self.reg = reg
    self.reg_adv = reg_adv
    # Adversarial-perturbation configuration.
    self.noise_type = noise_type
    self.is_adv = is_adv
    self.eps = eps
    # Evaluation configuration.
    self.topK = top_K
    self.is_prec = is_prec
    # Optimization configuration.
    self.lr = lr
    self.save_T = save_T
    self.epochs = epochs
    self.batch_size = batch_size
    self.skip_step = T
    self.verbose = verbose
    # Per-epoch evaluation history, filled during training.
    self.metric1, self.metric2 = [], []
    # Log the constructor arguments (helper reads this frame's locals).
    gtl.print_paras(inspect.currentframe())
def train_one_epoch(self, epoch):
    '''Run one pairwise training epoch, optionally with adversarial updates.

    When `self.is_adv` is set, the perturbation ops (`update_P`/`update_Q`)
    are run on each batch before the main optimizer step.
    '''
    users, pos_items = gtl.shuffle_list(self.train_uid, self.train_iid)
    # Pre-sample one negative item per (shuffled) user for the whole epoch.
    neg_items = [random.choice(self.neg_dict[u]) for u in users]
    batches_done, loss_sum = 0, 0
    for b in range(self.num_batch):
        start = b * self.batch_size
        # The final batch slices to the end so any remainder beyond
        # num_batch * batch_size is still trained on.
        stop = None if b == self.num_batch - 1 else start + self.batch_size
        feed_dict = {
            self.uid: users[start:stop],
            self.pos_iid: pos_items[start:stop],
            self.neg_iid: neg_items[start:stop],
        }
        if self.is_adv:
            # Adversarial perturbation step precedes the parameter update.
            self.session.run([self.update_P, self.update_Q], feed_dict)
        _, batch_loss = self.session.run([self.optimizer, self.loss],
                                         feed_dict)
        batches_done += 1
        loss_sum += batch_loss
        if self.verbose and batches_done % self.skip_step == 0:
            print("[All] Training Epoch {0} Batch {1}: [Loss] = {2}".format(
                epoch, batches_done, loss_sum / batches_done))
    if self.verbose:
        print("[Epoch Average] Training Epoch {0}: [Loss] {1}".format(
            epoch, loss_sum / batches_done))
def train_one_epoch(self, epoch):
    '''Jointly train both domains for one epoch over paired mini-batches.

    The two domains' (user, item, label) lists are shuffled in unison and
    fed side by side; one optimizer step updates both domains at once.
    '''
    uid1, iid1, lb1, uid2, iid2, lb2 = gtl.shuffle_list(
        self.train_uid1, self.train_iid1, self.train_labels1,
        self.train_uid2, self.train_iid2, self.train_labels2)
    n_batches, total_loss = 0, 0
    total_mae1, total_mae2 = 0, 0
    for b in range(self.num_batch):
        lo, hi = b * self.batch_size, (b + 1) * self.batch_size
        feed = {
            self.dom1_uid: uid1[lo:hi],
            self.dom2_uid: uid2[lo:hi],
            self.dom1_iid: iid1[lo:hi],
            self.dom2_iid: iid2[lo:hi],
            self.dom1_labels: lb1[lo:hi],
            self.dom2_labels: lb2[lo:hi],
        }
        _, l, mae1, mae2 = self.session.run(
            [self.opt, self.loss, self.mae1, self.mae2], feed_dict=feed)
        n_batches += 1
        total_loss += l
        total_mae1 += mae1
        total_mae2 += mae2
        if self.verbose and n_batches % self.skip_step == 0:
            print("Epoch {0} Batch {1}: [Loss] = {2} [MAE] = {3}".format(
                epoch, n_batches, total_loss / n_batches,
                (total_mae1 + total_mae2) / (2 * n_batches)))
    # Epoch-level summary; printed regardless of the verbose flag.
    print("Epoch {0}: [Loss] {1}".format(epoch, total_loss / n_batches))
    print("Epoch {0}: [MAE] {1} and {2}".format(
        epoch, total_mae1 / n_batches, total_mae2 / n_batches))
num_epochs, batch_size, lr,\ regs_ui, regs_bias, num_factors = \ args.epochs, args.batch_size, args.lr,\ args.regs_ui, args.regs_bias,args.nfactors regs_ui = list(np.float32(eval(regs_ui))) regs_bias = list(np.float32(eval(regs_bias))) # original_matrix = mtl.load_original_matrix('Data/ml-1m/ratings.dat', header=['uid', 'iid', 'ratings', 'time'], sep='::') # train_matrix, test_matrix = mtl.matrix_split(original_matrix, opt='prediction', mode='user', test_size=0.1, seed=42) # gtl.matrix_to_mat('SVD_ML1M_90.mat',opt='coo', original=original_matrix, train=train_matrix, test=test_matrix) # gtl.matrix_to_excel('svd_all.xlsx',opt='coo',train_all=train_matrix, test_all=test_matrix) # print("Saved!") data = gtl.load_mat_as_matrix('Data/SVD_ML1M_90.mat', opt='coo') original_matrix, train_matrix, test_matrix = data['original'], data[ 'train'], data['test'] gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, gpu_options=gpu_options)) as sess: model = SVD(sess, num_factors=num_factors, regs_ui=regs_ui, regs_bias=regs_bias, lr=lr, epochs=num_epochs,
args.topk, args.ranking_ratio, args.nfactors, args.out_neg_ratio, args.epsilon,\ args.ae_regs ae_regs = list(np.float32(eval(ae_regs))) # original_matrix \ # = mtl.load_original_matrix(datafile='Data/ml-1m/ratings.dat', header=['uid', 'iid', 'ratings', 'time'], sep='::') # train_matrix, test_matrix = mtl.matrix_split(original_matrix, opt='prediction', mode='user', test_size=0.1, seed=10) # original_matrix = mtl.matrix_to_binary(original_matrix, 0) # train_matrix, test_matrix = mtl.matrix_to_binary(train_matrix,0), mtl.matrix_to_binary(test_matrix,0) # gtl.matrix_to_mat('Data/ML1M_90_Data.mat', opt='coo', original=original_matrix, train=train_matrix, test=test_matrix) # data = gtl.load_mat_as_matrix('Data/ML1M_90_Data.mat', opt='coo') original_matrix, train_matrix, test_matrix = data['original'], data['train'], data['test'] gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, gpu_options=gpu_options)) as sess: model = GAN_AE(sess, top_K=topK, ranking_list_ratio=ranking_list_ratio, neg_ratio=neg_ratio, num_factors=num_factors, ae_regs=ae_regs, eps=epsilon, lr=lr, epochs=num_epochs, batch_size=batch_size, T=1000, verbose=True) model.prepare_data(original_matrix=original_matrix, train_matrix=train_matrix, test_matrix=test_matrix) model.build_model()
'flixster-5': 'Data/flixster-hr-5.mat', 'ymov-full': 'Data/ymov-hr-full.mat', 'ymov-345': 'Data/ymov-hr-345.mat', 'ymov-45': 'Data/ymov-hr-45.mat', 'ymov-5': 'Data/ymov-hr-5.mat', 'ymus-full': 'Data/ymus-hr-full.mat', 'ymus-345': 'Data/ymus-hr-345.mat', 'ymus-45': 'Data/ymus-hr-45.mat', 'ymus-5': 'Data/ymus-hr-5.mat', } dataset = 'filmtrust-34' path = path_dict[dataset] print('Loading Data From {0}'.format(path)) data = gtl.load_mat_as_matrix(path, opt='coo') original_matrix, train_matrix, test_matrix = data['original'], data['train'], data['test'] print('Users:{0}, Items:{1}, Ratings:{2}'.format(original_matrix.shape[0], original_matrix.shape[1], original_matrix.nnz)) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, intra_op_parallelism_threads=24, inter_op_parallelism_threads=24, gpu_options=gpu_options)) as sess: model = AMF(sess, top_K=[5,10], num_factors=40, reg=0.015, reg_adv=100, noise_type='grad',
'ymov-45': 'Data/ymov-hr-45.mat', 'ymov-5': 'Data/ymov-hr-5.mat', 'ymus-full': 'Data/ymus-hr-full.mat', 'ymus-345': 'Data/ymus-hr-345.mat', 'ymus-45': 'Data/ymus-hr-45.mat', 'ymus-5': 'Data/ymus-hr-5.mat', } dataset = 'ciao-45' path = path_dict[dataset] date = '20180801' filename = 'itempop.mat' print('Loading Data From {0}'.format(path)) data = gtl.load_mat_as_matrix(path, opt='coo') original_matrix, train_matrix, test_matrix = data['original'], data[ 'train'], data['test'] print('Users:{0}, Items:{1}, Ratings:{2}'.format(original_matrix.shape[0], original_matrix.shape[1], original_matrix.nnz)) num_user, num_item = original_matrix.shape[0], original_matrix.shape[1] topK = [5, 10] total_hr, total_ndcg = np.zeros(len(topK)), np.zeros(len(topK)) tr_mat = train_matrix.transpose() item_pop_dict = {} item_pop_list = [] for item, ratings in enumerate(tr_mat.data):
def __init__(self, sess, top_K, num_factors=32, ae_regs=None,
             user_node_reg=0.0, eps=0.5, num_noise_factor=64,
             drop_out_rate=0.0, lr=0.001, is_user_node=False,
             noise_pos='W2', noise_type='random', robust_test=False,
             adv_training=False, noise_loss_ratio=0.0,
             noise_loss_ratio_W1=0.0, org_loss_ratio=0.0, is_prec=False,
             save_T=10, epochs=100, batch_size=128, T=10**3, verbose=False):
    '''Constructor: parse the hyper-parameters, validate the configuration,
    and store everything on the model.

    Parameters
    ----------
    sess : tf.Session
        Open TensorFlow session used to run the graph.
    top_K : list of int
        Cutoffs for top-K ranking metrics.
    num_factors : int
        Latent-factor dimensionality of the autoencoder.
    ae_regs : list of float, optional
        L2 coefficients for the autoencoder weights; defaults to
        [0.0, 0.0, 0.0, 0.0].
    user_node_reg : float
        L2 coefficient for the user-node weights.
    eps : float
        Magnitude bound of the noise/perturbation.
    num_noise_factor : int
        Dimensionality of the noise factors.
    drop_out_rate : float
        Dropout rate applied in the network.
    lr : float
        Learning rate.
    is_user_node : bool
        Whether the model includes a user node.
    noise_pos : str
        Where the noise is injected (e.g. 'W2' or 'USER').
    noise_type : str
        Kind of noise/perturbation to apply.
    robust_test : bool
        Evaluate robustness against noise (mutually exclusive with
        `adv_training`).
    adv_training : bool
        Train adversarially (mutually exclusive with `robust_test`).
    noise_loss_ratio, noise_loss_ratio_W1, org_loss_ratio : float
        Weights of the respective loss terms.
    is_prec : bool
        Whether to evaluate with precision-style metrics.
    save_T : int
        Checkpoint/evaluation period in epochs.
    epochs : int
        Number of training epochs.
    batch_size : int
        Mini-batch size.
    T : int
        Progress is printed every T batches when `verbose` is set.
    verbose : bool
        Whether to print training progress.

    Raises
    ------
    ValueError
        If `noise_pos == 'USER'` without `is_user_node`, or if both
        `robust_test` and `adv_training` are enabled.
    '''
    # None sentinel avoids a mutable default list shared across instances.
    ae_regs = [0.0, 0.0, 0.0, 0.0] if ae_regs is None else ae_regs

    self.session = sess
    self.num_factors = num_factors
    self.num_noise_factor = num_noise_factor
    self.ae_regs = ae_regs
    self.user_node_regs = user_node_reg
    self.is_user_node = is_user_node
    self.eps = eps
    self.topK = top_K
    self.dropout_rate = drop_out_rate
    self.noise_pos = noise_pos
    self.noise_type = noise_type
    self.lr = lr
    self.org_loss_ratio = org_loss_ratio
    self.noise_loss_ratio = noise_loss_ratio
    self.noise_loss_ratio_W1 = noise_loss_ratio_W1
    self.robust_test = robust_test
    self.adv_training = adv_training
    self.save_T = save_T
    self.epochs = epochs
    self.batch_size = batch_size
    self.skip_step = T
    self.verbose = verbose
    self.is_prec = is_prec

    # Training records: per-epoch evaluation history.
    self.metric1, self.metric2 = [], []

    # Validate the configuration with explicit exceptions rather than
    # `assert`, which is silently stripped when Python runs with -O.
    if self.noise_pos == 'USER' and not self.is_user_node:
        raise ValueError("noise_pos='USER' requires is_user_node=True")
    if self.robust_test and self.adv_training:
        raise ValueError(
            "robust_test and adv_training cannot both be enabled")
    # Log the constructor arguments (helper reads this frame's locals).
    gtl.print_paras(inspect.currentframe())
# original_matrix \ # = mtl.load_original_matrix(datafile='Data/ml-1m/ratings.dat', header=['uid', 'iid', 'ratings', 'time'], sep='::') # train_matrix, test_matrix \ # = mtl.matrix_split(original_matrix, opt='prediction', mode='user', test_size=0.2, random_state=10) # train_matrix, test_matrix \ # = mtl.matrix_split(original_matrix, opt='ranking', mode='mat', n_item_per_user=1, random_state=10) # original_matrix = mtl.matrix_to_binary(original_matrix, 0) # train_matrix = mtl.matrix_to_binary(train_matrix, 0) # test_matrix = mtl.matrix_to_binary(test_matrix, 0) # gtl.matrix_to_mat('Data/ML1M_Rank_200_1_Data.mat', opt='coo', original=original_matrix, train=train_matrix, test=test_matrix) data = gtl.load_mat_as_matrix('Data/ML1M_Rank_200_1_Data.mat', opt='coo') original_matrix, train_matrix, test_matrix = data['original'], data[ 'train'], data['test'] gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, gpu_options=gpu_options)) as sess: model = GAN_AE(sess, top_K=topK, neg_ratio=neg_ratio, num_factors=num_factors, ae_regs=ae_regs, eps=40, lr=lr,