                      n_workers=3)
model = Model(usernum, itemnum, args)
sess.run(tf.global_variables_initializer())

T = 0.0
t_test = evaluate(model, dataset, args, sess)
t_valid = evaluate_valid(model, dataset, args, sess)
print_result(0, 0.0, t_valid, t_test)
print_result(0, 0.0, t_valid, t_test, f=f)
# print("[0, 0.0, {0}, {1}, {2}, {3}],".format(t_valid[0], t_valid[1], t_test[0], t_test[1]))

t0 = time.time()
for epoch in range(1, args.num_epochs + 1):
    for step in range(num_batch):
        u, seq, pos, neg = sampler.next_batch()
        user_emb_table, item_emb_table, attention, auc, loss, _ = sess.run(
            [
                model.user_emb_table, model.item_emb_table, model.attention,
                model.auc, model.loss, model.train_op
            ], {
                model.u: u,
                model.input_seq: seq,
                model.pos: pos,
                model.neg: neg,
                model.is_training: True
            })
        # if epoch % args.print_freq == 0:
        #     with open("attention_map_{}.pickle".format(step), 'wb') as fw:
        #         pickle.dump(attention, fw)
        #     with open("batch_{}.pickle".format(step), 'wb') as fw:
T = 0.0
seqs, contexts, pos_contexts, test_items, valid_users = prepare_test_data(dataset=dataset, args=args)
seqs_2, contexts_2, pos_contexts_2, test_items_2, valid_users_2 = prepare_valid_data(dataset=dataset, args=args)

sample_seq = seqs[7]
sample_context = contexts[7]
sample_pos_context = pos_contexts[7]
sample_test_item = test_items[7]
sample_test_user = valid_users[7]

i = 0
t0 = time.time()
for epoch in range(1, args.num_epochs + 1):
    model.separate = False
    for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):
        u, seq, context, pos_context, pos, neg = sampler.next_batch()
        auc, loss, _ = sess.run(
            [model.auc, model.loss, model.train_op],
            {model.u: np.reshape(u, [-1, 1]),
             model.input_seq: seq,
             model.input_contexts: context,
             model.input_pos_contexts: pos_context,
             model.pos: pos,
             model.neg: neg,
             model.is_training: True})
        i = i + 1
    if epoch % 100 == 0:
        t1 = time.time() - t0
        T += t1
        t_test = evaluate(model, sess, seqs, contexts, pos_contexts, test_items, valid_users, args, itemnum)
        t_valid = evaluate(model, sess, seqs_2, contexts_2, pos_contexts_2, test_items_2, valid_users_2, args, itemnum)
sampler = WarpSampler(user_train, usernum, itemnum, batch_size=args.batch_size,
                      maxlen=args.maxlen, n_workers=3)
valid_sampler = WarpSampler(user_valid, usernum, itemnum, batch_size=usernum,
                            maxlen=args.maxlen, n_workers=1)
test_sampler = WarpSampler(user_test, usernum, itemnum, batch_size=usernum,
                           maxlen=args.maxlen, n_workers=1)
# import IPython; IPython.embed()
model = Model(usernum, itemnum, args)
sess.run(tf.global_variables_initializer())

T = 0.0
t0 = time.time()
try:
    for epoch in range(1, args.num_epochs + 1):
        losses = []
        for step in tqdm(range(num_batch), total=num_batch, ncols=70,
                         leave=False, unit='b', desc='Epoch=%d' % epoch):
            u, seq, pos, neg = sampler.next_batch()
            auc, loss, _ = sess.run(
                [model.auc, model.loss, model.train_op],
                {model.u: u, model.input_seq: seq, model.pos: pos,
                 model.neg: neg, model.is_training: True})
            losses.append(loss)
        if epoch % 5 == 0:
            t1 = time.time() - t0
            T += t1
            print('Evaluating')
            vu, vseq, vpos, vneg = valid_sampler.next_batch()
            tu, tseq, tpos, tneg = test_sampler.next_batch()
            # Fetch only the validation loss; running model.train_op on
            # held-out batches would update the weights during evaluation.
            _, vloss = sess.run(
                [model.auc, model.loss],
                {model.u: vu, model.input_seq: vseq, model.pos: vpos,
                 model.neg: vneg, model.is_training: False})
model = Model(usernum, itemnum, args)
sess.run(tf.global_variables_initializer())

T = 0.0
t0 = time.time()
try:
    for epoch in range(1, args.num_epochs + 1):
        for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):
            # One batch of (user ids, seqs, pos, neg), e.g.
            # u = (user1, user2, ...), seq = (user1's item sequence, user2's item sequence, ...)
            u, seq, pos, neg = sampler.next_batch()
            auc, loss, _ = sess.run(
                [model.auc, model.loss, model.train_op], {
                    model.u: u,
                    model.input_seq: seq,
                    model.pos: pos,
                    model.neg: neg,
                    model.text_emb: text_emb,
                    model.is_training: True
                })
        if epoch % 1 == 0:
            t1 = time.time() - t0
            T += t1
            print('attention weights')
            attention = show_attention(model, dataset, args, sess,
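# --- Illustrative only ---
# A minimal sketch of the batch layout described in the sampler comment above.
# batch_size and maxlen are hypothetical values, not read from args, and the
# arrays are placeholders for what WarpSampler is assumed to return.
import numpy as np

batch_size, maxlen = 128, 50
u = np.zeros([batch_size], dtype=np.int32)             # one user id per example
seq = np.zeros([batch_size, maxlen], dtype=np.int32)   # each user's (padded) item sequence
pos = np.zeros([batch_size, maxlen], dtype=np.int32)   # next-item targets, shifted by one position
neg = np.zeros([batch_size, maxlen], dtype=np.int32)   # randomly sampled negative items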
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
model = MoHR(usernum, itemnum, Relationships, args)
sess.run(tf.global_variables_initializer())

best_valid_auc = 0.5
best_iter = 0
num_batch = oneiteration // args.batch_size

try:
    for i in range(args.maximum_epochs):
        for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):
            batch = sampler.next_batch()
            (batch_u, batch_i, batch_i_mask, batch_ui_r, batch_ui_rp, batch_j,
             batch_jp, batch_lp_r, batch_lp_i, batch_lp_j, batch_lp_jp) = batch
            _, train_loss, train_auc = sess.run(
                (model.gds, model.loss, model.auc),
                {model.batch_u: batch_u,
                 model.batch_i: batch_i,
                 model.batch_ui_r: batch_ui_r,
                 model.batch_ui_rp: batch_ui_rp,
                 model.batch_j: batch_j,
                 model.batch_jp: batch_jp,
                 model.batch_lp_r: batch_lp_r,
                 model.batch_lp_i: batch_lp_i,
                 model.batch_lp_j: batch_lp_j,
                 model.batch_lp_jp: batch_lp_jp})
def main():
    prepare_env()
    dataset, user_train, usernum, itemnum, num_batch = load_dataset()
    sampler = WarpSampler(
        user_train,
        usernum,
        itemnum,
        args=args,
        batch_size=args.batch_size,
        maxlen=args.maxlen,
        threshold_user=args.threshold_user,
        threshold_item=args.threshold_item,
        n_workers=3,
    )
    graph, model, num_experts, expert_paths, global_step, saver = create_model(
        usernum, itemnum, args)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    with tf.Session(config=config, graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        if num_experts > 1:
            for i, path in enumerate(expert_paths):  # restore experts' variables
                restore_collection(path, "expert_{}".format(i), sess, graph)

        best_result = 0.0
        best_res_path = os.path.join(args.train_dir, args.best_res_log)
        if os.path.isfile(best_res_path):
            with open(best_res_path, 'r') as inf:
                best_result = float(inf.readline().strip())
        best_step = 0
        no_improve = 0

        save_path = tf.train.latest_checkpoint(args.train_dir)
        if save_path:
            saver.restore(sess, save_path)
            print("[restored] {}".format(save_path))
        else:
            save_path = saver.save(sess,
                                   os.path.join(args.train_dir, "model.ckpt"),
                                   global_step)
            print("[saved] {}".format(save_path))

        T = 0.0
        t0 = time.time()
        t_valid = evaluate_valid(model, dataset, args, sess)
        print("[init] time = {}, best = {}, eval HR@{} = {}, HR@{} = {}],".format(
            time.time() - t0, best_result, args.k, t_valid[0], args.k1, t_valid[1]))
        if args.std_test:
            t0 = time.time()
            t_test = evaluate(model, dataset, args, sess)
            print("[init] time = {}, test NDCG{} = {}, NDCG{} = {}, HR{} = {}, HR{} = {}]".format(
                time.time() - t0, args.k, t_test[0], args.k1, t_test[1],
                args.k, t_test[2], args.k1, t_test[3]))

        t0 = time.time()
        for epoch in range(1, args.num_epochs + 1):
            # for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):
            total_loss = 0.0
            for step in range(num_batch):
                u, seq, pos, neg = sampler.next_batch()
                if num_experts > 1:
                    log_freq = 100
                    loss, _, global_step_val = sess.run(
                        [model.loss, model.train_op, global_step], {
                            model.u: u,
                            model.input_seq: seq,
                            model.pos: pos,
                            model.is_training: True
                        })
                    if step % log_freq == 0:
                        print("[step-{}] {}/{}, avg_loss = {}".format(
                            global_step_val, step + 1, num_batch,
                            total_loss / log_freq))
                        total_loss = 0.0
                    else:
                        total_loss += loss
                else:
                    user_emb_table, item_emb_table, attention, auc, loss, _, global_step_val = sess.run(
                        [
                            model.user_emb_table, model.item_emb_table,
                            model.attention, model.auc, model.loss,
                            model.train_op, global_step
                        ], {
                            model.u: u,
                            model.input_seq: seq,
                            model.pos: pos,
                            model.neg: neg,
                            model.is_training: True
                        })
                    print("[step-{}] {}/{}, auc = {}, loss = {}".format(
                        global_step_val, step + 1, num_batch, auc, loss))
                sys.stdout.flush()

            if epoch % args.eval_freq == 0:
                t1 = time.time()
                T += t1 - t0
                # t_test = evaluate(model, dataset, args, sess)
                t_valid = evaluate_valid(model, dataset, args, sess)
                t2 = time.time()
                # print("[{0}, {1}, {2}, {3}, {4}, {5}],".format(epoch, T, t_valid[0], t_valid[1], t_test[0], t_test[1]))
                print("[epoch = {}, time = {} (train/eval = {}/{}), HR@{} = {}, HR@{} = {}],".format(
                    epoch, T, t1 - t0, t2 - t1, args.k, t_valid[0], args.k1, t_valid[1]))
                t0 = t2

                # early stopping
                if t_valid[args.eval_tgt_idx] > best_result:
                    print("[best_result] {} (step-{}) < {} (step-{})".format(
                        best_result, best_step, t_valid[args.eval_tgt_idx],
                        global_step_val))
                    best_result = t_valid[args.eval_tgt_idx]
                    best_step = global_step_val
                    # ckpt_paths = glob(os.path.join(args.train_dir, "model.ckpt*"))
                    # for path in ckpt_paths:
                    #     os.remove(path)
                    #     print("[removed] {}".format(path))
                    with open(best_res_path, 'w') as outf:
                        outf.write("{}".format(best_result))
                    save_path = saver.save(
                        sess, os.path.join(args.train_dir, "model.ckpt"),
                        global_step_val)
                    print("[saved] {}".format(save_path))
                    no_improve = 0
                else:
                    print("[best_result] {} (step-{}) > {} (step-{})".format(
                        best_result, best_step, t_valid[args.eval_tgt_idx],
                        global_step_val))
                    no_improve += args.eval_freq
                    if args.early_stop_epochs < 0:  # turn off early stopping
                        save_path = saver.save(
                            sess, os.path.join(args.train_dir, "model.ckpt"),
                            global_step_val)
                        print("[saved] {}".format(save_path))
                    else:
                        if no_improve >= args.early_stop_epochs:
                            print("[stop training] no improvement for {} epochs".format(no_improve))
                            break
            sys.stdout.flush()

        if args.std_test:
            t_test = evaluate(model, dataset, args, sess)
            print("[final] time = {}, test NDCG{} = {}, NDCG{} = {}, HR{} = {}, HR{} = {}]".format(
                time.time() - t0, args.k, t_test[0], args.k1, t_test[1],
                args.k, t_test[2], args.k1, t_test[3]))

    sampler.close()
    print("[Done]")
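# --- Illustrative only ---
# A self-contained sketch of the validation-driven early-stopping bookkeeping
# used in main() above. EarlyStopper and its argument names are assumptions
# made for illustration, not part of the original code.
class EarlyStopper(object):
    def __init__(self, patience_epochs, best=0.0):
        self.best = best
        self.best_step = 0
        self.no_improve = 0
        self.patience = patience_epochs

    def update(self, metric, step, eval_freq):
        """Record one evaluation; return True when training should stop."""
        if metric > self.best:
            self.best = metric
            self.best_step = step
            self.no_improve = 0
            return False
        self.no_improve += eval_freq
        # A negative patience disables early stopping, mirroring early_stop_epochs < 0 above.
        return self.patience >= 0 and self.no_improve >= self.patience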
sess.run(tf.global_variables_initializer())
T = 0.0
t0 = time.time()
threshold = 200

# try:
for epoch in range(1, args.num_epochs + 1):
    for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):
        creator, seq, pos, neg = sampler.next_batch()
        auc, loss, _ = sess.run(
            [model.auc, model.loss, model.train_op], {
                model.creator: creator,
                model.input_seq: seq,
                model.pos: pos,
                model.neg: neg,
                model.is_training: True
            })
    print('epoch: ', epoch, 'auc: ', auc, 'loss: ', loss)
    if epoch > threshold and epoch % 3 == 0:
        t1 = time.time() - t0
        T += t1
        print('Validation result: ')
        ndcg_10_val, recall_10_val = evaluate_valid(model, dataset, args, sess,
# Set train dir
now = datetime.now()
TRAIN_FILES_PATH = os.path.join(
    MODEL_PATH, os.path.basename(args.dataset),
    '{}_{}'.format(args.train_dir, now.strftime("%m-%d-%Y-%H-%M-%S")))

# Allow saving of model
MODEL_SAVE_PATH = os.path.join(TRAIN_FILES_PATH, 'model.ckpt')
saver = tf.train.Saver()

if args.test_model:
    if os.path.exists(args.test_model):
        print('loaded saved model {}'.format(args.test_model))
        saver.restore(sess, tf.train.latest_checkpoint(args.test_model))

    # Start testing: run one forward pass as a sanity check, without applying
    # a training step to the restored weights.
    u, seq, pos, neg, timeseq, ratings_seq, hours_seq, days_seq, orig_seq = sampler.next_batch()
    auc, loss, summary = sess.run(
        [model.auc, model.loss, model.merged],
        {model.u: u, model.input_seq: seq, model.pos: pos, model.neg: neg,
         model.time_seq: timeseq, model.hours: hours_seq, model.days: days_seq,
         model.is_training: False})
    print(auc)
    print(loss)

    t_test = evaluate(model, dataset, args, sess)
    logger.info('test (NDCG@10: %.4f, HR@10: %.4f)' % (t_test[0], t_test[1]))
def train_sasrec(n_args):
    if not os.path.exists("../../prediction_result/" + n_args.o_filename + ".csv"):
        if not os.path.isdir(n_args.dataset + '_' + n_args.train_dir):
            os.makedirs(n_args.dataset + '_' + n_args.train_dir)
        with open(os.path.join(n_args.dataset + '_' + n_args.train_dir, 'args.txt'), 'w') as f:
            f.write('\n'.join([
                str(k) + ',' + str(v)
                for k, v in sorted(vars(n_args).items(), key=lambda x: x[0])
            ]))

        dataset = data_partition(n_args.dataset, n_args.p_dataset, None)
        recall_s1 = Get_Recall_S1(n_args.recall_ds)
        # recall_v = Get_Recall_S1(n_args.recall_v)
        [user_train, user_valid, user_test, user_pred, user_valid_, usernum, itemnum] = dataset
        num_batch = math.ceil(len(user_train) / n_args.batch_size)

        cc = 0.0
        for u in user_train:
            cc += len(user_train[u])
        print('average sequence length: %.2f' % (cc / len(user_train)))

        f = open(os.path.join(n_args.dataset + '_' + n_args.train_dir, 'log.txt'), 'w')

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        sess = tf.Session(config=config)

        sampler = WarpSampler(user_train, usernum, itemnum,
                              batch_size=n_args.batch_size,
                              maxlen=n_args.maxlen, n_workers=4)
        model = Model(usernum, itemnum, n_args)

        if not os.listdir("../user_data/model_data/"):
            sess.run(tf.global_variables_initializer())
            T = 0.0
            t0 = time.time()
            try:
                for epoch in range(1, n_args.num_epochs + 1):
                    for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):
                        u, seq, pos, neg = sampler.next_batch()
                        auc, loss, _ = sess.run(
                            [model.auc, model.loss, model.train_op], {
                                model.u: u,
                                model.input_seq: seq,
                                model.pos: pos,
                                model.neg: neg,
                                model.is_training: True
                            })
                    if epoch % 20 == 0:
                        t1 = time.time() - t0
                        T += t1
                        print('Evaluating')
                        t_test = evaluate(model, dataset, n_args, sess)
                        t_valid = evaluate_valid(model, dataset, n_args, sess)
                        print('')
                        print('epoch:%d, time: %f(s), valid (NDCG@50: %.4f, HR@10: %.4f), test (NDCG@50: %.4f, HR@10: %.4f)'
                              % (epoch, T, t_valid[0], t_valid[1], t_test[0], t_test[1]))
                        f.write(str(t_valid) + ' ' + str(t_test) + '\n')
                        f.flush()
                        t0 = time.time()

                saver = tf.train.Saver()
                saver.save(sess, "../user_data/model_data/sasrec_model.ckpt")
                predict_result(model, dataset, recall_s1, n_args, sess, type='pred')
                # predict_result(model, dataset, recall_v, args, sess, type='valid')
            except:
                sampler.close()
                f.close()
                exit(1)
        else:
            saver = tf.train.Saver()
            with tf.Session() as sess:
                saver.restore(sess, "../user_data/model_data/sasrec_model.ckpt")
                predict_result(model, dataset, recall_s1, n_args, sess, type='pred')
                # predict_result(model, dataset, recall_v, args, sess, type='valid')

        f.close()
        sampler.close()
        print("Done")
def train(self):
    print('train start')
    # train, valid, artists_dic, albums_dic, titles, titles_len = spotify(self.mode, self.valid)
    sampler = WarpSampler(self.train_data,
                          self.unigram_probs,
                          batch_size=self.batch_size,
                          n_negative=self.n_negative,
                          check_negative=True)

    # sample some users to calculate recall validation
    items = self.from_numpy(np.arange(self.n_items))
    prev_recall = 0
    recall_score = 0.0000001
    prev_ndcg = 0
    ndcg_score = 0.0000001

    epoch = 1
    while epoch <= self.num_epochs:
        self.model.train()
        if prev_recall < recall_score and prev_ndcg < ndcg_score:
            prev_recall = recall_score
            prev_ndcg = ndcg_score
            self.save_model()
            print('Model saved')

        # train model
        losses = []
        # run n mini-batches
        # try:
        #     for obj in gc.get_objects():
        #         if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
        #             print(type(obj), obj.size())
        # except:
        #     pass
        # self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        for i in tqdm(range(self.evaluation_every_n_batchs), desc="Optimizing..."):
            user_pos, neg = sampler.next_batch()
            pos_artists = self.from_numpy(self.artists_dic[user_pos[:, 1]])
            pos_albums = self.from_numpy(self.albums_dic[user_pos[:, 1]])
            neg_artists = self.from_numpy(
                np.array([self.artists_dic[negative_sample] for negative_sample in neg])).type(torch.long)
            neg_albums = self.from_numpy(
                np.array([self.albums_dic[negative_sample] for negative_sample in neg])).type(torch.long)
            titles = None  # self.from_numpy(self.titles[user_pos[:, 0]])
            titles_len = None  # self.from_numpy(self.titles_len[user_pos[:, 0]])
            user_pos = self.from_numpy(user_pos).type(torch.long)
            neg = self.from_numpy(neg).type(torch.long)

            self.model.zero_grad()
            pos_distances, distance_to_neg_items, closest_negative_item_distances = self.model(
                user_pos, pos_artists, pos_albums, neg, neg_artists, neg_albums,
                titles, titles_len)  # / (self.optim_size / self.batch_size)
            loss = self.get_loss(pos_distances, distance_to_neg_items,
                                 closest_negative_item_distances)
            loss.backward(retain_graph=False)
            # torch.nn.utils.clip_grad_norm(self.model.parameters(), self.clip_norm)
            self.optimizer.step()
            self.clip_by_norm(self.model.module.user_embeddings)
            self.clip_by_norm(self.model.module.item_embeddings)
            self.clip_by_norm(self.model.module.artist_embeddings)
            self.clip_by_norm(self.model.module.album_embeddings)
            # self.clip_by_norm(self.model.title_embeddings)
            # self.model.title_embeddings.weight.data = self.clip_by_norm(self.model.title_embeddings.weight.data)
            # if (i+1) % (self.optim_size / self.batch_size) == 0 or i == self.evaluation_every_n_batchs-1:
            #     self.optimizer.step()
            #     self.model.zero_grad()
            losses.append(loss.detach().cpu().numpy())
            torch.cuda.empty_cache()

        print("\nTraining loss {}".format(np.mean(losses)))
        epoch += 1

        # compute recall in chunks to utilize speedup provided by Tensorflow
        artists = self.from_numpy(self.artists_dic)
        albums = self.from_numpy(self.albums_dic)
        titles = self.from_numpy(self.titles)
        titles_len = self.from_numpy(self.titles_len)
        # ratios = self.from_numpy(self.ratios)
        self.model.eval()
        for i in range(10):
            # create evaluator on validation set
            validation_recall = RecallEvaluator(self.model, self.train_data, self.valid_data[i])
            # compute recall on validate set
            valid_recalls = []
            valid_ndcgs = []
            valid_users = np.array(list(set(self.valid_data[i].nonzero()[0])))
            # valid_users = list(set(self.valid_data[i].nonzero()[0]))
            for user_chunk in toolz.partition_all(50, valid_users):
                user_chunk = self.from_numpy(np.array(user_chunk)).type(torch.long)
                recall, ndcg = validation_recall.eval(
                    user_chunk, items, artists, albums,
                    titles[user_chunk], titles_len[user_chunk], None)
                valid_recalls.extend(recall)
                valid_ndcgs.extend(ndcg)
            # for user_chunk in valid_users:
            #     items = np.array(self.val_candidates[user_chunk])
            #     if len(items) > 50:
            #         artists = self.from_numpy(self.artists_dic[items]).type(torch.long)
            #         albums = self.from_numpy(self.albums_dic[items]).type(torch.long)
            #         items = self.from_numpy(items).type(torch.long)
            #         user_chunk = self.from_numpy(np.array([user_chunk])).type(torch.long)
            #         recall, ndcg = validation_recall.eval(user_chunk, items, artists, albums, titles[user_chunk], titles_len[user_chunk], ratios[user_chunk-990000])
            #         valid_recalls.extend([recall])
            #         valid_ndcgs.extend([ndcg])
            #     else:
            #         print(len(items))
            #         valid_recalls.extend([[0.0]])
            #         valid_ndcgs.extend([[0.0]])
            recall_score = np.mean(valid_recalls)
            ndcg_score = np.mean(valid_ndcgs)
            print('\nNo. {}'.format(i))
            print("Recall on (sampled) validation set: {}".format(recall_score))
            print("Ndcg on (sampled) validation set: {}".format(ndcg_score))
            print('Epoch: {}'.format(epoch))
        torch.cuda.empty_cache()

    self.predict()
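# --- Illustrative only ---
# A sketch of what the clip_by_norm calls in train() are assumed to do:
# renormalize embedding rows whose L2 norm exceeds a maximum, a common trick in
# metric-learning recommenders. The helper name, the max_norm default, and the
# in-place update are assumptions for illustration, not the original implementation.
import torch

def clip_by_norm(embedding, max_norm=1.0):
    with torch.no_grad():
        norms = embedding.weight.norm(p=2, dim=1, keepdim=True)   # per-row L2 norm
        scale = (max_norm / norms).clamp(max=1.0)                 # shrink only rows above max_norm
        embedding.weight.mul_(scale)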