}) #if epoch % args.print_freq == 0: # with open("attention_map_{}.pickle".format(step), 'wb') as fw: # pickle.dump(attention, fw) # with open("batch_{}.pickle".format(step), 'wb') as fw: # pickle.dump([u, seq], fw) # with open("user_emb.pickle", 'wb') as fw: # pickle.dump(user_emb_table, fw) # with open("item_emb.pickle", 'wb') as fw: # pickle.dump(item_emb_table, fw) if epoch % args.print_freq == 0: t1 = time.time() - t0 T += t1 #print 'Evaluating', t_test = evaluate(model, dataset, args, sess) t_valid = evaluate_valid(model, dataset, args, sess) #print '' #print 'epoch:%d, time: %f(s), valid (NDCG@10: %.4f, HR@10: %.4f), test (NDCG@10: %.4f, HR@10: %.4f)' % ( #epoch, T, t_valid[0], t_valid[1], t_test[0], t_test[1]) # print("[{0}, {1}, {2}, {3}, {4}, {5}],".format(epoch, T, t_valid[0], t_valid[1], t_test[0], t_test[1])) print_result(epoch, T, t_valid, t_test) print_result(epoch, T, t_valid, t_test, f=f) #f.write(str(t_valid) + ' ' + str(t_test) + '\n') #f.flush() t0 = time.time() f.close() sampler.close() print("Done")
model.batch_lp_jp: batch_lp_jp, } ) _valid_auc += valid_auc _test_auc += test_auc _valid_auc /= n_batch _test_auc /= n_batch f.write('%f %f\n' % (_valid_auc, _test_auc)) f.flush() if _valid_auc > best_valid_auc: best_valid_auc = _valid_auc best_test_auc = _test_auc best_iter = i model.save(sess) elif i >= best_iter + 50: break except: f.close() sampler.close() valid_sampler.close() test_sampler.close() exit(1) sampler.close() valid_sampler.close() test_sampler.close() f.write('Finished! %f, %f\n' % (best_valid_auc, best_test_auc)) f.close()
def main():
    """Train the model with periodic validation, checkpointing and early stopping.

    All configuration is read from the module-level ``args`` object. The
    function:

    1. prepares the environment, loads the dataset and starts the batch sampler;
    2. builds the model and, when more than one expert is configured, restores
       each expert's variables from its checkpoint path;
    3. resumes from the latest checkpoint in ``args.train_dir`` if one exists,
       otherwise writes an initial checkpoint;
    4. trains for up to ``args.num_epochs`` epochs, running validation every
       ``args.eval_freq`` epochs and saving a checkpoint whenever the metric at
       ``t_valid[args.eval_tgt_idx]`` improves on the best value seen so far
       (the best value is persisted to ``args.best_res_log``);
    5. stops early once ``args.early_stop_epochs`` epochs pass without
       improvement (a negative value disables early stopping and checkpoints
       on every evaluation instead);
    6. optionally runs the standard test evaluation when ``args.std_test`` is set.

    The sampler is always closed, including when training raises.
    """
    prepare_env()
    dataset, user_train, usernum, itemnum, num_batch = load_dataset()
    sampler = WarpSampler(
        user_train,
        usernum,
        itemnum,
        args=args,
        batch_size=args.batch_size,
        maxlen=args.maxlen,
        threshold_user=args.threshold_user,
        threshold_item=args.threshold_item,
        n_workers=3,
    )
    try:
        graph, model, num_experts, expert_paths, global_step, saver = create_model(
            usernum, itemnum, args)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True

        with tf.Session(config=config, graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            if num_experts > 1:
                # restore experts' variables
                for i, path in enumerate(expert_paths):
                    restore_collection(path, "expert_{}".format(i), sess, graph)

            # Best validation score so far; carried over from a previous run if
            # the log file exists.
            best_result = 0.0
            best_res_path = os.path.join(args.train_dir, args.best_res_log)
            if os.path.isfile(best_res_path):
                with open(best_res_path, 'r') as inf:
                    best_result = float(inf.readline().strip())
            best_step = 0
            no_improve = 0

            ckpt_prefix = os.path.join(args.train_dir, "model.ckpt")
            save_path = tf.train.latest_checkpoint(args.train_dir)
            if save_path:
                saver.restore(sess, save_path)
                print("[restored] {}".format(save_path))
            else:
                save_path = saver.save(sess, ckpt_prefix, global_step)
                print("[saved] {}".format(save_path))

            # Defined up front so the evaluation/early-stopping code below never
            # sees an unbound name when an epoch has zero batches.
            global_step_val = sess.run(global_step)

            T = 0.0
            t0 = time.time()
            t_valid = evaluate_valid(model, dataset, args, sess)
            print("[init] time = {}, best = {}, eval HR@{} = {}, HR@{} = {}],".
                  format(time.time() - t0, best_result, args.k, t_valid[0],
                         args.k1, t_valid[1]))
            if args.std_test:
                t0 = time.time()
                t_test = evaluate(model, dataset, args, sess)
                print(
                    "[init] time = {}, test NDCG{} = {}, NDCG{} = {}, HR{} = {}, HR{} = {}]"
                    .format(time.time() - t0, args.k, t_test[0], args.k1,
                            t_test[1], args.k, t_test[2], args.k1, t_test[3]))

            log_freq = 100  # multi-expert branch logs once every log_freq steps
            t0 = time.time()
            for epoch in range(1, args.num_epochs + 1):
                total_loss = 0.0
                loss_count = 0
                for step in range(num_batch):
                    u, seq, pos, neg = sampler.next_batch()
                    if num_experts > 1:
                        loss, _, global_step_val = sess.run(
                            [model.loss, model.train_op, global_step], {
                                model.u: u,
                                model.input_seq: seq,
                                model.pos: pos,
                                model.is_training: True
                            })
                        # Accumulate every step (including the logging step) and
                        # average over the number of losses actually summed, so
                        # the reported value is a true mean.
                        total_loss += loss
                        loss_count += 1
                        if step % log_freq == 0:
                            print("[step-{}] {}/{}, avg_loss = {}".format(
                                global_step_val, step + 1, num_batch,
                                total_loss / loss_count))
                            total_loss = 0.0
                            loss_count = 0
                    else:
                        # Only fetch what is reported; pulling the embedding
                        # tables and attention maps every step was unused work.
                        auc, loss, _, global_step_val = sess.run(
                            [
                                model.auc, model.loss, model.train_op,
                                global_step
                            ], {
                                model.u: u,
                                model.input_seq: seq,
                                model.pos: pos,
                                model.neg: neg,
                                model.is_training: True
                            })
                        print("[step-{}] {}/{}, auc = {}, loss = {}".format(
                            global_step_val, step + 1, num_batch, auc, loss))
                    sys.stdout.flush()

                if epoch % args.eval_freq == 0:
                    t1 = time.time()
                    T += t1 - t0  # T accumulates training time only
                    t_valid = evaluate_valid(model, dataset, args, sess)
                    t2 = time.time()
                    print(
                        "[epoch = {}, time = {} (train/eval = {}/{}), HR@{} = {}, HR@{} = {}],"
                        .format(epoch, T, t1 - t0, t2 - t1, args.k, t_valid[0],
                                args.k1, t_valid[1]))
                    t0 = t2

                    # early stopping
                    if t_valid[args.eval_tgt_idx] > best_result:
                        print("[best_result] {} (step-{}) < {} (step-{})".format(
                            best_result, best_step, t_valid[args.eval_tgt_idx],
                            global_step_val))
                        best_result = t_valid[args.eval_tgt_idx]
                        best_step = global_step_val
                        with open(best_res_path, 'w') as outf:
                            outf.write("{}".format(best_result))
                        save_path = saver.save(sess, ckpt_prefix, global_step_val)
                        print("[saved] {}".format(save_path))
                        no_improve = 0
                    else:
                        print("[best_result] {} (step-{}) > {} (step-{})".format(
                            best_result, best_step, t_valid[args.eval_tgt_idx],
                            global_step_val))
                        no_improve += args.eval_freq
                        if args.early_stop_epochs < 0:
                            # early stopping turned off: checkpoint regardless
                            save_path = saver.save(sess, ckpt_prefix,
                                                   global_step_val)
                            print("[saved] {}".format(save_path))
                        elif no_improve >= args.early_stop_epochs:
                            print(
                                "[stop training] no improvement for {} epochs".
                                format(no_improve))
                            break
                    sys.stdout.flush()

            if args.std_test:
                t_test = evaluate(model, dataset, args, sess)
                print(
                    "[final] time = {}, test NDCG{} = {}, NDCG{} = {}, HR{} = {}, HR{} = {}]"
                    .format(time.time() - t0, args.k, t_test[0], args.k1,
                            t_test[1], args.k, t_test[2], args.k1, t_test[3]))
    finally:
        # The sampler must be shut down even when training fails part-way.
        sampler.close()
    print("[Done]")
# whether to enable rank weight. If True, the loss will be scaled by the estimated # log-rank of the positive items. If False, no weight will be applied. # This is particularly useful to speed up the training for large item set. # Weston, Jason, Samy Bengio, and Nicolas Usunier. # "Wsabie: Scaling up to large vocabulary image annotation." IJCAI. Vol. 11. 2011. use_rank_weight=True, # whether to enable covariance regularization to encourage efficient use of the vector space. # More useful when the size of embedding is smaller (e.g. < 20 ). use_cov_loss=False, # weight of the cov_loss cov_loss_weight=1) log_file = open( 'dataset_' + args.dataset + '_margin_' + str(args.margin) + '_lr_' + str(args.lr) + '.csv', "w") monitor = Monitor(log_file=log_file) optimize(model, sampler, train, valid, max_steps=args.max_steps, monitor=monitor, verbose=args.verbose) print("%s close sampler, close and save to log file" % datetime.now()) log_file.close() sampler.close() # important! stop multithreading print("%s log file and sampler have closed" % datetime.now())
def train_sasrec(n_args):
    """Train a SASRec model (or restore a saved one) and write predictions.

    Does nothing when ``../../prediction_result/<n_args.o_filename>.csv``
    already exists. Otherwise it:

    * creates the run directory ``<dataset>_<train_dir>`` and dumps the sorted
      arguments to ``args.txt`` inside it;
    * loads the dataset and the recall candidates;
    * if ``../user_data/model_data/`` is empty (or missing), trains for
      ``n_args.num_epochs`` epochs, evaluating every 20 epochs and appending
      the validation/test metrics to ``log.txt``, then saves the checkpoint
      and calls ``predict_result``;
    * otherwise restores ``sasrec_model.ckpt`` and calls ``predict_result``.

    Args:
        n_args: parsed-arguments namespace; the attributes read here are
            ``o_filename``, ``dataset``, ``train_dir``, ``p_dataset``,
            ``recall_ds``, ``batch_size``, ``maxlen`` and ``num_epochs``
            (the whole object is also passed on to the model and evaluators).

    Raises:
        SystemExit: with status 1 when training, saving or post-training
            prediction fails; the traceback is printed first.

    The log file, sampler and TensorFlow session are released on every exit
    path.
    """
    import sys
    import traceback

    if os.path.exists("../../prediction_result/" + n_args.o_filename + ".csv"):
        return

    run_dir = n_args.dataset + '_' + n_args.train_dir
    if not os.path.isdir(run_dir):
        os.makedirs(run_dir)
    with open(os.path.join(run_dir, 'args.txt'), 'w') as args_file:
        args_file.write('\n'.join(
            str(k) + ',' + str(v)
            for k, v in sorted(vars(n_args).items(), key=lambda x: x[0])))

    dataset = data_partition(n_args.dataset, n_args.p_dataset, None)
    recall_s1 = Get_Recall_S1(n_args.recall_ds)
    # Only the training sequences and the two counts are needed here; the
    # remaining splits are consumed by the evaluators through `dataset`.
    user_train, _, _, _, _, usernum, itemnum = dataset
    num_batch = math.ceil(len(user_train) / n_args.batch_size)

    total_len = sum(len(user_train[u]) for u in user_train)
    avg_len = float(total_len) / len(user_train) if user_train else 0.0
    print('average sequence length: %.2f' % avg_len)

    model_dir = "../user_data/model_data/"
    ckpt_path = "../user_data/model_data/sasrec_model.ckpt"
    # A missing model directory is treated like an empty one (train from
    # scratch) instead of crashing in os.listdir.
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    with open(os.path.join(run_dir, 'log.txt'), 'w') as log_file:
        sess = tf.Session(config=config)
        sampler = None
        try:
            sampler = WarpSampler(user_train,
                                  usernum,
                                  itemnum,
                                  batch_size=n_args.batch_size,
                                  maxlen=n_args.maxlen,
                                  n_workers=4)
            model = Model(usernum, itemnum, n_args)

            if not os.listdir(model_dir):
                sess.run(tf.global_variables_initializer())
                T = 0.0
                t0 = time.time()
                try:
                    for epoch in range(1, n_args.num_epochs + 1):
                        for step in tqdm(range(num_batch),
                                         total=num_batch,
                                         ncols=70,
                                         leave=False,
                                         unit='b'):
                            u, seq, pos, neg = sampler.next_batch()
                            sess.run(
                                [model.auc, model.loss, model.train_op], {
                                    model.u: u,
                                    model.input_seq: seq,
                                    model.pos: pos,
                                    model.neg: neg,
                                    model.is_training: True
                                })

                        if epoch % 20 == 0:
                            # T accumulates training time only; the clock is
                            # restarted after evaluation finishes.
                            t1 = time.time() - t0
                            T += t1
                            print('Evaluating')
                            t_test = evaluate(model, dataset, n_args, sess)
                            t_valid = evaluate_valid(model, dataset, n_args,
                                                     sess)
                            print('')
                            print(
                                'epoch:%d, time: %f(s), valid (NDCG@50: %.4f, HR@10: %.4f), test (NDCG@50: %.4f, HR@10: %.4f)'
                                % (epoch, T, t_valid[0], t_valid[1], t_test[0],
                                   t_test[1]))
                            log_file.write(
                                str(t_valid) + ' ' + str(t_test) + '\n')
                            log_file.flush()
                            t0 = time.time()

                    saver = tf.train.Saver()
                    saver.save(sess, ckpt_path)
                    predict_result(model,
                                   dataset,
                                   recall_s1,
                                   n_args,
                                   sess,
                                   type='pred')
                except Exception:
                    # Keep the exit-status-1 contract, but show what went
                    # wrong instead of exiting silently.
                    traceback.print_exc()
                    sys.exit(1)
            else:
                # Restore into the already-configured session rather than
                # opening a second one and leaking the first.
                saver = tf.train.Saver()
                saver.restore(sess, ckpt_path)
                predict_result(model,
                               dataset,
                               recall_s1,
                               n_args,
                               sess,
                               type='pred')
        finally:
            if sampler is not None:
                sampler.close()
            sess.close()
    print("Done")