def test_model(model, sess, test_set, model_args):
    batch_size = model_args["batch_size"]
    total_steps = int(test_set.shape[0] / batch_size)

    mrr_list = {5: [], 20: []}
    hr_list = {5: [], 20: []}
    ndcg_list = {5: [], 20: []}
    time_buffer = []

    for batch_step in range(total_steps):
        test_batch = test_set[batch_step * batch_size:(batch_step + 1) *
                              batch_size, :]

        tic = time.time()
        pred_probs = sess.run(model.probs_test,
                              feed_dict={model.input_test: test_batch})
        toc = time.time()
        time_buffer.append(toc - tic)

        ground_truth = test_batch[:, -1]
        top_5_rank, top_20_rank = sample_top_ks(pred_probs, [5, 20])

        indices_5 = [
            np.argwhere(line == item)
            for line, item in zip(top_5_rank, ground_truth)
        ]
        indices_20 = [
            np.argwhere(line == item)
            for line, item in zip(top_20_rank, ground_truth)
        ]

        mrr5_sub, hr5_sub, ndcg5_sub = get_metric(indices_5)
        mrr20_sub, hr20_sub, ndcg20_sub = get_metric(indices_20)

        mrr_list[5].extend(mrr5_sub)
        mrr_list[20].extend(mrr20_sub)
        hr_list[5].extend(hr5_sub)
        hr_list[20].extend(hr20_sub)
        ndcg_list[5].extend(ndcg5_sub)
        ndcg_list[20].extend(ndcg20_sub)

    logging.info("[Test] Time: {:.3f}s +- {:.3f}s per batch".format(
        np.mean(time_buffer), np.std(time_buffer)))

    ndcg_5, ndcg_20 = np.mean(ndcg_list[5]), np.mean(ndcg_list[20])
    mrr_5, mrr_20 = np.mean(mrr_list[5]), np.mean(mrr_list[20])
    hr_5, hr_20 = np.mean(hr_list[5]), np.mean(hr_list[20])

    logging.info("\t MRR@5: {:.4f}, HIT@5: {:.4f}, NDCG@5: {:.4f}".format(
        mrr_5, hr_5, ndcg_5))
    logging.info("\tMRR@20: {:.4f}, HIT@20: {:.4f}, NDCG@20: {:.4f}".format(
        mrr_20, hr_20, ndcg_20))

    return mrr_5
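
# NOTE: test_model relies on the project helpers sample_top_ks and get_metric.
# The sketch below is NOT the project's implementation; it is a minimal,
# hypothetical reference for the behavior the loop above assumes:
# sample_top_ks returns the top-k item ids per row (one array per k), and
# get_metric turns the np.argwhere hit positions into per-example
# MRR / HIT / NDCG lists. Names prefixed with "_" and tie-breaking details
# are assumptions.
def _sample_top_ks_sketch(pred_probs, ks):
    """For each k in ks, return an array [batch, k] with the indices of the
    k highest-probability items per row, ordered by descending probability."""
    results = []
    for k in ks:
        # argpartition selects the top-k, argsort orders them by probability
        top_k_unsorted = np.argpartition(pred_probs, -k, axis=1)[:, -k:]
        top_k_values = np.take_along_axis(pred_probs, top_k_unsorted, axis=1)
        row_order = np.argsort(top_k_values, axis=1)[:, ::-1]
        results.append(np.take_along_axis(top_k_unsorted, row_order, axis=1))
    return results


def _get_metric_sketch(indices):
    """indices: one np.argwhere result per example; an empty array is a miss."""
    mrr, hr, ndcg = [], [], []
    for hit in indices:
        if hit.size == 0:
            mrr.append(0.0)
            hr.append(0.0)
            ndcg.append(0.0)
        else:
            rank = int(hit[0][0])  # 0-based position in the top-k list
            mrr.append(1.0 / (rank + 1))
            hr.append(1.0)
            ndcg.append(1.0 / np.log2(rank + 2))
    return mrr, hr, ndcg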
def start(global_args):
    preset(global_args)
    model_args, train_set, test_set = get_data_and_config(global_args)

    ratio = global_args["occupy"]
    if ratio is None:
        gpu_config = get_proto_config()
        logging.info("Auto-growth GPU memory.")
    else:
        gpu_config = get_proto_config_with_occupy(ratio)
        logging.info("{:.1f}% GPU memory occupied.".format(ratio * 100))
    sess = tf.Session(config=gpu_config)

    with tf.variable_scope("policy_net"):
        policy_net = PolicyNetGumbelGru(model_args)
        policy_net.build_policy()

    with tf.variable_scope(tf.get_variable_scope()):
        model = NextItNetGumbel(model_args)
        model.build_train_graph(policy_action=policy_net.action_predict)
        model.build_test_graph(policy_action=policy_net.action_predict)

    variables = tf.contrib.framework.get_variables_to_restore()
    model_variables = [
        v for v in variables if not v.name.startswith("policy_net")
    ]
    policy_variables = [
        v for v in variables if v.name.startswith("policy_net")
    ]

    with tf.variable_scope(tf.get_variable_scope()):
        optimizer_finetune = tf.train.AdamOptimizer(
            learning_rate=model_args["lr"], name="Adam_finetune")
        train_model = optimizer_finetune.minimize(model.loss,
                                                  var_list=model_variables)

    with tf.variable_scope("policy_net"):
        optimizer_policy = tf.train.AdamOptimizer(
            learning_rate=model_args["lr"], name="Adam_policy")
        train_policy = optimizer_policy.minimize(model.loss,
                                                 var_list=policy_variables)

    init = tf.global_variables_initializer()
    sess.run(init)

    # restore if needed
    if global_args["use_pre"]:
        restore_op = tf.train.Saver(var_list=model_variables)
        restore_op.restore(sess, global_args["pre"])
        sess.run(tf.assign(policy_net.item_embedding, model.item_embedding))
        logging.info(">>>>> Parameters loaded from pre-trained model.")
    else:
        logging.info(">>>>> Training without pre-trained model.")

    logging.info("Start @ {}".format(strftime("%m.%d-%H:%M:%S", localtime())))
    saver = tf.train.Saver(max_to_keep=3)

    batch_size = model_args["batch_size"]
    log_meter = model_args["log_every"]
    total_iters = model_args["iterations"]
    total_steps = int(train_set.shape[0] / batch_size)
    test_steps = int(test_set.shape[0] / batch_size)
    model_save_path = global_args["store_path"]
    model_name = global_args["name"]
    logging.info("Batch size = {}, Batches = {}".format(
        batch_size, total_steps))

    best_mrr_at5 = 0.0

    for idx in range(total_iters):
        logging.info("-" * 30)
        logging.info("Iter: {} / {}".format(idx + 1, total_iters))

        num_iter = 1
        tic = time.time()
        train_usage_sample = []

        for batch_step in range(total_steps):
            train_batch = train_set[batch_step * batch_size:(batch_step + 1) *
                                    batch_size, :]
            _, _, loss, action = sess.run(
                [
                    train_model, train_policy, model.loss,
                    policy_net.action_predict
                ],
                feed_dict={
                    model.input_train: train_batch,
                    policy_net.input: train_batch,
                },
            )
            train_usage_sample.extend(np.array(action).tolist())

            if num_iter % log_meter == 0:
                logging.info("\t{:5d} /{:5d} Loss: {:.3f}".format(
                    batch_step + 1, total_steps, loss))
            num_iter += 1

        summary_block(train_usage_sample, len(model_args["dilations"]),
                      "Train")

        # 1. eval model
        mrr_list = {5: [], 20: []}
        hr_list = {5: [], 20: []}
        ndcg_list = {5: [], 20: []}
        test_usage_sample = []

        for batch_step in range(test_steps):
            test_batch = test_set[batch_step * batch_size:(batch_step + 1) *
                                  batch_size, :]
            action, pred_probs = sess.run(
                [policy_net.action_predict, model.probs],
                feed_dict={
                    model.input_test: test_batch,
                    policy_net.input: test_batch,
                },
            )
            test_usage_sample.extend(np.array(action).tolist())

            ground_truth = test_batch[:, -1]
            top_5_rank, top_20_rank = sample_top_ks(pred_probs, [5, 20])

            indices_5 = [
                np.argwhere(line == item)
                for line, item in zip(top_5_rank, ground_truth)
            ]
            indices_20 = [
                np.argwhere(line == item)
                for line, item in zip(top_20_rank, ground_truth)
            ]

            mrr5_sub, hr5_sub, ndcg5_sub = get_metric(indices_5)
            mrr20_sub, hr20_sub, ndcg20_sub = get_metric(indices_20)

            mrr_list[5].extend(mrr5_sub)
            mrr_list[20].extend(mrr20_sub)
            hr_list[5].extend(hr5_sub)
            hr_list[20].extend(hr20_sub)
            ndcg_list[5].extend(ndcg5_sub)
            ndcg_list[20].extend(ndcg20_sub)

        summary_block(test_usage_sample, len(model_args["dilations"]), "Test")

        ndcg_5, ndcg_20 = np.mean(ndcg_list[5]), np.mean(ndcg_list[20])
        mrr_5, mrr_20 = np.mean(mrr_list[5]), np.mean(mrr_list[20])
        hr_5, hr_20 = np.mean(hr_list[5]), np.mean(hr_list[20])

        logging.info("<Metric>::TestSet")
        logging.info("\t MRR@5: {:.4f}, HIT@5: {:.4f}, NDCG@5: {:.4f}".format(
            mrr_5, hr_5, ndcg_5))
        logging.info("\tMRR@20: {:.4f}, HIT@20: {:.4f}, NDCG@20: {:.4f}".format(
            mrr_20, hr_20, ndcg_20))
        mrr_at5 = mrr_5

        # 2. save model
        if mrr_at5 > best_mrr_at5:
            logging.info(
                ">>>>> Saving model due to better MRR@5: {:.4f} <<<<<".format(
                    mrr_at5))
            saver.save(
                sess,
                os.path.join(model_save_path,
                             "{}_{}.tfkpt".format(model_name, num_iter)),
            )
            best_mrr_at5 = mrr_at5

        toc = time.time()
        logging.info("Iter: {} / {} finish. Time: {:.2f} min".format(
            idx + 1, total_iters, (toc - tic) / 60))

    sess.close()
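
# NOTE: both training loops in this file call summary_block(usage, num_blocks,
# tag) to report how often the policy keeps each dilated block. The helper
# below is a hypothetical sketch of that reporting, assuming `usage` is a list
# of per-sequence binary action vectors of length num_blocks (1 = block kept);
# the actual project helper may summarize the actions differently.
def _summary_block_sketch(usage, num_blocks, tag):
    usage = np.asarray(usage, dtype=np.float32).reshape(-1, num_blocks)
    per_block = usage.mean(axis=0)  # keep-rate of every dilated block
    logging.info("[{}] Block keep-rate: {}".format(
        tag, ", ".join("{:.2f}".format(p) for p in per_block)))
    logging.info("[{}] Avg blocks kept per sequence: {:.2f} / {}".format(
        tag, float(usage.sum(axis=1).mean()), num_blocks))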
def start(global_args):
    preset(global_args)
    model_args, train_set, test_set = get_data_and_config(global_args)

    # ----------------------
    # Part.1 Build Model(s)
    # ----------------------
    ratio = global_args["occupy"]
    if ratio is None:
        gpu_config = get_proto_config()
        logging.info("Auto-growth GPU memory.")
    else:
        gpu_config = get_proto_config_with_occupy(ratio)
        logging.info("{:.1f}% GPU memory occupied.".format(ratio * 100))
    sess = tf.Session(config=gpu_config)

    with tf.variable_scope("policy_net"):
        policy_net = PolicyNetGumbelRL(model_args)
        policy_net.build_policy()

    with tf.variable_scope(tf.get_variable_scope()):
        model = NextItNetGumbelRL(model_args)
        model.build_train_graph(policy_action=policy_net.action_predict)
        model.build_test_graph(policy_action=policy_net.action_predict)

    # step-1, split variables between the backbone and the policy net
    variables = tf.contrib.framework.get_variables_to_restore()
    model_variables = [
        v for v in variables if not v.name.startswith("policy_net")
    ]
    policy_variables = [
        v for v in variables if v.name.startswith("policy_net")
    ]

    # step-2, create one optimizer per variable group
    with tf.variable_scope(tf.get_variable_scope()):
        optimizer_finetune = tf.train.AdamOptimizer(
            learning_rate=model_args["lr"], name="Adam_finetune")
        train_model = optimizer_finetune.minimize(model.loss,
                                                  var_list=model_variables)
        # train_model_rl = optimizer_finetune.minimize(
        #     policy_net.rl_loss, var_list=model_variables
        # )

    with tf.variable_scope("policy_net"):
        optimizer_policy = tf.train.AdamOptimizer(
            learning_rate=model_args["lr"], name="Adam_policy")
        train_policy = optimizer_policy.minimize(model.loss,
                                                 var_list=policy_variables)
        train_policy_rl = optimizer_policy.minimize(policy_net.rl_loss,
                                                    var_list=policy_variables)

    # step-3, restore parameters if needed
    if not global_args["resume"]:
        init = tf.global_variables_initializer()
        sess.run(init)
        start_at = 0
        if global_args["use_pre"]:
            # step-3.1 restore pre-trained parameters
            restore_op = tf.train.Saver(var_list=model_variables)
            restore_op.restore(sess, global_args["pre"])
            # step-3.2 copy embedding to policy-net
            sess.run(tf.assign(policy_net.item_embedding,
                               model.item_embedding))
            logging.info(">>>>> Parameters loaded from pre-trained model.")
        else:
            logging.info(">>>>> Training without pre-trained model.")
    else:
        resume_op = tf.train.Saver()
        resume_op.restore(sess, global_args["resume_path"])
        start_at = global_args["resume_at"]
        logging.info(
            ">>>>> Resume from checkpoint, start at epoch {}".format(start_at))

    # ----------------------
    # Part.2 Train
    # ----------------------
    logging.info("Start @ {}".format(strftime("%m.%d-%H:%M:%S", localtime())))
    saver = tf.train.Saver(max_to_keep=3)

    batch_size = model_args["batch_size"]
    log_meter = model_args["log_every"]
    total_iters = model_args["iter"]
    total_steps = int(train_set.shape[0] / batch_size)
    test_steps = int(test_set.shape[0] / batch_size)
    model_save_path = global_args["store_path"]
    model_name = global_args["name"]
    logging.info("Batch size = {}, Batches = {}".format(
        batch_size, total_steps))

    best_mrr_at5 = 0.0
    action_nums = len(model_args["dilations"])

    for idx in range(start_at, total_iters):
        logging.info("-" * 30)
        if idx < global_args["rl_iter"]:
            rl_str = "OFF"
        else:
            rl_str = " ON"
        logging.info("[RL-{}] Iter: {} / {}".format(rl_str, idx + 1,
                                                    total_iters))

        num_iter = 1
        tic = time.time()
        train_usage_sample = []

        for batch_step in range(total_steps):
            train_batch = train_set[batch_step * batch_size:(batch_step + 1) *
                                    batch_size, :]
            if idx >= global_args["rl_iter"]:
                # 1. soft_result: rollout with the sampled (soft) action
                # 2. map_result: rollout with the greedy / argmax (hard) action
                # 3. advantage -> reward -> optimize
                [soft_probs, soft_action] = sess.run(
                    [model.probs, policy_net.action_predict],
                    feed_dict={
                        model.input_test: train_batch,
                        policy_net.input: train_batch,
                        policy_net.method: np.array(0),
                        policy_net.sample_action: np.ones(
                            (batch_size, action_nums)),
                    },
                )
                [hard_probs, hard_action] = sess.run(
                    [model.probs, policy_net.action_predict],
                    feed_dict={
                        model.input_test: train_batch,
                        policy_net.input: train_batch,
                        policy_net.method: np.array(1),
                        policy_net.sample_action: np.ones(
                            (batch_size, action_nums)),
                    },
                )
                ground_truth = train_batch[:, -1]
                reward_soft = reward_fn(soft_probs, ground_truth, soft_action,
                                        global_args["gamma"])
                reward_hard = reward_fn(hard_probs, ground_truth, hard_action,
                                        global_args["gamma"])
                reward_train = reward_soft - reward_hard

                _, _, _, action, loss, rl_loss = sess.run(
                    [
                        train_policy_rl,
                        train_policy,
                        train_model,
                        policy_net.action_predict,
                        model.loss,
                        policy_net.rl_loss,
                    ],
                    feed_dict={
                        model.input_train: train_batch,
                        policy_net.input: train_batch,
                        policy_net.method: np.array(-1),
                        policy_net.sample_action: soft_action,
                        policy_net.reward: reward_train,
                    },
                )
                train_usage_sample.extend(np.array(action).tolist())

                if num_iter % log_meter == 0:
                    logging.info(
                        "\t{:5d} /{:5d} Loss: {:.3f}, RL-Loss: {:.3f}, Reward-Avg: {:.3f}"
                        .format(
                            batch_step + 1,
                            total_steps,
                            loss,
                            rl_loss,
                            np.mean(reward_train),
                        ))
                num_iter += 1
            else:
                [hard_action] = sess.run(
                    [policy_net.action_predict],
                    feed_dict={
                        policy_net.method: np.array(1),
                        policy_net.input: train_batch,
                        policy_net.sample_action: np.ones(
                            (batch_size, action_nums)),
                    },
                )
                [_, _, action, loss] = sess.run(
                    [
                        train_model, train_policy, policy_net.action_predict,
                        model.loss
                    ],
                    feed_dict={
                        model.input_train: train_batch,
                        policy_net.input: train_batch,
                        policy_net.sample_action: hard_action,
                        policy_net.method: np.array(-1),
                    },
                )
                train_usage_sample.extend(np.array(action).tolist())

                if num_iter % log_meter == 0:
                    logging.info("\t{:5d} /{:5d} Loss: {:.3f}".format(
                        batch_step + 1, total_steps, loss))
                num_iter += 1

        summary_block(train_usage_sample, len(model_args["dilations"]),
                      "Train")

        # 1. eval model
        mrr_list = {5: [], 20: []}
        hr_list = {5: [], 20: []}
        ndcg_list = {5: [], 20: []}
        test_usage_sample = []

        for batch_step in range(test_steps):
            test_batch = test_set[batch_step * batch_size:(batch_step + 1) *
                                  batch_size, :]
            action, pred_probs = sess.run(
                [policy_net.action_predict, model.probs],
                feed_dict={
                    model.input_test: test_batch,
                    policy_net.input: test_batch,
                    policy_net.method: np.array(1),
                    policy_net.sample_action: np.ones(
                        (batch_size, action_nums)),
                },
            )
            test_usage_sample.extend(np.array(action).tolist())

            ground_truth = test_batch[:, -1]
            top_5_rank, top_20_rank = sample_top_ks(pred_probs, [5, 20])

            indices_5 = [
                np.argwhere(line == item)
                for line, item in zip(top_5_rank, ground_truth)
            ]
            indices_20 = [
                np.argwhere(line == item)
                for line, item in zip(top_20_rank, ground_truth)
            ]

            mrr5_sub, hr5_sub, ndcg5_sub = get_metric(indices_5)
            mrr20_sub, hr20_sub, ndcg20_sub = get_metric(indices_20)

            mrr_list[5].extend(mrr5_sub)
            mrr_list[20].extend(mrr20_sub)
            hr_list[5].extend(hr5_sub)
            hr_list[20].extend(hr20_sub)
            ndcg_list[5].extend(ndcg5_sub)
            ndcg_list[20].extend(ndcg20_sub)

        summary_block(test_usage_sample, len(model_args["dilations"]), "Test")

        ndcg_5, ndcg_20 = np.mean(ndcg_list[5]), np.mean(ndcg_list[20])
        mrr_5, mrr_20 = np.mean(mrr_list[5]), np.mean(mrr_list[20])
        hr_5, hr_20 = np.mean(hr_list[5]), np.mean(hr_list[20])

        logging.info("<Metric>::TestSet")
        logging.info("\t MRR@5: {:.4f}, HIT@5: {:.4f}, NDCG@5: {:.4f}".format(
            mrr_5, hr_5, ndcg_5))
        logging.info("\tMRR@20: {:.4f}, HIT@20: {:.4f}, NDCG@20: {:.4f}".format(
            mrr_20, hr_20, ndcg_20))
        mrr_at5 = mrr_5

        # 2. save model
        if mrr_at5 > best_mrr_at5:
            logging.info(
                ">>>>> Saving model due to better MRR@5: {:.4f} <<<<<".format(
                    mrr_at5))
            saver.save(
                sess,
                os.path.join(model_save_path,
                             "{}_{}.tfkpt".format(model_name, num_iter)),
            )
            best_mrr_at5 = mrr_at5

        toc = time.time()
        logging.info("Iter: {} / {} finish. Time: {:.2f} min".format(
            idx + 1, total_iters, (toc - tic) / 60))

    sess.close()
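
# NOTE: the RL branch above depends on the project's reward_fn together with
# policy_net.reward / policy_net.rl_loss. The project's exact reward is not
# shown in this file; the sketch below only illustrates one common form that
# matches the call signature reward_fn(probs, ground_truth, action, gamma):
# an accuracy term for the ground-truth item minus a gamma-weighted penalty on
# how many blocks the policy keeps. Treat it as an assumption, not the
# project's definition.
def _reward_fn_sketch(pred_probs, ground_truth, action, gamma):
    batch_idx = np.arange(pred_probs.shape[0])
    # probability the model assigns to the true next item under this action
    accuracy_term = pred_probs[batch_idx, ground_truth.astype(np.int64)]
    # fraction of dilated blocks the policy decided to execute
    usage_term = np.asarray(action, dtype=np.float32).mean(axis=1)
    return accuracy_term - gamma * usage_term
# The training loop then uses reward_soft - reward_hard, the gap between the
# sampled rollout and the greedy (argmax) rollout, as a self-critical
# advantage for the REINFORCE-style update of the policy net.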