def train():
    batch_size = configs["batch_size"]
    log_meter = configs["log_every"]
    total_steps = int(train_set.shape[0] / batch_size)
    train_usage_sample = []
    first, last = data_loader.first_target, data_loader.last_target
    for batch_step in range(total_steps):
        f, t = batch_step * batch_size, (batch_step + 1) * batch_size
        item_batch = train_set[f:t, :]      # [B, L+1]
        context_batch = item_batch[:, :-1]  # [B, L]
        pos_target = item_batch[:, -1:]     # [B, 1]
        neg_target = np.array([[random_neg(first, last, s[0])]
                               for s in pos_target])  # [B, 1]
        _, loss_out, action = sess.run(
            [train_op, target_model.train_loss, policy_model.actions_train],
            feed_dict={
                policy_model.input: context_batch,
                source_model.input_source_train: context_batch,
                target_model.input_train_pos: pos_target,
                target_model.input_train_neg: neg_target,
            },
        )
        train_usage_sample.extend(np.array(action).tolist())
        if (batch_step + 1) % log_meter == 0:
            logging.info("\t<{:5d}/{:5d}> Loss: {:.4f}".format(
                batch_step + 1, total_steps, loss_out))
    if configs["method"] == "hard":
        summary_block(train_usage_sample, len(configs["dilations"]), "Train")
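
# The sampler below is a hedged sketch of the `random_neg` helper that train()
# relies on; the project's own implementation may differ. It assumes target item
# ids span the inclusive range [first, last] and rejection-samples so the drawn
# negative never equals the positive item.
def random_neg(first, last, pos):
    neg = np.random.randint(first, last + 1)  # uniform draw over [first, last]
    while neg == pos:                         # resample on collision with the positive
        neg = np.random.randint(first, last + 1)
    return neg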
def train_rl_off():
    batch_size = configs["batch_size"]
    log_meter = configs["log_every"]
    total_steps = int(train_set.shape[0] / batch_size)
    action_nums = len(configs["dilations"])
    first, last = data_loader.first_target, data_loader.last_target
    train_usage_sample = []
    for batch_step in range(total_steps):
        f, t = batch_step * batch_size, (batch_step + 1) * batch_size
        item_batch = train_set[f:t, :]      # [B, L+1]
        context_batch = item_batch[:, :-1]  # [B, L]
        pos_target = item_batch[:, -1:]     # [B, 1]
        neg_target = np.array([[random_neg(first, last, s[0])]
                               for s in pos_target])
        hard_action = sess.run(
            policy_model.test_action,
            feed_dict={
                policy_model.input: context_batch,
                policy_model.method: np.array(1),
                policy_model.sample_action: np.ones((batch_size, action_nums)),
            },
        )
        _, action, loss = sess.run(
            [
                train_finetune, policy_model.train_action,
                target_model.train_loss
            ],
            feed_dict={
                source_model.input_source_train: context_batch,
                policy_model.input: context_batch,
                policy_model.method: np.array(-1),
                policy_model.sample_action: hard_action,
                target_model.input_train_pos: pos_target,
                target_model.input_train_neg: neg_target,
            },
        )
        train_usage_sample.extend(np.array(action).tolist())
        if (batch_step + 1) % log_meter == 0:
            logging.info("\t<{:5d}/{:5d}> Loss: {:.4f}".format(
                batch_step + 1, total_steps, loss))
    summary_block(train_usage_sample, action_nums, "Train")
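
# Hedged sketch of the `summary_block` logger used by the training and evaluation
# loops; the real helper may report different statistics. It assumes each usage
# sample is a 0/1 vector with one entry per dilated block and logs the fraction
# of sequences that executed each block during the phase.
def summary_block(usage_sample, n_blocks, phase):
    usage = np.array(usage_sample, dtype=np.float32).reshape(-1, n_blocks)
    rates = usage.mean(axis=0)  # per-block execution rate over the epoch
    logging.info("[{}] block usage: {}".format(
        phase, ", ".join("{:.3f}".format(r) for r in rates)))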
def evaluate():
    batch_size = configs["batch_size"]
    n_neg = configs["n_neg"]
    total_steps = int(test_set.shape[0] / batch_size)
    action_nums = len(configs["dilations"])
    meter = SRSMetric(k_list=[5, 20])
    meter.setup_and_clean()
    test_usage_sample = []
    for batch_step in range(total_steps):
        f, t = batch_step * batch_size, (batch_step + 1) * batch_size
        batch = test_set[f:t, :]     # [B, L+1]
        context = batch[:, :-1]      # [B, L]
        pos_target = batch[:, -1:]   # [B, 1]
        neg_target = [
            random_negs(l=1, r=data_loader.target_nums, size=n_neg, pos=s[0])
            for s in pos_target
        ]
        target = np.concatenate([neg_target, pos_target], 1)  # [B, n_neg + 1]: negatives then the positive
        test_probs, action = sess.run(
            [target_model.test_probs, policy_model.test_action],
            feed_dict={
                source_model.input_source_test: context,
                policy_model.input: context,
                policy_model.method: np.array(1),
                policy_model.sample_action: np.ones((batch_size, action_nums)),
                target_model.input_test: target,
            },
        )
        ground_truth = [[n_neg]] * batch_size  # positive item sits at column n_neg
        meter.submit(test_probs, ground_truth)
        test_usage_sample.extend(np.array(action).tolist())
    summary_block(test_usage_sample, len(configs["dilations"]), "Test")
    meter.calc()
    meter.output_to_logger()
    return meter.mrr[5]
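
# Hedged sketch of the `random_negs` helper that evaluate() and train_rl_on()
# call; the actual sampler may differ. It assumes candidate ids lie in the
# half-open range [l, r) and draws `size` negatives, resampling any draw that
# collides with the positive item `pos`.
def random_negs(l, r, size, pos):
    negs = []
    while len(negs) < size:
        neg = np.random.randint(l, r)  # uniform draw over [l, r)
        if neg != pos:
            negs.append(neg)
    return negs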
def train_rl_on():
    batch_size = configs["batch_size"]
    log_meter = configs["log_every"]
    reward_k = configs["reward_k"]
    n_neg = configs["n_neg"]
    gamma = configs["gamma"]
    action_nums = len(configs["dilations"])
    total_steps = int(train_set.shape[0] / batch_size)
    first, last = data_loader.first_target, data_loader.last_target
    train_usage_sample = []
    for batch_step in range(total_steps):
        f, t = batch_step * batch_size, (batch_step + 1) * batch_size
        item_batch = train_set[f:t, :]      # [B, L+1]
        context_batch = item_batch[:, :-1]  # [B, L]
        pos_target = item_batch[:, -1:]     # [B, 1]
        neg_target_train = np.array([[random_neg(first, last, s[0])]
                                     for s in pos_target])
        neg_target_test = [
            random_negs(l=1, r=data_loader.target_nums, size=n_neg, pos=s[0])
            for s in pos_target
        ]
        target = np.concatenate([neg_target_test, pos_target], 1)  # [B, n_neg + 1]: negatives then the positive

        # [B, n_neg + 1], [B, #Blocks]
        [soft_probs, soft_action] = sess.run(
            [target_model.test_probs, policy_model.test_action],
            feed_dict={
                source_model.input_source_test: context_batch,
                policy_model.input: context_batch,
                policy_model.method: np.array(0),
                policy_model.sample_action: np.ones((batch_size, action_nums)),
                target_model.input_test: target,
            },
        )
        # [B, n_neg + 1], [B, #Blocks]
        [hard_probs, hard_action] = sess.run(
            [target_model.test_probs, policy_model.test_action],
            feed_dict={
                source_model.input_source_test: context_batch,
                policy_model.input: context_batch,
                policy_model.method: np.array(1),
                policy_model.sample_action: np.ones((batch_size, action_nums)),
                target_model.input_test: target,
            },
        )
        reward_soft = reward_fn(soft_probs, n_neg, soft_action, gamma, k=reward_k)
        reward_hard = reward_fn(hard_probs, n_neg, hard_action, gamma, k=reward_k)
        reward_train = reward_soft - reward_hard

        _, _, action, loss, rl_loss = sess.run(
            [
                train_rl, train_finetune, policy_model.train_action,
                target_model.train_loss, policy_model.rl_loss
            ],
            feed_dict={
                source_model.input_source_train: context_batch,
                policy_model.input: context_batch,
                policy_model.method: np.array(-1),
                policy_model.sample_action: soft_action,
                policy_model.reward: reward_train,
                target_model.input_train_pos: pos_target,
                target_model.input_train_neg: neg_target_train,
            },
        )
        train_usage_sample.extend(np.array(action).tolist())
        if (batch_step + 1) % log_meter == 0:
            logging.info(
                "\t<{:5d}/{:5d}> Loss: {:.4f}, RL-Loss: {:+.4f}, Reward-Avg: {:+.4f}"
                .format(batch_step + 1, total_steps, loss, rl_loss,
                        np.mean(reward_train)))
    summary_block(train_usage_sample, len(configs["dilations"]), "Train")
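
# Hedged sketch of `reward_fn` as referenced in train_rl_on(); the exact reward
# shaping lives elsewhere in the project, so treat this as one plausible reading
# rather than the definitive implementation. It assumes the positive item sits at
# column `n_neg` of `probs` (negatives were concatenated first), rewards a top-k
# hit, and charges gamma per block the policy chose to execute.
def reward_fn(probs, n_neg, action, gamma, k):
    probs = np.asarray(probs)
    pos_score = probs[:, n_neg:n_neg + 1]                # score of the ground-truth item
    rank = (probs > pos_score).sum(axis=1)               # items ranked above the positive
    hit = (rank < k).astype(np.float32)                  # 1.0 when the positive is in the top-k
    usage_cost = gamma * np.asarray(action).sum(axis=1)  # penalty for blocks kept active
    return hit - usage_cost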