import math

import numpy as np
import torch

# AgentModel and CriticModel are assumed to be the project's LSTM-based model
# classes, defined elsewhere in the repository.


class agent:

    def __init__(self):
        self.critic_loss = None
        self.factors_agent = None
        self.factors_critic = None
        self.history_len = 0
        self.is_train = None
        self.loss_agent = None
        self.loss_critic = None
        self.model_agent = None
        self.model_critic = None
        self.optimizer_agent = None
        self.optimizer_critic = None
        self.pred = None
        self.reward = None

    def add_model(self):
        """This function builds the agent and critic models along with their
        optimizers and loss functions"""
        self.model_agent = AgentModel(12, 20, 6)
        self.model_critic = CriticModel(11, 21, 10, 0)
        self.set_model_weights(self.model_agent)
        self.set_model_weights(self.model_critic)
        self.optimizer_agent = torch.optim.Adam(self.model_agent.parameters(), lr=0.001)
        self.optimizer_critic = torch.optim.Adam(self.model_critic.parameters(), lr=0.001)
        self.loss_agent = torch.nn.MSELoss()
        self.loss_critic = torch.nn.MSELoss()

    def add_prediction(self, prediction):
        """This function appends the prediction to the critic input as its
        final time step"""
        i = 0
        j = self.history_len
        self.factors_critic[i, j, 0] = prediction['score']
        self.factors_critic[i, j, 1] = prediction['r0']
        self.factors_critic[i, j, 2] = prediction['r1']
        self.factors_critic[i, j, 3] = prediction['r2']
        self.factors_critic[i, j, 4] = prediction['r3']
        self.factors_critic[i, j, 5] = prediction['r4']
        self.factors_critic[i, j, 6] = prediction['r5']
        self.factors_critic[i, j, 7] = prediction['sd']
        self.factors_critic[i, j, 8] = prediction['avg']
        self.factors_critic[i, j, 9] = prediction['m']
        self.factors_critic[i, j, 10] = prediction['k']

    def custom_loss_critic(self, target, selection, selection_averages, target_averages):
        """This returns the normalized cross-correlation between target and
        selection"""
        # Numerator: product of the mean-centred selection and target
        top = np.multiply((selection - selection_averages), (target - target_averages))
        top_sum = np.sum(top, axis=0)
        # Denominator: geometric mean of the centred sums of squares
        bottom_selection = np.power((selection - selection_averages), 2)
        bottom_targets = np.power((target - target_averages), 2)
        bottom_selection_sum = np.sum(bottom_selection, axis=0)
        bottom_targets_sum = np.sum(bottom_targets, axis=0)
        bottom = np.sqrt(np.multiply(bottom_selection_sum, bottom_targets_sum))
        divided = np.divide(top_sum, bottom)
        divided = divided[~np.isnan(divided)]
        return np.sum(divided)

    def factorize(self, user_history):
        """This function factorizes a given user history, or batch of user
        histories, into factors for an LSTM model"""
        # Reset the holding arrays
        self.factors_agent = np.zeros((1, 20, 12))
        self.factors_critic = np.zeros((1, 21, 11))
        # The leading index i keeps a batch dimension of 1 to match the
        # models' expected input shape
        i = 0
        j = 0
        for index, row in user_history.iterrows():
            # The last entry in a history is the one we attempt to predict,
            # so stop before it
            if j == (user_history.shape[0] - 1):
                break
            # Truncate the maximum history to ~1 day of continuous listening
            if j == 20:
                break
            # In an act of data reduction and factor selection, drop all
            # Spotify embeddings and deploy my own
            self.factors_agent[i, j, 0] = row['score']
            self.factors_critic[i, j, 0] = row['score']
            self.factors_agent[i, j, 1] = row['r0']
            self.factors_critic[i, j, 1] = row['r0']
            self.factors_agent[i, j, 2] = row['r1']
            self.factors_critic[i, j, 2] = row['r1']
            self.factors_agent[i, j, 3] = row['r2']
            self.factors_critic[i, j, 3] = row['r2']
            self.factors_agent[i, j, 4] = row['r3']
            self.factors_critic[i, j, 4] = row['r3']
            self.factors_agent[i, j, 5] = row['r4']
            self.factors_critic[i, j, 5] = row['r4']
            self.factors_agent[i, j, 6] = row['r5']
            self.factors_critic[i, j, 6] = row['r5']
            self.factors_agent[i, j, 7] = row['m']
            self.factors_critic[i, j, 7] = row['m']
            self.factors_agent[i, j, 8] = row['k']
            self.factors_critic[i, j, 8] = row['k']
            self.factors_agent[i, j, 9] = row['day_w']
            self.factors_critic[i, j, 9] = row['sd']
            self.factors_agent[i, j, 10] = row['day_m']
            self.factors_critic[i, j, 10] = row['avg']
            self.factors_agent[i, j, 11] = row['hour_d']
            j += 1
        i += 1
        self.history_len = j

    def get_agent_reward(self, repeat):
        """This function gets the agent reward"""
        # If the track is something the user has heard before, take the
        # reward to the power of (1/2)
        if repeat > 0:
            reward = math.pow(self.reward, 0.5)
        else:
            reward = self.reward
        # Cap the reward just below 1.0; the 1e-7 margin guards against
        # machine-precision issues introduced by the square root (verified
        # through testing)
        if reward > 0.9999999:
            reward = 0.9999999
        reward = torch.tensor([reward], requires_grad=True)
        self.reward = reward

    def get_critic_loss(self, current_user_history, data):
        """This function gets the critic loss"""
        user = data[data.user_id == current_user_history.user_id.values[0]]
        user = user[['r0', 'r1', 'r2', 'r3', 'r4', 'r5']]
        user_array = user.to_numpy()
        # To use numpy broadcasting we need an overly bulky array of the
        # averages, both for the selection and for the target (as passed to
        # self.custom_loss_critic)
        selection_averages = []
        selection_averages.append(np.average(current_user_history.r0.values))
        selection_averages.append(np.average(current_user_history.r1.values))
        selection_averages.append(np.average(current_user_history.r2.values))
        selection_averages.append(np.average(current_user_history.r3.values))
        selection_averages.append(np.average(current_user_history.r4.values))
        selection_averages.append(np.average(current_user_history.r5.values))
        selection_averages = np.array(selection_averages)
        # Give selection_averages a second dimension to match time; np.repeat
        # copies the average values along the time axis
        selection_averages = np.repeat(selection_averages[None, :],
                                       current_user_history.shape[0], axis=0)
        selection_averages = selection_averages[-10:]
        selection_array = current_user_history[['r0', 'r1', 'r2', 'r3', 'r4', 'r5']]
        selection_array = selection_array[-10:]
        selection_array = selection_array.to_numpy()
        # Repeat the process for the whole user history, as reflected by user
        target_averages = []
        target_averages.append(np.average(user.r0.values))
        target_averages.append(np.average(user.r1.values))
        target_averages.append(np.average(user.r2.values))
        target_averages.append(np.average(user.r3.values))
        target_averages.append(np.average(user.r4.values))
        target_averages.append(np.average(user.r5.values))
        target_averages = np.array(target_averages)
        target_averages = np.repeat(target_averages[None, :],
                                    selection_array.shape[0], axis=0)
        # Slide a window the size of the selection across the full history and
        # average the normalized cross-correlations
        critic_loss = []
        end = selection_array.shape[0]
        start = 0
        while end < user_array.shape[0]:
            critic_loss.append(self.custom_loss_critic(user_array[start:end, :],
                                                       selection_array,
                                                       selection_averages,
                                                       target_averages))
            start += 1
            end += 1
        if len(critic_loss) > 0:
            critic_loss = np.average(critic_loss)
        else:
            critic_loss = 0.0
        critic_loss = torch.tensor([critic_loss], requires_grad=True)
        self.critic_loss = critic_loss

    def predict(self, user_history):
        """This function manages the training of the model based on the
        provided data"""
        self.factorize(user_history)
        self.pred = self.model_agent(torch.Tensor(self.factors_agent))

    def propagate(self, current_user_history, data, prediction, repeat):
        """This function propagates the loss through the actor and critic"""
        self.add_prediction(prediction)
        # Clear out the gradients from the last prediction
        self.model_agent.zero_grad()
        self.model_critic.zero_grad()
        # Get the critic reward
        self.reward = self.model_critic(torch.Tensor(self.factors_critic))
        self.get_agent_reward(repeat)
        # Get the agent loss and apply it
        agent_loss = self.loss_agent(self.reward, torch.tensor([1.0]))
        agent_loss.backward()
        self.optimizer_agent.step()
        # Get the critic loss and apply it
        self.get_critic_loss(current_user_history, data)
        evaluated_critic_loss = self.loss_critic(self.critic_loss, torch.tensor([6.0]))
        evaluated_critic_loss.backward()
        self.optimizer_critic.step()

    def ready_agent(self, agent_model_path, critic_model_path, train):
        """This function sets up a working agent - one complete with a loss
        function and a model - from saved models"""
        self.is_train = train
        self.model_agent = torch.load(agent_model_path)
        if self.model_agent is not None:
            print("Actor Model {} successfully loaded.\n".format(agent_model_path))
        self.model_critic = torch.load(critic_model_path)
        if self.model_critic is not None:
            print("Critic Model {} successfully loaded.\n".format(critic_model_path))

    def set_model_weights(self, model):
        """This function initializes the weights of the Linear layers in a
        pytorch model"""
        for layer in model.modules():
            if isinstance(layer, torch.nn.Linear):
                n = layer.in_features
                y = 1.0 / np.sqrt(n)
                layer.weight.data.uniform_(-y, y)
                layer.bias.data.fill_(0)

    def wake_agent(self, train):
        """This function sets up a working agent - one complete with a loss
        function and a model"""
        self.is_train = train
        self.add_model()
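

# A minimal usage sketch, not part of the class above. It assumes `data` is a
# pandas DataFrame of listening events carrying the columns referenced in
# factorize() and add_prediction() (user_id, score, r0-r5, sd, avg, m, k,
# day_w, day_m, hour_d). The CSV path and the placeholder prediction dict are
# hypothetical; the real pipeline presumably maps the agent's raw output back
# onto the factors of a concrete recommended track before calling propagate().
if __name__ == '__main__':
    import pandas as pd

    data = pd.read_csv('listening_history.csv')  # hypothetical input file

    recommender = agent()
    recommender.wake_agent(train=True)

    for user_id in data.user_id.unique():
        user_history = data[data.user_id == user_id]
        recommender.predict(user_history)

        # Placeholder factors for the recommended track; in practice these
        # would come from the track selected using recommender.pred.
        prediction = {key: 0.0 for key in
                      ['score', 'r0', 'r1', 'r2', 'r3', 'r4', 'r5',
                       'sd', 'avg', 'm', 'k']}
        repeat = 0  # 1 if the recommended track already appears in the history
        recommender.propagate(user_history, data, prediction, repeat)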
import os
import random
import sys
from pprint import pprint

import numpy as np
import torch
from torch import optim

# Project-specific pieces referenced below (hparams, load, read_vocab,
# Tokenizer, BTokenizer, ImageFeatures, Simulator, Batch, Evaluation,
# AgentModel, SimpleAgent, VerbalAskAgent, train) are assumed to be imported
# from the surrounding repository.


def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''

    # Set which GPU to use
    device = torch.device('cuda', hparams.device_id)

    # Load the checkpoint (if one exists)
    if os.path.exists(hparams.load_path):
        print('Load model from %s' % hparams.load_path)
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter']
    else:
        if not hparams.forward_agent and not hparams.random_agent and \
                not hparams.shortest_agent:
            if hasattr(hparams, 'load_path') and hasattr(hparams, 'eval_only') and \
                    hparams.eval_only:
                sys.exit('load_path %s does not exist!' % hparams.load_path)
        ckpt = None
        start_iter = 0
    end_iter = hparams.n_iters

    if not hasattr(hparams, 'ask_baseline'):
        hparams.ask_baseline = None
    if not hasattr(hparams, 'instruction_baseline'):
        hparams.instruction_baseline = None

    # Set random seeds
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)
    np.random.seed(hparams.seed)
    random.seed(hparams.seed)

    # Create or load vocab
    train_vocab_path = os.path.join(hparams.data_path, 'vocab.txt')
    if not os.path.exists(train_vocab_path):
        raise Exception('Vocab file not found at %s' % train_vocab_path)
    vocab = read_vocab([train_vocab_path])
    hparams.instr_padding_idx = vocab.index('<PAD>')

    tokenizer = Tokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    if hparams.encoder_type == 'dic':
        tokenizer = BTokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    featurizer = ImageFeatures(hparams.img_features, device)
    simulator = Simulator(hparams)

    # Create train environment
    train_env = Batch(hparams, simulator, featurizer, tokenizer, split='train')

    # Create validation environments
    val_splits = ['val_seen', 'val_unseen']
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only
    if eval_mode:
        if 'val_seen' in hparams.load_path:
            val_splits = ['test_seen']
        elif 'val_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        else:
            val_splits = ['test_seen', 'test_unseen']
        end_iter = start_iter + 1

    if hparams.eval_on_val:
        val_splits = [x.replace('test_', 'val_') for x in val_splits]

    val_envs_tmp = {
        split: (
            Batch(hparams, simulator, featurizer, tokenizer, split=split),
            Evaluation(hparams, [split], hparams.data_path))
        for split in val_splits
    }

    val_envs = {}
    for key, value in val_envs_tmp.items():
        if '_seen' in key:
            val_envs[key + '_env_seen_anna'] = value
            val_envs[key + '_env_unseen_anna'] = value
        else:
            assert '_unseen' in key
            val_envs[key] = value

    # Build model and optimizer
    model = AgentModel(len(vocab), hparams, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
                           weight_decay=hparams.weight_decay)

    best_metrics = {env_name: -1 for env_name in val_envs.keys()}
    best_metrics['combined'] = -1

    # Load model parameters from the checkpoint (if one exists)
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']

    if hparams.log_every == -1:
        hparams.log_every = round(len(train_env.data) /
                                  (hparams.batch_size * 100)) * 100

    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)
    print('Number of parameters:',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    if hparams.random_agent or hparams.forward_agent or hparams.shortest_agent:
        assert eval_mode
        agent = SimpleAgent(hparams)
    else:
        agent = VerbalAskAgent(model, hparams, device)

    return train(train_env, val_envs, agent, model, optimizer, start_iter,
                 end_iter, best_metrics, eval_mode)
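

# A minimal sketch of the checkpoint layout train_val() expects, inferred from
# the keys it reads out of `ckpt` above ('iter', 'model_state_dict',
# 'optim_state_dict', 'best_metrics', 'data_idx'). save_checkpoint is a
# hypothetical counterpart to the project's `load` helper, shown only to make
# that dictionary explicit.
def save_checkpoint(path, iteration, model, optimizer, best_metrics, data_idx):
    torch.save({
        'iter': iteration,
        'model_state_dict': model.state_dict(),
        'optim_state_dict': optimizer.state_dict(),
        'best_metrics': best_metrics,
        'data_idx': data_idx,
    }, path)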