def train_agent(restore_prior_from='data/Prior.ckpt',
                restore_agent_from='data/Prior.ckpt',
                voc_file='data/Voc',
                molscore_config=None,
                learning_rate=0.0005,
                batch_size=64, n_steps=3000,
                sigma=60, experience_replay=0):

    voc = Vocabulary(init_from_file=voc_file)

    start_time = time.time()

    # Scoring_function
    scoring_function = MolScore(molscore_config)
    scoring_function.log_parameters({'batch_size': batch_size, 'sigma': sigma})

    print("Building RNNs")

    Prior = RNN(voc)
    Agent = RNN(voc)

    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we don't have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        print("Cuda available, loading prior & agent")
        Prior.rnn.load_state_dict(torch.load(restore_prior_from))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        print("Cuda not available, remapping to cpu")
        Prior.rnn.load_state_dict(torch.load(restore_prior_from, map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from, map_location=lambda storage, loc: storage))

    # We don't need gradients with respect to Prior
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # For logging purposes let's save some training parameters not captured by molscore
    with open(os.path.join(scoring_function.save_dir, 'reinvent_parameters.txt'), 'wt') as f:
        [f.write(f'{p}: {v}\n') for p, v in {'learning_rate': learning_rate, 'batch_size': batch_size,
                                             'n_steps': n_steps, 'sigma': sigma,
                                             'experience_replay': experience_replay}.items()]

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefore not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, i.e. only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)

        # Using molscore instead here
        try:
            score = scoring_function(smiles, step=step)
            augmented_likelihood = prior_likelihood + sigma * Variable(score)
        except:
            # If anything goes wrong with molscore, write scores and save .ckpt and kill monitor
            with open(os.path.join(scoring_function.save_dir,
                                   f'failed_smiles_{scoring_function.step}.smi'), 'wt') as f:
                [f.write(f'{smi}\n') for smi in smiles]
            torch.save(Agent.rnn.state_dict(),
                       os.path.join(scoring_function.save_dir, f'Agent_{step}.ckpt'))
            scoring_function.write_scores()
            scoring_function.kill_dash_monitor()
            raise

        # Calculate loss
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat((agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = - (1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print(f"\n Step {step}   Fraction valid SMILES: {fraction_valid_smiles(smiles) * 100:4.1f}  "
              f"Time elapsed: {time_elapsed:.2f}h  Time left: {time_left:.2f}h")
        print("  Agent    Prior   Target   Score             SMILES")
        for i in range(10):
            print(f" {agent_likelihood[i]:6.2f}   {prior_likelihood[i]:6.2f}  "
                  f"{augmented_likelihood[i]:6.2f}  {score[i]:6.2f}     {smiles[i]}")

        # Save the agent weights every 250 iterations ####
        if step % 250 == 0 and step != 0:
            torch.save(Agent.rnn.state_dict(),
                       os.path.join(scoring_function.save_dir, f'Agent_{step}.ckpt'))

    # If the entire training finishes, write out MolScore dataframe, kill dash_utils monitor and
    # save the final Agent.ckpt
    torch.save(Agent.rnn.state_dict(),
               os.path.join(scoring_function.save_dir, f'Agent_{n_steps}.ckpt'))
    scoring_function.write_scores()
    scoring_function.kill_dash_monitor()
    return
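# --- Minimal, self-contained sketch (not part of the original code) of the augmented-likelihood
# --- update used in train_agent above: the target log-likelihood is the prior log-likelihood plus
# --- sigma times the score, and the loss is the squared difference to the agent log-likelihood.
# --- Tensor values are illustrative only.
import torch

sigma = 60
agent_ll = torch.tensor([-25.0, -30.0, -28.0], requires_grad=True)  # log P_agent(seq), toy values
prior_ll = torch.tensor([-27.0, -29.0, -31.0])                      # log P_prior(seq), toy values
score = torch.tensor([0.4, 0.9, 0.1])                               # scoring function output in [0, 1]

augmented_ll = prior_ll + sigma * score
loss = torch.pow(augmented_ll - agent_ll, 2).mean()

# Same regularizer as above: penalizes sequences the agent assigns very low likelihood to
loss_p = -(1 / agent_ll).mean()
loss = loss + 5 * 1e3 * loss_p

loss.backward()
print(loss.item(), agent_ll.grad)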
def init_h(self, batch_size):
    # Initial cell state is zero
    return Variable(torch.zeros(3, batch_size, 512))
kwargs = {'num_workers': 0, 'pin_memory': True}
valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                           batch_size=valid_batchsize,
                                           shuffle=False, **kwargs)

conf_thresh = 0.005
nms_thresh = 0.45

metrics = []
labels = []
for batch_idx, (data, targets) in enumerate(tqdm.tqdm(valid_loader, desc="Detecting objects")):
    data = data.cuda()
    data = Variable(data, volatile=True)
    output = m(data).data
    batch_boxes = get_region_boxes(output, conf_thresh, m.num_classes, m.anchors, m.num_anchors, 0, 1)

    for i in range(output.size(0)):
        targets_i = targets[i]
        boxes = batch_boxes[i]
        boxes = nms(boxes, nms_thresh)

        width, height = get_image_size(valid_files[i])
        targets_i = rescale_target(targets_i, width, height)
        labels += targets_i[:, 0].tolist()
        prediction = boxes_to_prediction(boxes, width, height)
        metrics += get_prediction_metrics(prediction, targets_i)

show_eval_result(metrics, labels)
def init_h(self, batch_size, latent_vectors):
    # Initial cell state is zero
    # return Variable(torch.zeros(3, batch_size, 330))
    # or Initial cell state is latent vector
    return Variable(latent_vectors.repeat(3, 1, 1))
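# --- Illustrative shape check (assumed sizes, not from the original repo): repeating a
# --- (batch, hidden) latent matrix along a new leading dimension yields the
# --- (num_layers, batch, hidden) tensor a 3-layer GRU expects as its initial hidden state.
import torch

batch_size, hidden_size, num_layers = 4, 330, 3
latent_vectors = torch.randn(batch_size, hidden_size)
h0 = latent_vectors.repeat(num_layers, 1, 1)
print(h0.shape)  # torch.Size([3, 4, 330])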
def train_agent(restore_prior_from='data/Prior.ckpt',
                restore_agent_from='data/Prior.ckpt',
                scoring_function='tanimoto',
                scoring_function_kwargs=None,
                save_dir=None, learning_rate=0.0005,
                batch_size=64, n_steps=3000,
                num_processes=0, sigma=60,
                experience_replay=0):

    voc = Vocabulary(init_from_file='data/DistributionLearningBenchmark/Voc')

    start_time = time.time()

    Prior = RNN(voc)
    Agent = RNN(voc)

    logger = VizardLog('data/logs')

    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we don't have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        Prior.rnn.load_state_dict(torch.load('data/Prior.ckpt'))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Prior.rnn.load_state_dict(
            torch.load('data/Prior.ckpt', map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from, map_location=lambda storage, loc: storage))

    # We don't need gradients with respect to Prior
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=0.0005)

    # Scoring_function
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **scoring_function_kwargs)

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefore not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    # Log some network weights that can be dynamically plotted with the Vizard bokeh app
    logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100], "init_weight_GRU_layer_2_w_ih")
    logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100], "init_weight_GRU_layer_2_w_hh")
    logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30], "init_weight_GRU_embedding")
    logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(), "init_weight_GRU_layer_2_b_ih")
    logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(), "init_weight_GRU_layer_2_b_hh")

    # Information for the logger
    step_score = [[], []]

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, i.e. only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat((agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print("\n Step {}   Fraction valid SMILES: {:4.1f}  Time elapsed: {:.2f}h  Time left: {:.2f}h".format(
            step, fraction_valid_smiles(smiles) * 100, time_elapsed, time_left))
        print("  Agent    Prior   Target   Score             SMILES")
        for i in range(10):
            print(" {:6.2f}   {:6.2f}  {:6.2f}  {:6.2f}     {}".format(agent_likelihood[i],
                                                                       prior_likelihood[i],
                                                                       augmented_likelihood[i],
                                                                       score[i],
                                                                       smiles[i]))

        # Need this for Vizard plotting
        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # Log some weights
        logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100], "weight_GRU_layer_2_w_ih")
        logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100], "weight_GRU_layer_2_w_hh")
        logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30], "weight_GRU_embedding")
        logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(), "weight_GRU_layer_2_b_ih")
        logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(), "weight_GRU_layer_2_b_hh")
        logger.log("\n".join([smiles + "\t" + str(round(score, 2))
                              for smiles, score in zip(smiles[:12], score[:12])]),
                   "SMILES", dtype="text", overwrite=True)
        logger.log(np.array(step_score), "Scores")

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experience (which are the highest
    # scored sequences seen during training)
    if not save_dir:
        save_dir = 'data/results/run_' + time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime())
    os.makedirs(save_dir)
    copyfile('train_agent.py', os.path.join(save_dir, "train_agent.py"))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'Agent.ckpt'))

    seqs, agent_likelihood, entropy = Agent.sample(256)
    prior_likelihood, _ = Prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smiles, score, prior_likelihood in zip(smiles, score, prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smiles, score, prior_likelihood))
def __getitem__(self, i):
    mol = self.smiles[i]
    tokenized = self.voc.tokenize(mol)
    encoded = self.voc.encode(tokenized)
    return Variable(encoded)
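# --- Because __getitem__ returns encoded sequences of varying length, a DataLoader over this
# --- dataset typically needs a collate function that right-pads each sequence to the batch
# --- maximum. This is a hedged sketch with plain tensors; the original project may pad
# --- differently (e.g. inside its own dataset class).
import torch

def collate_fn(encoded_seqs):
    """Right-pad a list of 1D LongTensors with zeros into a single (batch, max_len) tensor."""
    max_length = max(seq.size(0) for seq in encoded_seqs)
    collated = torch.zeros(len(encoded_seqs), max_length, dtype=torch.long)
    for i, seq in enumerate(encoded_seqs):
        collated[i, :seq.size(0)] = seq
    return collated

batch = [torch.tensor([1, 5, 7]), torch.tensor([2, 3])]
print(collate_fn(batch))  # tensor([[1, 5, 7], [2, 3, 0]])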
def sample(self, pattern="CC(*)CC", batch_size=128, max_length=140):
    """
    Only difference with classic RNN based sampling.
    Sample a batch of sequences with given scaffold.

    Args:
        pattern: Scaffold that needs to be respected
        distributions: Distribution on the length of
        batch_size: Number of sequences to sample
        max_length: Maximum length of the sequences

    Outputs:
        seqs: (batch_size, seq_length) The sampled sequences.
        log_probs: (batch_size) Log likelihood for each sequence.
        entropy: (batch_size) The entropies for the sequences. Not currently used.
    """
    # get the tokenized version of the pattern
    pattern = np.array(tokenize_custom(pattern))

    start_token = Variable(torch.zeros(batch_size).long())
    start_token[:] = self.voc.vocab['GO']
    h = self.rnn.init_h(batch_size)
    x = start_token

    sequences = []
    log_probs = Variable(torch.zeros(batch_size))
    finished = torch.zeros(batch_size).byte()
    entropy = Variable(torch.zeros(batch_size))
    if torch.cuda.is_available():
        finished = finished.cuda()

    # tracks if there is an opened parenthesis
    opened = np.array(np.zeros(shape=batch_size), dtype=bool)
    # tracks if there is a constrained choice
    constrained_choices = np.array(np.zeros(shape=batch_size), dtype=bool)
    # tracks number of opening and closing parentheses
    opening_parentheses = np.ones(shape=batch_size)
    closing_parentheses = np.zeros(shape=batch_size)
    # tracks number of steps in the fragment that is being sampled
    # (if the RNN never samples the matching parenthesis we terminate sampling of this given molecule)
    n_steps = np.zeros(shape=batch_size)
    # tracks opened cycles
    opened_cycles = [['A', ] for i in range(batch_size)]
    counts = np.zeros(shape=batch_size, dtype=int)
    # tracks the position in the scaffold's pattern
    trackers = np.zeros(shape=batch_size, dtype=int)
    current_pattern_indexes = np.array([pattern[index] for index in trackers])

    for step in range(max_length):
        # Getting the position in the pattern of every example in the batch
        previous_pattern_indexes = current_pattern_indexes
        current_pattern_indexes = np.array([pattern[index] for index in trackers])

        # Check if a decoration is currently opened
        opened = np.logical_or(np.logical_and(current_pattern_indexes == '*',
                                              previous_pattern_indexes == '('), opened)
        # And if we're heading to a constrained choice
        constrained_choices = np.array([x[0] == '[' and ',' in x for x in current_pattern_indexes],
                                       dtype=bool)
        # In this case we already sampled this branch and need to move on for one step in the pattern
        trackers += 1 * np.logical_and(current_pattern_indexes == '*',
                                       previous_pattern_indexes == '(')

        # Sample according to conditional probability distribution of the RNN
        logits, h = self.rnn(x, h)
        prob = F.softmax(logits)
        log_prob = F.log_softmax(logits)
        x = torch.multinomial(prob, num_samples=1).view(-1)

        # If not opened, replace with current pattern token, else keep the sample
        # And update number of opened and closed parentheses
        # If closed, resume to opened
        # iterating over the batch:
        # there might be a smart way to parallelize all this but we didn't focus on it
        # as sampling speed is not necessarily a bottleneck in our applications
        for i in range(batch_size):
            # to keep track of opening and closing parentheses
            is_open = opened[i]
            if is_open:
                n_steps[i] += 1
                if n_steps[i] > 50:
                    x[i] = self.voc.vocab['EOS']
                opening_parentheses[i] += (x[i] == self.voc.vocab['(']).byte() * 1
                closing_parentheses[i] += (x[i] == self.voc.vocab[')']).byte() * 1
                n_opened = opening_parentheses[i]
                n_closed = closing_parentheses[i]
                if (n_opened == n_closed):
                    opening_parentheses[i] += 1
                    opened[i] = False
                    trackers[i] += 1

            # if we have a constrained choice
            # we apply a mask on the probability vector
            elif constrained_choices[i]:
                choices = current_pattern_indexes[i][1:-1].split(',')
                probabilities = prob[i, :]
                mask = torch.zeros_like(probabilities)
                for choice in choices:
                    mask[self.voc.vocab[choice]] = 1
                probabilities *= mask
                probabilities /= torch.sum(probabilities, dim=-1)
                x[i] = torch.multinomial(probabilities, num_samples=1).view(-1)
                trackers[i] += 1 * (x[i] != self.voc.vocab['EOS']).byte()

            # In this case we need to sample
            # We make the distinction between branch (first case) and linker (second case)
            elif current_pattern_indexes[i] == '*':
                if pattern[trackers[i]] == ')':
                    n_steps[i] += 1
                    if n_steps[i] > 50:
                        x[i] = self.voc.vocab['EOS']
                    opening_parentheses[i] += (x[i] == self.voc.vocab['(']).byte() * 1
                    closing_parentheses[i] += (x[i] == self.voc.vocab[')']).byte() * 1
                    n_opened = opening_parentheses[i]
                    n_closed = closing_parentheses[i]
                    if (n_opened == n_closed):
                        opening_parentheses[i] += 1
                        opened[i] = False
                        trackers[i] += 1
                else:
                    # The following lines are to avoid that sampling finishes too early
                    probabilities = prob[i, :]
                    mask = torch.ones_like(probabilities)
                    mask[self.voc.vocab['EOS']] = 0
                    probabilities *= mask
                    probabilities /= torch.sum(probabilities, dim=-1)
                    x[i] = torch.multinomial(probabilities, num_samples=1).view(-1)
                    opening_parentheses[i] += (x[i] == self.voc.vocab['(']).byte() * 1
                    closing_parentheses[i] += (x[i] == self.voc.vocab[')']).byte() * 1
                    n_opened = opening_parentheses[i]
                    n_closed = closing_parentheses[i]
                    for cycle in range(1, 10):
                        if (x[i] == self.voc.vocab[str(cycle)]).byte() and (cycle in opened_cycles[i]):
                            opened_cycles[i].remove(cycle)
                            break
                        elif (x[i] == self.voc.vocab[str(cycle)]).byte():
                            opened_cycles[i].append(cycle)
                            break
                    # Override with specified distribution for minimal fragment size
                    # You could also make this an argument of the sample function
                    # You can also keep this parameter fixed manually as it currently is
                    # The sampling of the linker will only stop when size is > to minimal_linker_size
                    # and cycles and branches are completed
                    minimal_linker_size = 5
                    if (n_opened == n_closed + 1) and len(opened_cycles[i]) == 1 \
                            and counts[i] > minimal_linker_size:
                        opening_parentheses[i] += 1
                        opened[i] = False
                        trackers[i] += 1
                    else:
                        counts[i] += 1

            # If we avoided all previous cases, then we do not sample and instead read the pattern
            else:
                x[i] = self.voc.vocab[current_pattern_indexes[i]]
                trackers[i] += 1 * (x[i] != self.voc.vocab['EOS']).byte()
                if (x[i] == self.voc.vocab[')']).byte():
                    opened[i] = False

        sequences.append(x.view(-1, 1))
        log_probs += NLLLoss(log_prob, x)
        entropy += -torch.sum((log_prob * prob), 1)

        x = Variable(x.data)
        EOS_sampled = (x == self.voc.vocab['EOS']).byte()
        finished = torch.ge(finished + EOS_sampled, 1)
        if torch.prod(finished) == 1:
            break

    sequences = torch.cat(sequences, 1)
    return sequences.data, log_probs, entropy
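# --- Stand-alone sketch (toy vocabulary, not the original Vocabulary class) of the masked
# --- sampling used for constrained choices above: zero out the probabilities of disallowed
# --- tokens, renormalize, and draw from the restricted distribution.
import torch

vocab = {'C': 0, 'N': 1, 'O': 2, 'EOS': 3}
prob = torch.tensor([0.5, 0.2, 0.2, 0.1])   # RNN output probabilities for one sequence position

choices = ['N', 'O']                        # tokens allowed by the pattern, e.g. "[N,O]"
mask = torch.zeros_like(prob)
for choice in choices:
    mask[vocab[choice]] = 1

constrained = prob * mask
constrained = constrained / constrained.sum()
token = torch.multinomial(constrained, num_samples=1)
print(token.item())                         # always 1 or 2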
import numpy as np

from utils.Variable import *
from utils.Functions import *

f = Square()
g = Exp()

x = Variable(np.array(0.5))
fx = f(x)
gfx = g(fx)
print(gfx.data)
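# --- For reference (assuming Square and Exp compute x**2 and exp(x), as their names suggest),
# --- the same composition in plain NumPy gives exp(0.5**2) ~= 1.2840, which is the value
# --- gfx.data should print above.
import numpy as np
print(np.exp(np.array(0.5) ** 2))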
def init_h(self, batch_size):
    # Initial cell state is zero
    return Variable(torch.zeros(self._num_gru_layers, batch_size, self._gru_layer_size))
def train_agent(restore_agent_from='data/Prior.ckpt',
                scoring_function='activity_model',
                save_dir=None, learning_rate=0.0005,
                batch_size=64, n_steps=1000,
                sigma=100):

    voc = Vocabulary(init_from_file="data/voc")

    start_time = time.time()

    Prior = RNN(voc)
    Agent = RNN(voc)

    if torch.cuda.is_available():
        Prior.rnn.load_state_dict(torch.load('data/Prior.ckpt'))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Prior.rnn.load_state_dict(
            torch.load('data/Prior.ckpt', map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from, map_location=lambda storage, loc: storage))

    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    scoring_function = get_scoring_function(scoring_function=scoring_function)

    step_score = [[], []]

    print("Model initialized, starting training...")

    if not save_dir:
        save_dir = 'experiments/manuscript/1000steps_probtest_rewardonlynosmaller40_' + time.strftime(
            "%Y-%m-%d-%H_%M_%S", time.localtime())
    os.makedirs(save_dir)

    ## calculate the probability of psmiles with predicted TC >= 0.4
    prob = []
    mean_ = []
    std_ = []

    for step in range(n_steps):

        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = []
        for seq in seqs.cpu().numpy():
            smiles.append(voc.decode(seq))
        score = scoring_function(smiles)

        ####
        count = 0
        score_filter = []
        for s in score:
            if s >= 0.4:
                score_filter.append(s)
                count += 1
            else:
                pass
        prob.append(count / 64)
        mean_.append(np.mean(score_filter))
        std_.append(np.std(score_filter))
        ####

        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        loss = loss.mean()
        regularization = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * regularization

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print out information during the training
        print("Agent    Prior   Target   Score             SMILES")
        for i in range(10):
            print("{:6.3f}   {:6.3f}  {:6.3f}  {:6.3f}     {}".format(agent_likelihood[i],
                                                                      prior_likelihood[i],
                                                                      augmented_likelihood[i],
                                                                      score[i],
                                                                      smiles[i]))

        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # if step > 98 and (step+1) % 100 == 0:
        # # if step == 0:
        #     torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'agent_baseline_{}.ckpt'.format(step+1)))
        #     seqs, agent_likelihood, entropy = Agent.sample(1000)
        #     prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        #     prior_likelihood = prior_likelihood.data.cpu().numpy()
        #     smiles = []
        #     for seq in seqs.cpu().numpy():
        #         smiles.append(voc.decode(seq))
        #     score = scoring_function(smiles)
        #     with open(os.path.join(save_dir, "sampled_{}".format(step+1)), 'w') as f:
        #         f.write("SMILES Score PriorLogP\n")
        #         for s, sc, pri in zip(smiles, score, prior_likelihood):
        #             f.write("{} {:5.3f} {:6.3f}\n".format(s, sc, pri))

    step_score_data = pd.DataFrame({'Step': step_score[0],
                                    'Score': step_score[1],
                                    'Prob': prob,
                                    'MEAN': mean_,
                                    'STD': std_})
    step_score_data.to_csv(os.path.join(save_dir, "step_score_1000step.csv"), index=None)
variables = {}

default = OrderedDict()
default['mean'] = 0
default['std'] = 1

positive_bias = OrderedDict()
positive_bias['mean'] = 0.5
positive_bias['std'] = 1

negative_bias = OrderedDict()
negative_bias['mean'] = -0.5
negative_bias['std'] = 1

# Experimental parameters
# Task importance
variables['TI'] = Variable(1, 'TI', 'fixed', {'fixed': 1})

# Controlled parameters
variables['SA'] = Variable(0, 'SA', 'Normal', positive_bias)
variables['PI'] = Variable(0, 'PI', 'Normal', negative_bias)
variables['success'] = 0

# IT Process parameters
variables['skill'] = Variable(1, 'skill', 'Normal', default)
variables['effort'] = Variable(1, 'effort', 'Normal', default)
variables['external'] = Variable(0, 'external', 'Normal', default)
variables['luck'] = Variable(0, 'luck', 'Normal', default)

# Inference parameters
variables['L'] = 100
variables['f'] = {'mean': lambda x: x, '2ndMoment': lambda x: x**2}
def train_agent(runname='celecoxib', priorname='chembl',
                scoring_function='Tanimoto', scoring_function_kwargs=None,
                save_dir=None, batch_size=64, n_steps=3000,
                num_processes=6, sigma=60,
                experience_replay=5, lr=0.0005):

    print("\nStarting run %s with prior %s ..." % (runname, priorname))
    start_time = time.time()
    voc = Vocabulary(init_from_file="data/Voc_%s" % priorname)

    prior = RNN(voc)
    agent = RNN(voc)

    writer = SummaryWriter('logs/%s' % runname)

    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we don't have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        prior.rnn.load_state_dict(torch.load('data/prior_%s.ckpt' % priorname))
        agent.rnn.load_state_dict(torch.load('data/prior_%s.ckpt' % priorname))
    else:
        prior.rnn.load_state_dict(
            torch.load('data/prior_%s.ckpt' % priorname, map_location=lambda storage, loc: storage))
        agent.rnn.load_state_dict(
            torch.load('data/prior_%s.ckpt' % priorname, map_location=lambda storage, loc: storage))

    # We don't need gradients with respect to Prior
    for param in prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(agent.rnn.parameters(), lr=lr)

    # Scoring_function
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **scoring_function_kwargs)

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefore not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from Agent
        seqs, agent_likelihood, entropy = agent.sample(batch_size)

        # Remove duplicates, i.e. only consider unique seqs
        unique_ids = unique(seqs)
        seqs = seqs[unique_ids]
        agent_likelihood = agent_likelihood[unique_ids]
        entropy = entropy[unique_ids]

        # Get prior likelihood and score
        prior_likelihood, _ = prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = agent.likelihood(exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat((agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        best_memory = experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print("\n Step {}   Fraction valid SMILES: {:4.1f}  Time elapsed: {:.2f}h  Time left: {:.2f}h\n".format(
            step, fraction_valid_smiles(smiles) * 100, time_elapsed, time_left))
        print("  Agent    Prior   Target   Score             SMILES")
        for i in range(10):
            print(" {:6.2f}   {:6.2f}  {:6.2f}  {:6.2f}     {}".format(agent_likelihood[i],
                                                                       prior_likelihood[i],
                                                                       augmented_likelihood[i],
                                                                       score[i],
                                                                       smiles[i]))

        # Log
        writer.add_scalar('loss', loss.item(), step)
        writer.add_scalar('score', np.mean(score), step)
        writer.add_scalar('entropy', entropy.mean(), step)
        if best_memory:
            writer.add_scalar('best_memory', best_memory, step)

        # get 4 random valid smiles and scores for logging
        val_ids = np.array([i for i, s in enumerate(smiles) if is_valid_mol(s)])
        val_ids = np.random.choice(val_ids, 4, replace=False)
        smiles = np.array(smiles)[val_ids]
        score = ['%.3f' % s for s in np.array(score)[val_ids]]
        writer.add_image('generated_mols', mol_to_torchimage(smiles, score), step)

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experience (which are the highest
    # scored sequences seen during training)
    if not save_dir:
        save_dir = 'results/%s' % runname + time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime())
    os.makedirs(save_dir)
    copyfile('agent.py', os.path.join(save_dir, "agent_%s.py" % runname))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(agent.rnn.state_dict(), os.path.join(save_dir, 'Agent_%s.ckpt' % runname))

    seqs, agent_likelihood, entropy = agent.sample(256)
    prior_likelihood, _ = prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled.txt"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smiles, score, prior_likelihood in zip(smiles, score, prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smiles, score, prior_likelihood))

    print("\nDONE! Whole run took %s" % datetime.timedelta(seconds=time.time() - start_time))
def poem_to_tensor(poem, vocab, is_target=False):
    word_indexes = [vocab.index(word) for word in poem]
    if is_target:
        word_indexes.append(vocab.index('<EOP>'))
    return Variable(torch.LongTensor(word_indexes))
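# --- Hypothetical usage with a toy vocabulary (not from the original project): each word is
# --- mapped to its index in `vocab`, and targets get the '<EOP>' index appended at the end.
import torch
from torch.autograd import Variable  # assumed import; the original may import Variable elsewhere

vocab = ['<EOP>', 'the', 'moon', 'rises']
print(poem_to_tensor(['the', 'moon', 'rises'], vocab))                  # tensor([1, 2, 3])
print(poem_to_tensor(['the', 'moon', 'rises'], vocab, is_target=True))  # tensor([1, 2, 3, 0])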