def _time_progress(self, start_time, n_steps, step, smiles, mean_score):
    time_elapsed = int(time.time() - start_time)
    time_left = time_elapsed * ((n_steps - step) / (step + 1))
    valid_fraction = fraction_valid_smiles(smiles)
    message = (
        f"\n Step {step} Fraction valid SMILES: {valid_fraction:4.1f} Score: {mean_score:.4f} "
        f"Time elapsed: {time_elapsed} "
        f"Time left: {time_left:.1f}\n")
    return message
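# `fraction_valid_smiles` is called above but not defined in this section. A minimal
# sketch of such a helper, assuming RDKit is available; whether the real helper returns
# a fraction in [0, 1] or a percentage varies between the snippets below, so this sketch
# returns a percentage to match the `{:4.1f}`-style formatting used here.
from rdkit import Chem

def fraction_valid_smiles(smiles):
    # Percentage of SMILES strings that RDKit can parse into a molecule.
    if not smiles:
        return 0.0
    valid = sum(1 for s in smiles if Chem.MolFromSmiles(s) is not None)
    return 100.0 * valid / len(smiles)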
def timestep_report(self, smiles: List[str], likelihoods: np.ndarray):
    fraction_valid_smiles = utils_general.fraction_valid_smiles(smiles)
    fraction_unique_entries = self._get_unique_entires_fraction(likelihoods)
    structures_table = self._visualize_structures(smiles)
    data = self._assemble_timestep_report(structures_table, fraction_valid_smiles,
                                          fraction_unique_entries)
    self._notify_server(data, self._log_config.recipient)
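# `_get_unique_entires_fraction` (spelling as in the codebase) is referenced here and in
# `_log_timestep` below but not defined in this section. A plausible sketch, under the
# assumption that distinct likelihood values serve as a proxy for distinct sequences and
# that the result is a percentage (both call sites format the value with a '%'):
import numpy as np

def get_unique_entries_fraction(likelihoods: np.ndarray) -> float:
    # Ratio of distinct likelihood values to total entries, as a percentage.
    return 100.0 * len(np.unique(likelihoods)) / len(likelihoods)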
def timestep_report(self, start_time, n_steps, step, smiles,
                    mean_score: np.ndarray, score_summary: FinalSummary, score,
                    agent_likelihood: torch.Tensor, prior_likelihood: torch.Tensor,
                    augmented_likelihood: torch.Tensor):
    score_components = self._score_summary_breakdown(score_summary, mean_score)
    learning_curves = self._learning_curve_profile(agent_likelihood, prior_likelihood,
                                                   augmented_likelihood)
    structures_table = self._visualize_structures(smiles, score, score_summary)
    smiles_report = self._create_sample_report(smiles, score, score_summary)
    time_estimation = ul_rl.estimate_run_time(start_time, n_steps, step)
    data = self._assemble_timestep_report(step, score_components, structures_table,
                                          learning_curves, time_estimation,
                                          fraction_valid_smiles(smiles), smiles_report)
    self._notify_server(data, self._log_config.recipient)
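# `ul_rl.estimate_run_time` is not shown in this section; judging from `_time_progress`
# above, it presumably performs the same linear extrapolation of the elapsed time over the
# remaining steps. A sketch (the dict return shape is an assumption):
import time

def estimate_run_time(start_time, n_steps, step):
    time_elapsed = time.time() - start_time
    time_left = time_elapsed * ((n_steps - step) / (step + 1))
    return {"elapsed": time_elapsed, "remaining": time_left}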
def log_timestep(self, lr, epoch, sampled_smiles, sampled_nlls,
                 validation_nlls, training_nlls, jsd_data, jsd_joined_data, model):
    learning_mean = self._mean_learning_curve_profile(sampled_nlls, training_nlls)
    learning_variation = self._variation_learning_curve_profile(sampled_nlls, training_nlls)
    fraction_valid_smiles = utils_general.fraction_valid_smiles(sampled_smiles)
    structures_table = self._visualize_structures(sampled_smiles)
    data = self._assemble_timestep_report(epoch, fraction_valid_smiles, structures_table,
                                          learning_mean, learning_variation,
                                          sampled_nlls, training_nlls)
    self._notify_server(data, self._log_config.recipient)
def _tensorboard_report(self, step, smiles, score, score_summary: FinalSummary,
                        agent_likelihood, prior_likelihood, augmented_likelihood):
    self._summary_writer.add_scalars("nll/avg", {
        "prior": prior_likelihood.mean(),
        "augmented": augmented_likelihood.mean(),
        "agent": agent_likelihood.mean()
    }, step)
    mean_score = np.mean(score)
    for component in score_summary.profile:
        self._summary_writer.add_scalar(component.name, np.mean(component.score), step)
    self._summary_writer.add_scalar("average score", mean_score, step)
    self._summary_writer.add_scalar("Fraction valid SMILES", fraction_valid_smiles(smiles), step)
    if step % 10 == 0:
        self._log_out_smiles_sample(smiles, score, step, score_summary)
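# `_log_out_smiles_sample`, invoked every 10th step above, is not defined in this section.
# A minimal sketch of what such a hook might log through the same SummaryWriter; the tag
# name and the sample size of 10 are assumptions, and `score_summary` is accepted but not
# used here:
def _log_out_smiles_sample(summary_writer, smiles, score, step, score_summary):
    sample = "\n".join(f"{smi}  {sc:.4f}" for smi, sc in zip(smiles[:10], score[:10]))
    summary_writer.add_text("smiles_sample", sample, step)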
def reinforcement_learning(prior: models.reinvent.Model,
                           agent: models.reinvent.Model,
                           scoring_function: Callable,
                           logdir: str,
                           resultdir: str,
                           n_steps=3000,
                           sigma=120,
                           experience_replay=False,
                           lr=0.0001,
                           batch_size=128,
                           save_every=50,
                           keep_max=10,
                           reset=0,
                           temperature=1.0,
                           reset_score_cutoff=0.5):
    assert prior.voc == agent.voc, "The agent and the prior must have the same vocabulary!"
    start_time = time.time()

    # We don't need gradients with respect to the Prior
    for param in prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(agent.rnn.parameters(), lr=lr)

    # For policy-based RL, we normally train on-policy and correct for the fact that more
    # likely actions occur more often (which means the agent can get biased towards them).
    # Using experience replay is therefore not as theoretically sound as it is for
    # value-based RL, but it seems to work well.
    if experience_replay:
        experience = Experience(prior.voc)
        # An initial experience can be added if we want to:
        # experience.initiate_from_file('/home/excape/reinvent/tala_xray_lig.smi', scoring_function, Prior)

    reset_countdown = 0
    for step in range(n_steps):

        # Sample from the Agent
        seqs, agent_likelihood, entropy = agent.sample(batch_size, temperature=temperature)

        # Remove duplicates, i.e. only consider unique sequences
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        prior_likelihood, _ = prior.likelihood(Variable(seqs), temperature=temperature)
        smiles = prior.sequence_to_smiles(seqs)
        score_components = scoring_function(smiles)
        # We need to extract the total_score key
        score = score_components.pop('total_score')

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience replay: first sample from the memory ...
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(8)
            exp_agent_likelihood, exp_entropy = agent.likelihood(exp_seqs.long(),
                                                                 temperature=temperature)
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat((agent_likelihood, exp_agent_likelihood), 0)

        # ... then add the new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        if experience_replay:
            experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # A regularizer that penalizes high likelihood for the entire sequence could be
        # added here. With this regularizer, the example where only Celecoxib is generated
        # doesn't work, for obvious reasons.
        # loss_p = - (1 / agent_likelihood).mean()
        # loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Sum the entropy for logging; optimizing with respect to it is currently disabled
        entropy = torch.sum(entropy)
        # loss.backward()
        # optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = int(time.time() - start_time)
        time_left = time_elapsed * ((n_steps - step) / (step + 1))
        mean_score = np.mean(score)
        message = ("\n Step {} Fraction valid SMILES: {:4.1f} Score: {:.4f} Time elapsed: {} "
                   "Time left: {:.1f}\n").format(step, fraction_valid_smiles(smiles) * 100,
                                                 mean_score, time_elapsed, time_left)
        message += " ".join([" Agent", "Prior", "Target", "Score"] +
                            list(score_components.keys()) + ["SMILES\n"])
        for i in range(min(10, len(smiles))):
            print_component_scores = [score_components[key][i] for key in score_components]
            message += " {:6.2f} {:6.2f} {:6.2f} {:6.2f} ".format(agent_likelihood[i],
                                                                  prior_likelihood[i],
                                                                  augmented_likelihood[i],
                                                                  score[i])
            message += ("{:6.2f} " * len(print_component_scores)).format(*print_component_scores)
            message += "{}\n".format(smiles[i])
        logging.info(message)

        if step % save_every == 0:
            logging.debug("Write Agent memory")
            agent.save(os.path.join(logdir, 'Agent.{}.ckpt'.format(step)))
            if keep_max > 0:
                for oldsteps in range(0, step - (keep_max * save_every) + 1, save_every):
                    with contextlib.suppress(FileNotFoundError):
                        os.remove(os.path.join(logdir, 'Agent.{}.ckpt'.format(oldsteps)))

        logging.debug("Entropy: {}".format(entropy))

        # Reset the weights of the network to search for diverse solutions
        if reset:
            if reset_countdown:
                reset_countdown += 1
            elif mean_score >= reset_score_cutoff:
                reset_countdown = 1
            if reset_countdown == reset:
                agent.reset()
                reset_countdown = 0
                logging.debug("Agent RNN is reset!")

    # Once the entire training finishes, we create a new folder where we save the agent and
    # the contents of the experience memory (which are the highest-scored sequences seen
    # during training)
    if not os.path.isdir(resultdir):
        os.makedirs(resultdir)
    agent.save(os.path.join(resultdir, 'Agent.ckpt'))
    if experience_replay:
        experience.print_memory(os.path.join(resultdir, "experience_memory"))
    # Copy the logs as well
    copyfile(os.path.join(logdir, "output.log"), os.path.join(resultdir, "output.log"))
    copyfile(os.path.join(logdir, "debug.log"), os.path.join(resultdir, "debug.log"))
    # Copy metadata
    copyfile(os.path.join(logdir, "metadata.json"), os.path.join(resultdir, "metadata.json"))
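# The `unique` helper used by all of these training loops returns the indices of the first
# occurrence of each distinct sampled sequence. A sketch of one common implementation: each
# row of the (batch, seq_len) tensor is viewed as a single opaque record so that np.unique
# can deduplicate whole rows at once.
import numpy as np
import torch

def unique(seqs: torch.Tensor) -> torch.Tensor:
    arr = seqs.cpu().numpy()
    rows = np.ascontiguousarray(arr).view(
        np.dtype((np.void, arr.dtype.itemsize * arr.shape[1])))
    _, idxs = np.unique(rows, return_index=True)
    # Sorting keeps the surviving sequences in their original batch order.
    return torch.LongTensor(np.sort(idxs))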
def hill_climbing(pattern=None,
                  restore_agent_from='data/Prior.ckpt',
                  scoring_function='tanimoto',
                  scoring_function_kwargs=None,
                  save_dir=None,
                  learning_rate=0.0005,
                  batch_size=64,
                  n_steps=10,
                  num_processes=0,
                  use_custom_voc="data/Voc"):
    voc = Vocabulary(init_from_file=use_custom_voc)
    start_time = time.time()

    if pattern:
        Agent = scaffold_constrained_RNN(voc)
    else:
        Agent = RNN(voc)

    logger = VizardLog('data/logs')

    # Saved models are partially on the GPU, but if we don't have CUDA enabled we can
    # remap these to the CPU
    if torch.cuda.is_available():
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from, map_location=lambda storage, loc: storage))

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # Scoring function
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **scoring_function_kwargs)

    # For policy-based RL, we normally train on-policy and correct for the fact that more
    # likely actions occur more often (which means the agent can get biased towards them).
    # Using experience replay is therefore not as theoretically sound as it is for
    # value-based RL, but it seems to work well.
    experience = Experience(voc)

    # Log some network weights that can be dynamically plotted with the Vizard bokeh app
    logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100], "init_weight_GRU_layer_2_w_ih")
    logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100], "init_weight_GRU_layer_2_w_hh")
    logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30], "init_weight_GRU_embedding")
    logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(), "init_weight_GRU_layer_2_b_ih")
    logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(), "init_weight_GRU_layer_2_b_hh")

    # Information for the logger
    step_score = [[], []]

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from the Agent
        if pattern:
            seqs, agent_likelihood, entropy = Agent.sample(pattern, batch_size)
        else:
            seqs, agent_likelihood, entropy = Agent.sample(batch_size)
        gc.collect()

        # Remove duplicates, i.e. only consider unique sequences
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Convert to SMILES and score
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        new_experience = zip(smiles, score, agent_likelihood)
        experience.add_experience(new_experience)

        # Indices of the sampled sequences, best scores first
        indexes = np.flip(np.argsort(np.array(score)))

        # Train the agent for 10 epochs on the hill-climbing procedure: maximize the
        # likelihood of (up to) the 50 best-scored unique sequences
        for epoch in range(10):
            loss = Variable(torch.zeros(1))
            counter = 0
            seen_seqs = []
            for j in indexes:
                if counter > 50:
                    break
                seq = seqs[j]
                s = smiles[j]
                if s not in seen_seqs:
                    seen_seqs.append(s)
                    log_p, _ = Agent.likelihood(Variable(seq).view(1, -1))
                    loss -= log_p.mean()
                    counter += 1
            loss /= counter
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = time_elapsed * ((n_steps - step) / (step + 1))
        print("\n Step {} Fraction valid SMILES: {:4.1f} Time elapsed: {:.2f}h Time left: {:.2f}h"
              .format(step, fraction_valid_smiles(smiles) * 100, time_elapsed, time_left))
        print(" Score SMILES")
        for i in range(min(10, len(smiles))):
            print(" {:6.2f} {}".format(score[i], smiles[i]))

        # Need this for Vizard plotting
        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # Log some weights
        logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100], "weight_GRU_layer_2_w_ih")
        logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100], "weight_GRU_layer_2_w_hh")
        logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30], "weight_GRU_embedding")
        logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(), "weight_GRU_layer_2_b_ih")
        logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(), "weight_GRU_layer_2_b_hh")
        logger.log("\n".join([smi + "\t" + str(round(sc, 2))
                              for smi, sc in zip(smiles[:12], score[:12])]),
                   "SMILES", dtype="text", overwrite=True)
        logger.log(np.array(step_score), "Scores")

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experience (which are the
    # highest-scored sequences seen during training)
    if not save_dir:
        save_dir = 'data/results/run_' + time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime())
    try:
        os.makedirs(save_dir)
    except FileExistsError:
        print("Folder already exists... overwriting previous results")
    copyfile('train_agent.py', os.path.join(save_dir, "train_agent.py"))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'Agent.ckpt'))

    # Write out unique memory entries, comparing canonical SMILES with stereochemistry
    # removed. Note that RDKit's RemoveStereochemistry modifies the molecule in place and
    # returns None, so the canonical SMILES has to be produced in two steps.
    previous_smiles = []
    with open(os.path.join(save_dir, "memory.smi"), 'w') as f:
        for exp in experience.memory:
            try:
                mol = Chem.MolFromSmiles(exp[0])
                Chem.rdmolops.RemoveStereochemistry(mol)
                canonical = Chem.MolToSmiles(mol)
                if canonical not in previous_smiles:
                    f.write("{}\n".format(exp[0]))
                    previous_smiles.append(canonical)
            except Exception:
                pass
def train_agent(restore_prior_from='data/Prior.ckpt',
                restore_agent_from='data/Prior.ckpt',
                scoring_function='tanimoto',
                scoring_function_kwargs=None,
                save_dir=None,
                learning_rate=0.0005,
                batch_size=64,
                n_steps=3000,
                num_processes=0,
                sigma=60,
                experience_replay=0):
    voc = Vocabulary(init_from_file="data/Voc")
    start_time = time.time()
    Prior = RNN(voc)
    Agent = RNN(voc)
    logger = VizardLog('data/logs')

    # By default, restore the Agent to the same model as the Prior, but it can also be
    # restored from an already trained Agent. Saved models are partially on the GPU, but
    # if we don't have CUDA enabled we can remap these to the CPU.
    if torch.cuda.is_available():
        Prior.rnn.load_state_dict(torch.load(restore_prior_from))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Prior.rnn.load_state_dict(
            torch.load(restore_prior_from, map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from, map_location=lambda storage, loc: storage))

    # We don't need gradients with respect to the Prior
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # Scoring function
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **scoring_function_kwargs)

    # For policy-based RL, we normally train on-policy and correct for the fact that more
    # likely actions occur more often (which means the agent can get biased towards them).
    # Using experience replay is therefore not as theoretically sound as it is for
    # value-based RL, but it seems to work well.
    experience = Experience(voc)

    # Log some network weights that can be dynamically plotted with the Vizard bokeh app
    logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100], "init_weight_GRU_layer_2_w_ih")
    logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100], "init_weight_GRU_layer_2_w_hh")
    logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30], "init_weight_GRU_embedding")
    logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(), "init_weight_GRU_layer_2_b_ih")
    logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(), "init_weight_GRU_layer_2_b_hh")

    # Information for the logger
    step_score = [[], []]

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from the Agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, i.e. only consider unique sequences
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience replay: first sample from the memory ...
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat((agent_likelihood, exp_agent_likelihood), 0)

        # ... then add the new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add a regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = time_elapsed * ((n_steps - step) / (step + 1))
        print("\n Step {} Fraction valid SMILES: {:4.1f} Time elapsed: {:.2f}h Time left: {:.2f}h"
              .format(step, fraction_valid_smiles(smiles) * 100, time_elapsed, time_left))
        print(" Agent Prior Target Score SMILES")
        for i in range(min(10, len(smiles))):
            print(" {:6.2f} {:6.2f} {:6.2f} {:6.2f} {}".format(agent_likelihood[i],
                                                               prior_likelihood[i],
                                                               augmented_likelihood[i],
                                                               score[i], smiles[i]))

        # Need this for Vizard plotting
        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # Log some weights
        logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100], "weight_GRU_layer_2_w_ih")
        logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100], "weight_GRU_layer_2_w_hh")
        logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30], "weight_GRU_embedding")
        logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(), "weight_GRU_layer_2_b_ih")
        logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(), "weight_GRU_layer_2_b_hh")
        logger.log("\n".join([smi + "\t" + str(round(sc, 2))
                              for smi, sc in zip(smiles[:12], score[:12])]),
                   "SMILES", dtype="text", overwrite=True)
        logger.log(np.array(step_score), "Scores")

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experience (which are the
    # highest-scored sequences seen during training)
    if not save_dir:
        save_dir = 'data/results/run_' + time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime())
    os.makedirs(save_dir)
    copyfile('train_agent.py', os.path.join(save_dir, "train_agent.py"))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'Agent.ckpt'))

    # Sample a final batch and write it out together with scores and prior log-likelihoods
    seqs, agent_likelihood, entropy = Agent.sample(256)
    prior_likelihood, _ = Prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smi, sc, pl in zip(smiles, score, prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smi, sc, pl))
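# `Experience` is instantiated by every training loop above but defined elsewhere. A
# minimal sketch of the interface these loops rely on (`add_experience`, `sample`,
# `__len__`, `print_memory`); the fixed memory size, score-sorted pruning, and the return
# of raw tuples from `sample` (the real class returns encoded sequence tensors) are
# assumptions:
import numpy as np

class Experience:
    def __init__(self, voc, max_size=100):
        self.voc = voc
        self.max_size = max_size
        self.memory = []  # (smiles, score, prior_likelihood) tuples

    def add_experience(self, experience):
        self.memory.extend(experience)
        # Keep only the highest-scored entry per unique SMILES.
        seen, deduped = set(), []
        for exp in sorted(self.memory, key=lambda e: e[1], reverse=True):
            if exp[0] not in seen:
                seen.add(exp[0])
                deduped.append(exp)
        self.memory = deduped[:self.max_size]

    def sample(self, n):
        idxs = np.random.choice(len(self.memory), size=min(n, len(self.memory)),
                                replace=False)
        sampled = [self.memory[i] for i in idxs]
        smiles = [e[0] for e in sampled]
        scores = np.array([e[1] for e in sampled])
        prior_likelihood = np.array([e[2] for e in sampled])
        return smiles, scores, prior_likelihood

    def __len__(self):
        return len(self.memory)

    def print_memory(self, path):
        with open(path, 'w') as f:
            for smi, score, _ in self.memory:
                f.write("{} {:4.2f}\n".format(smi, score))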
def _valid_stats(self, smiles, epoch):
    self._summary_writer.add_scalar("valid", fraction_valid_smiles(smiles), epoch)
def train_agent(runname='celecoxib',
                priorname='chembl',
                scoring_function='Tanimoto',
                scoring_function_kwargs=None,
                save_dir=None,
                batch_size=64,
                n_steps=3000,
                num_processes=6,
                sigma=60,
                experience_replay=5,
                lr=0.0005):
    print("\nStarting run %s with prior %s ..." % (runname, priorname))
    start_time = time.time()
    voc = Vocabulary(init_from_file="data/Voc_%s" % priorname)
    prior = RNN(voc)
    agent = RNN(voc)
    writer = SummaryWriter('logs/%s' % runname)

    # By default, restore the Agent to the same model as the Prior, but it can also be
    # restored from an already trained Agent. Saved models are partially on the GPU, but
    # if we don't have CUDA enabled we can remap these to the CPU.
    if torch.cuda.is_available():
        prior.rnn.load_state_dict(torch.load('data/prior_%s.ckpt' % priorname))
        agent.rnn.load_state_dict(torch.load('data/prior_%s.ckpt' % priorname))
    else:
        prior.rnn.load_state_dict(
            torch.load('data/prior_%s.ckpt' % priorname,
                       map_location=lambda storage, loc: storage))
        agent.rnn.load_state_dict(
            torch.load('data/prior_%s.ckpt' % priorname,
                       map_location=lambda storage, loc: storage))

    # We don't need gradients with respect to the Prior
    for param in prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(agent.rnn.parameters(), lr=lr)

    # Scoring function
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **scoring_function_kwargs)

    # For policy-based RL, we normally train on-policy and correct for the fact that more
    # likely actions occur more often (which means the agent can get biased towards them).
    # Using experience replay is therefore not as theoretically sound as it is for
    # value-based RL, but it seems to work well.
    experience = Experience(voc)

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from the Agent
        seqs, agent_likelihood, entropy = agent.sample(batch_size)

        # Remove duplicates, i.e. only consider unique sequences
        unique_ids = unique(seqs)
        seqs = seqs[unique_ids]
        agent_likelihood = agent_likelihood[unique_ids]
        entropy = entropy[unique_ids]

        # Get prior likelihood and score
        prior_likelihood, _ = prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience replay: first sample from the memory ...
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = agent.likelihood(exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat((agent_likelihood, exp_agent_likelihood), 0)

        # ... then add the new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        best_memory = experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add a regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = time_elapsed * ((n_steps - step) / (step + 1))
        print("\n Step {} Fraction valid SMILES: {:4.1f} Time elapsed: {:.2f}h Time left: {:.2f}h\n"
              .format(step, fraction_valid_smiles(smiles) * 100, time_elapsed, time_left))
        print(" Agent Prior Target Score SMILES")
        for i in range(min(10, len(smiles))):
            print(" {:6.2f} {:6.2f} {:6.2f} {:6.2f} {}".format(agent_likelihood[i],
                                                               prior_likelihood[i],
                                                               augmented_likelihood[i],
                                                               score[i], smiles[i]))

        # Log to TensorBoard
        writer.add_scalar('loss', loss.item(), step)
        writer.add_scalar('score', np.mean(score), step)
        writer.add_scalar('entropy', entropy.mean(), step)
        if best_memory:
            writer.add_scalar('best_memory', best_memory, step)

        # Get 4 random valid SMILES and their scores for logging (skipped when the batch
        # contains fewer than four valid molecules)
        val_ids = np.array([i for i, s in enumerate(smiles) if is_valid_mol(s)])
        if len(val_ids) >= 4:
            val_ids = np.random.choice(val_ids, 4, replace=False)
            smiles = np.array(smiles)[val_ids]
            score = ['%.3f' % s for s in np.array(score)[val_ids]]
            writer.add_image('generated_mols', mol_to_torchimage(smiles, score), step)

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experience (which are the
    # highest-scored sequences seen during training)
    if not save_dir:
        save_dir = 'results/%s' % runname + time.strftime("%Y-%m-%d-%H_%M_%S",
                                                          time.localtime())
    os.makedirs(save_dir)
    copyfile('agent.py', os.path.join(save_dir, "agent_%s.py" % runname))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(agent.rnn.state_dict(), os.path.join(save_dir, 'Agent_%s.ckpt' % runname))

    # Sample a final batch and write it out together with scores and prior log-likelihoods
    seqs, agent_likelihood, entropy = agent.sample(256)
    prior_likelihood, _ = prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled.txt"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smi, sc, pl in zip(smiles, score, prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smi, sc, pl))

    print("\nDONE! Whole run took %s" % datetime.timedelta(seconds=time.time() - start_time))
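# `is_valid_mol`, used for the image logging above, is not defined in this section.
# A minimal RDKit-based sketch of such a validity check:
from rdkit import Chem

def is_valid_mol(smi: str) -> bool:
    # A SMILES string is considered valid if RDKit can parse it into a molecule.
    return Chem.MolFromSmiles(smi) is not None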
def _log_timestep(self, smiles: np.ndarray, likelihoods: np.ndarray):
    fraction_valid_smiles = utils_general.fraction_valid_smiles(smiles)
    fraction_unique_entries = self._get_unique_entires_fraction(likelihoods)
    self._visualize_structures(smiles)
    self._summary_writer.add_text('Data',
                                  f'Valid SMILES: {fraction_valid_smiles}% '
                                  f'Unique Mols: {fraction_unique_entries}% ')