def generate_smiles(n_smiles=500, restore_from="data/Prior.ckpt", voc_file="data/Voc", embedding_size=128):
    """Sample new SMILES strings from a trained RNN checkpoint.

    Args:
        n_smiles: requested number of SMILES; rounded DOWN to a multiple of
            the internal batch size (32), so a request below 32 yields none.
        restore_from: path to the trained RNN checkpoint.
        voc_file: path to the vocabulary file.
        embedding_size: embedding dimension the checkpoint was trained with.

    Returns:
        List of generated SMILES strings (length == rounded n_smiles).
    """
    batch = 32
    # Only full batches are sampled, so round the request down.
    n_smiles = n_smiles - n_smiles % batch
    print("Generating %i smiles" % n_smiles)
    voc = Vocabulary(init_from_file=voc_file)
    generator = RNN(voc, embedding_size)
    if torch.cuda.is_available():
        generator.rnn.load_state_dict(torch.load(restore_from))
    else:
        # Checkpoint may have been saved on GPU; remap tensors onto the CPU.
        generator.rnn.load_state_dict(
            torch.load(restore_from, map_location=lambda storage, loc: storage))
    all_smiles = []
    # FIX: use integer floor division instead of int(n_smiles / batch) —
    # equivalent here but avoids the float round-trip.
    for _ in range(n_smiles // batch):
        sequences, _, _ = generator.sample(batch)
        all_smiles += seq_to_smiles(sequences, voc)
    # Freeing up memory
    del generator
    torch.cuda.empty_cache()
    return all_smiles
def main(voc_file='data/Voc', restore_model_from='data/Prior.ckpt', output_file='data/Prior_10k.smi', sample_size=10000):
    """Sample `sample_size` SMILES from a trained Agent and write them to a file.

    Args:
        voc_file: path to the vocabulary file.
        restore_model_from: checkpoint to load into the Agent RNN.
        output_file: destination file; parent directory is created if missing.
        sample_size: total number of sequences to draw (in batches of 100).

    Raises:
        RuntimeError: if CUDA is not available (the model is loaded on GPU).
    """
    voc = Vocabulary(init_from_file=voc_file)
    print("Setting up networks")
    Agent = RNN(voc)
    if torch.cuda.is_available():
        print("Cuda available, loading prior & agent")
        Agent.rnn.load_state_dict(torch.load(restore_model_from))
    else:
        # BUG FIX: `raise 'Cuda not available'` raises a string, which is a
        # TypeError in Python 3; raise a proper exception instead.
        raise RuntimeError('Cuda not available')
    SMILES = []
    for n in tqdm(range(sample_size // 100), total=sample_size // 100):
        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(100)
        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]
        smiles = seq_to_smiles(seqs, voc)
        SMILES += smiles
    if not os.path.exists(os.path.dirname(output_file)):
        os.makedirs(os.path.dirname(output_file))
    with open(output_file, "wt") as f:
        # Plain loop instead of a side-effect list comprehension.
        for smi in SMILES:
            f.write(smi + '\n')
    return
def test_sample(self):
    """Every token sampled from the RNN must be a valid index in [0, input_size)."""
    n_tokens, n_hidden = 5, 32
    rnn = RNN(n_tokens, hidden_size=n_hidden)
    stop_token = 4
    bytestring, probs, entropies = rnn.sample(stop_token, maxlen=20)
    # Upper and lower bounds of the sampled symbols.
    self.assertTrue(max(bytestring) < rnn.input_size)
    self.assertTrue(min(bytestring) >= 0)
def train_model():
    """Do transfer learning for generating SMILES.

    Loads the pretrained Prior checkpoint, fine-tunes it on a canonicalized
    SMILES file, periodically samples molecules to report validity, and saves
    the fine-tuned weights.
    """
    voc = Vocabulary(init_from_file='data/Voc')
    # Canonicalize the raw SMILES before building the dataset.
    cano_smi_file('refined_smii.csv', 'refined_smii_cano.csv')
    moldata = MolData('refined_smii_cano.csv', voc)
    # Monomers 67 and 180 were removed because of the unseen [C-] in voc
    # DAs containing [se] [SiH2] [n] removed: 38 molecules
    data = DataLoader(moldata, batch_size=64, shuffle=True, drop_last=False,
                      collate_fn=MolData.collate_fn)
    transfer_model = RNN(voc)
    if torch.cuda.is_available():
        transfer_model.rnn.load_state_dict(torch.load('data/Prior.ckpt'))
    else:
        # Remap a GPU-saved checkpoint onto the CPU.
        transfer_model.rnn.load_state_dict(
            torch.load('data/Prior.ckpt', map_location=lambda storage, loc: storage))
    # Optional weight freezing (left disabled, as in the original):
    # for param in transfer_model.rnn.parameters():
    #     param.requires_grad = False
    optimizer = torch.optim.Adam(transfer_model.rnn.parameters(), lr=0.001)
    for epoch in range(1, 10):
        for step, batch in tqdm(enumerate(data), total=len(data)):
            seqs = batch.long()
            log_p, _ = transfer_model.likelihood(seqs)
            loss = -log_p.mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Every 5 steps decay the LR and report progress with a sample.
            if step % 5 == 0 and step != 0:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write('*' * 50)
                # BUG FIX: indexing a 0-dim tensor (loss.data[0]) raises on
                # modern PyTorch; .item() is the supported scalar accessor.
                tqdm.write(
                    "Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(
                        epoch, step, loss.item()))
                seqs, likelihood, _ = transfer_model.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    if i < 5:
                        tqdm.write(smile)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid / len(seqs)))
                tqdm.write("*" * 50 + '\n')
                torch.save(transfer_model.rnn.state_dict(), "data/transfer_model2.ckpt")
    torch.save(transfer_model.rnn.state_dict(), "data/transfer_modelw.ckpt")
def pretrain(restore_from=None):
    """Trains the Prior RNN.

    Args:
        restore_from: optional checkpoint path to resume training from.
    """
    # Read vocabulary from a file
    voc = Vocabulary(init_from_file="data/Voc")
    # Create a Dataset from a SMILES file
    moldata = MolData("data/mols_filtered.smi", voc)
    data = DataLoader(moldata, batch_size=128, shuffle=True, drop_last=True,
                      collate_fn=MolData.collate_fn)
    Prior = RNN(voc)
    # Can restore from a saved RNN
    if restore_from:
        Prior.rnn.load_state_dict(torch.load(restore_from))
    optimizer = torch.optim.Adam(Prior.rnn.parameters(), lr=0.001)
    for epoch in range(1, 6):
        # When training on a few million compounds, this model converges
        # in a few epochs or even faster. If model size is increased
        # it's probably a good idea to check loss against an external set of
        # validation SMILES to make sure we don't overfit too much.
        for step, batch in tqdm(enumerate(data), total=len(data)):
            # Sample from DataLoader
            seqs = batch.long()
            # Calculate loss
            log_p, _ = Prior.likelihood(seqs)
            loss = -log_p.mean()
            # Calculate gradients and take a step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Every 500 steps we decrease learning rate and print some information.
            # NOTE(review): the trailing `and False` disables this entire branch;
            # it looks like a debugging toggle left in — confirm before removing.
            if step % 500 == 0 and step != 0 and False:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write("*" * 50)
                # BUG FIX: loss.data[0] fails on 0-dim tensors in modern
                # PyTorch; use .item() to extract the scalar.
                tqdm.write(
                    "Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(
                        epoch, step, loss.item()))
                seqs, likelihood, _ = Prior.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    if i < 5:
                        tqdm.write(smile)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid / len(seqs)))
                tqdm.write("*" * 50 + "\n")
def Transfer(restore_from=None):
    """Trains the Prior RNN (transfer learning on a target SMILES set).

    Args:
        restore_from: optional checkpoint path to initialize the RNN from.
    """
    voc = Vocabulary(init_from_file="./Voc")
    moldata = MolData("tl_filtered.smi", voc)
    data = DataLoader(moldata, batch_size=32, shuffle=True, drop_last=True,
                      collate_fn=MolData.collate_fn)
    Prior = RNN(voc)
    # Can restore from a saved RNN
    if restore_from:
        Prior.rnn.load_state_dict(torch.load(restore_from))
    optimizer = torch.optim.Adam(Prior.rnn.parameters(), lr=0.001)
    for epoch in range(1, 101):
        for step, batch in tqdm(enumerate(data), total=len(data)):
            # Sample from DataLoader
            seqs = batch.long()
            # Calculate loss
            log_p, _ = Prior.likelihood(seqs)
            loss = -log_p.mean()
            # Calculate gradients and take a step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Every 2 epochs we decrease learning rate
            if epoch % 2 == 0 and step == 1:
                decrease_learning_rate(optimizer, decrease_by=0.03)
            # Every 10 epochs sample molecules and log progress
            if epoch % 10 == 0 and step == 1:
                tqdm.write("*" * 50)
                # BUG FIX: loss.data[0] raises on 0-dim tensors in modern
                # PyTorch; use .item().
                tqdm.write("Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(epoch, step, loss.item()))
                seqs, likelihood, _ = Prior.sample(100)
                valid = 0
                # FIX: context manager guarantees the file is closed even if
                # decoding raises part-way through.
                with open('tran_output.smi', 'a') as f:
                    for i, seq in enumerate(seqs.cpu().numpy()):
                        smile = voc.decode(seq)
                        if Chem.MolFromSmiles(smile):
                            valid += 1
                            f.write(smile + "\n")
                        if i < 10:
                            tqdm.write(smile)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid / len(seqs)))
                tqdm.write("*" * 50 + "\n")
    # Save the Prior
    torch.save(Prior.rnn.state_dict(), "data/100_epochs_transfer.ckpt")
def main():
    """Entry point: train the character-level RNN, or restore a checkpoint and sample from it."""
    args = parse_args()
    config = parse_config(args)
    # Pick the compute device; seed CUDA only when it is actually used.
    if args.gpu and torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.seed)
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    # Create character-level RNN for data of the form in dataset
    dataset = StudentInteractionsDataset(
        csv_file='data/naive_c5_q50_s4000_v1.csv', root_dir='data/')
    rnn = RNN(voc_len=dataset.voc_len,
              voc_freq=dataset.voc_freq,
              embedding_dim=config.embedding_dim,
              num_lstm_units=config.num_lstm_units,
              num_lstm_layers=config.num_lstm_layers,
              device=device)
    if args.train:
        # 80/20 train/test split of the rows.
        n_train = int(0.8 * dataset.data.shape[0])
        n_test = dataset.data.shape[0] - n_train
        train_set, test_set = random_split(dataset, [n_train, n_test])
        # Loaders for both partitions.
        train_loader = DataLoader(dataset=train_set,
                                  batch_size=config.batch_size,
                                  shuffle=True)
        test_loader = DataLoader(dataset=test_set,
                                 batch_size=config.batch_size,
                                 shuffle=True)
        train(args, rnn, train_loader, test_loader)
    else:
        # Restore a fixed checkpoint and generate sequences from it.
        checkpoint_path = os.path.join(args.checkpoint_dir, 'model-07150.pt')
        rnn.load_state_dict(torch.load(checkpoint_path, map_location=device))
        print("RNN weights restored.")
        samples = [rnn.sample(SAMPLE_SEQ_LEN) for _ in range(args.num_samples)]
        frame = pd.DataFrame(samples)
        file_path = "data/generated/samples_" + str(args.num_samples) + ".csv"
        frame.to_csv(file_path, index=False)
def black_box(load_weights='./data/Prior.ckpt', batch_size=1):
    """Sample SMILES from a Prior RNN conditioned on a fixed descriptor vector.

    Loads the vocabulary and the normalization moments for the latent
    descriptor space, standardizes one hard-coded descriptor vector, and
    draws `num_smi` (100) conditional samples, printing the valid ones and
    the overall validity percentage.
    """
    # Read vocabulary from a file
    voc = Vocabulary(init_from_file="data/Voc")
    vec_file = "data/vecs.dat"
    # Only the mean/std moments are used here; the first return value is discarded.
    _, mew, std = get_latent_vector(None, vec_file, moments=True)
    # Hard-coded descriptor vector for one target molecule.
    # NOTE(review): the meaning of the individual components is not visible
    # here — presumably molecular descriptors matching vecs.dat; confirm.
    vector = np.array([4.2619, 214.96, 512.07, 0.0, 1.0, 0.088, 7.0, 5.0, 100.01, 60.95,
                       7.0, 5.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 9.0, 10.0, 0.0, 4.0, 4.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 23.0, 0.0, 0.0, 25.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0,
                       0.0, 34.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0,
                       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 14.0, 8.0, 0.0, 0.0, 2.0,
                       3.0, 0.0, 2.0, 0.0, 9.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0, 0.0, 8.0,
                       9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0,
                       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0,
                       0.0, 6.0, 0.0, 2.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0, 3.0, 28.0, 1.0,
                       5.0, 0.0, 2.0, 10.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 0.0, 2.0,
                       6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0,
                       3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       3.0, 0.0, 8.0, 1.0, 0.0, 4.0, 0.0, 0.0, 0.0, 2.0, 5.0, 0.0, 0.0,
                       0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0,
                       0.0, 13.0, 0.0, 0.0, 0.0, 5.0, 0.0, 5.0, 7.0, 4.0, 2.0, 0.0, 16.0,
                       20.0, 43.0, 83.0, 90.0, 23.0, 8.0, 37.0, 5.0, 24.0, 5.0, 4.0, 16.0,
                       5.0, 25.0, 93.0, 92.0, 38.0, 0.0, 0.0, 0.0, 4.0])
    # Standardize with the moments from vecs.dat.
    vector = (vector - mew) / std
    data = [vector]
    # The conditioning dimension is the descriptor length.
    Prior = RNN(voc, len(data[0]))
    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we dont have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        Prior.rnn.load_state_dict(torch.load(load_weights))
    else:
        Prior.rnn.load_state_dict(torch.load(load_weights, map_location=lambda storage, loc: storage))
    for test_vec in data:
        print('Test vector {}'.format(test_vec))
        # NOTE(review): wrapping a numpy array in Variable is legacy PyTorch
        # API — works only if the project's Variable helper converts it.
        test_vec = Variable(test_vec).float()
        valid = 0
        num_smi = 100
        all_smi = []
        for i in range(num_smi):
            # Draw one conditional sample per iteration.
            seqs, prior_likelihood, entropy = Prior.sample(batch_size, test_vec)
            smiles = seq_to_smiles(seqs, voc)[0]
            if Chem.MolFromSmiles(smiles):
                valid += 1
                all_smi.append(smiles)
        for smi in all_smi:
            print(smi)
        print("\n{:>4.1f}% valid SMILES".format(100 * valid / len(range(num_smi))))
def Sample(filename, enumerate_number):
    """Draw batches of 100 sequences from a saved RNN until more than
    `enumerate_number` unique valid SMILES are collected (or 9999 batches
    have been drawn), returning the collected set."""
    voc = Vocabulary(init_from_file="./Voc")
    Prior = RNN(voc)
    print(filename, enumerate_number)
    # Can restore from a saved RNN
    Prior.rnn.load_state_dict(torch.load(filename))
    target = int(enumerate_number)
    collected = set()
    n_collected = 0
    for batch_no in range(1, 10000):
        seqs, likelihood, _ = Prior.sample(100)
        n_valid = 0
        # Decode each sequence and keep only RDKit-valid molecules.
        for decoded in (voc.decode(s) for s in seqs.cpu().numpy()):
            if Chem.MolFromSmiles(decoded):
                n_valid += 1
                collected.add(decoded)
        n_collected = len(collected)
        print("\n{:>4.1f}% valid SMILES".format(100 * n_valid / len(seqs)))
        print(n_valid, n_collected, batch_no)
        if n_collected > target:
            break
    return collected
def pretrain(runname='chembl', restore_from=None):
    """Trains the prior RNN.

    Args:
        runname: tag used to locate Voc/data files and name logs/checkpoints.
        restore_from: optional checkpoint path to resume from.
    """
    writer = SummaryWriter('logs/%s' % runname)
    # Read vocabulary from a file
    voc = Vocabulary(init_from_file="data/Voc_%s" % runname)
    # Create a Dataset from a SMILES file
    moldata = MolData("data/mols_%s_filtered.smi" % runname, voc)
    data = DataLoader(moldata, batch_size=128, shuffle=True, drop_last=True,
                      collate_fn=MolData.collate_fn)
    prior = RNN(voc)
    # writer.add_graph(prior.rnn, data.dataset[0])
    # Can restore from a saved RNN
    if restore_from:
        prior.rnn.load_state_dict(torch.load(restore_from))
    optimizer = torch.optim.Adam(prior.rnn.parameters(), lr=0.001)
    running_loss = 0.0
    for epoch in range(1, 6):
        # When training on a few million compounds, this model converges
        # in a few of epochs or even faster. If model sized is increased
        # its probably a good idea to check loss against an external set of
        # validation SMILES to make sure we don't overfit too much.
        for step, batch in tqdm(enumerate(data), total=len(data)):
            # Sample from DataLoader
            seqs = batch.long()
            # Calculate loss
            log_p, entropy = prior.likelihood(seqs)
            loss = -log_p.mean()
            running_loss += loss.item()
            # Calculate gradients and take a step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Every 250 steps we decrease learning rate and print some information
            if step % 250 == 249 and step != 0:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                seqs, likelihood, _ = prior.sample(128)
                valid = 0
                smiles = list()
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                        # Only the first few valid samples are echoed to the console.
                        if valid < 5:
                            tqdm.write(smile)
                        smiles.append(smile.strip())
                tqdm.write("*" * 50)
                # NOTE(review): running_loss/step averages over `step` batches but
                # running_loss is reset every 250 steps — after the first logging
                # point this underestimates the mean loss; confirm intent.
                tqdm.write("Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(epoch, step, running_loss / step))
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid / len(seqs)))
                tqdm.write("*" * 50 + "\n")
                # Checkpoint at every logging point.
                torch.save(prior.rnn.state_dict(), "data/prior_%s.ckpt" % runname)
                # TensorBoard scalars use a global step of epoch*len(data)+step.
                writer.add_scalar('training loss', running_loss / 250, epoch * len(data) + step)
                writer.add_scalar('valid_smiles', 100 * valid / len(seqs), epoch * len(data) + step)
                writer.add_image('sampled_mols', mol_to_torchimage(smiles))
                running_loss = 0.0
    # Save the prior and close writer
    torch.save(prior.rnn.state_dict(), "data/prior_%s.ckpt" % runname)
    writer.close()
def train(args):
    """Train the LegoGram RNN generator.

    Builds (or loads) the molecular dataset, trains for `args.num_epochs`,
    periodically samples molecules for logging, and applies a simple
    early-stopping counter. Returns the list of per-epoch mean losses.
    """
    if args.create_dataset:
        df = pd.read_csv("../data/endpoints_calculated_std.csv")
        smiles = df["smiles"].to_list()
        data = df[df.columns[3:]].to_numpy()
        print("Building LegoModel")
        legoModel = LegoGram(smiles=smiles, nworkers=8)
        torch.save(legoModel, "legoModel.pk")
        print("Building sampler")
        sampler = LegoGramRNNSampler(legoModel)
        torch.save(sampler, "sampler.pk")
        print("Constracting dataset")
        dataset = MolecularNotationDataset(smiles, sampler, data)
        torch.save(dataset, 'lg.bin')
    else:
        dataset = torch.load('lg.bin')
    train_loader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collect)
    device = torch.device('cpu')
    if args.cuda:
        device = torch.device('cuda')
    model = RNN(voc_size=dataset.vocsize, device=device)
    model.train()
    # BUG FIX: the original unconditionally called model.cuda() (and
    # batch.cuda() below) even when args.cuda was False, crashing on
    # CPU-only machines; route everything through the selected device.
    model.to(device)
    print(f"Model has been created on device {device}")
    smiles_dataset = dataset.smiles
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # index 0 is padding and must not contribute to the loss
    loss_f = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
    writer = SummaryWriter(comment=args.name_task)
    losses = []
    out_counter = 0  # epochs without improvement (early-stopping counter)
    cnt = 0          # global counter for sampling-related TensorBoard scalars
    for epoch in range(args.num_epochs):
        loss_list = []
        for iteration, (batch, lengths) in enumerate(tqdm(train_loader)):
            batch = batch.to(device)
            logits, endp_model = model(batch, lengths)
            print(logits.shape)
            print(batch.shape)
            # Next-token prediction: logits for positions :-1 vs tokens 1:.
            loss = loss_f(logits[:, :, :-1], batch[:, 1:])
            loss_list.append(loss.item())
            writer.add_scalar("CrossEntropyLoss", loss_list[-1],
                              iteration + epoch * len(train_loader))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if iteration % args.print_every == 0 and iteration > 0:
                model.eval()
                number_generate = 100
                res = model.sample(number_generate, dataset.model)
                writer.add_text("Molecules after generator", json.dumps([res]))
                valid = len(res) * 100 / number_generate
                print(res)
                print("valid : {} %".format(valid))
                writer.add_scalar("Valid", valid, cnt)
                # Standardize and drop failures before uniqueness check.
                res = [robust_standardizer(mol) for mol in res]
                res = list(filter(lambda x: x is not None, res))
                unique = len([elem for elem in res if elem not in smiles_dataset])
                writer.add_text("Unique mols", json.dumps([res]))
                print(f"There are unique mols {unique}")
                print(res)
                writer.add_scalar("Unique", unique, cnt)
                cnt += 1
                model.train()
        writer.flush()
        epoch_loss = np.mean(loss_list)
        print(f"Loss on epoch {epoch } is {epoch_loss}")
        # Early-stopping bookkeeping: count non-improving epochs, save on improvement.
        if out_counter < args.stop_after and epoch > 0:
            if losses[-1] <= epoch_loss:
                out_counter += 1
            else:
                out_counter = 0
                torch.save(model, "experiments/" + args.name_task + "/model.pt")
        if epoch == 0:
            torch.save(model, "experiments/" + args.name_task + "/model.pt")
        losses.append(epoch_loss)
    return losses
def train_model(voc_dir, smi_dir, prior_dir, tf_dir, tf_process_dir, freeze=False):
    """ Transfer learning on target molecules using the SMILES structures
    Args:
        voc_dir: location of the vocabulary
        smi_dir: location of the SMILES file used for transfer learning
        prior_dir: location of prior trained model to initialize transfer learning
        tf_dir: location to save the transfer learning model
        tf_process_dir: location to save the SMILES sampled while doing transfer learning
        freeze: Bool. If true, all parameters in the RNN will be frozen except
            for the last linear layer during transfer learning.
    Returns:
        None
    """
    voc = Vocabulary(init_from_file=voc_dir)
    #cano_smi_file('all_smi_refined.csv', 'all_smi_refined_cano.csv')
    moldata = MolData(smi_dir, voc)
    # Monomers 67 and 180 were removed because of the unseen [C-] in voc
    # DAs containing [C] removed: 43 molecules in 5356; Ge removed: 154 in 5356; [c] removed 4 in 5356
    # [S] 1 molecule in 5356
    data = DataLoader(moldata, batch_size=64, shuffle=True, drop_last=False,
                      collate_fn=MolData.collate_fn)
    transfer_model = RNN(voc)
    # if freeze=True, freeze all parameters except those in the linear layer
    # NOTE(review): the prior weights are loaded AFTER the linear layer is
    # replaced, so the fresh layer is overwritten by the checkpoint — confirm
    # this ordering is intended.
    if freeze:
        for param in transfer_model.rnn.parameters():
            param.requires_grad = False
        transfer_model.rnn.linear = nn.Linear(512, voc.vocab_size)
    if torch.cuda.is_available():
        transfer_model.rnn.load_state_dict(torch.load(prior_dir))
    else:
        transfer_model.rnn.load_state_dict(
            torch.load(prior_dir, map_location=lambda storage, loc: storage))
    optimizer = torch.optim.Adam(transfer_model.rnn.parameters(), lr=0.0005)
    smi_lst = []
    epoch_lst = []
    for epoch in range(1, 11):
        for step, batch in tqdm(enumerate(data), total=len(data)):
            seqs = batch.long()
            log_p, _ = transfer_model.likelihood(seqs)
            loss = -log_p.mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Every 80 steps decay the LR and report a validity check.
            if step % 80 == 0 and step != 0:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write('*' * 50)
                # BUG FIX: loss.data[0] raises IndexError on 0-dim tensors in
                # modern PyTorch; use .item() to read the scalar loss.
                tqdm.write(
                    "Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(
                        epoch, step, loss.item()))
                seqs, likelihood, _ = transfer_model.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    if i < 5:
                        tqdm.write(smile)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid / len(seqs)))
                tqdm.write("*" * 50 + '\n')
                torch.save(transfer_model.rnn.state_dict(), tf_dir)
        # After each epoch, sample a larger batch and record valid SMILES
        # (those that also survive fingerprinting) with their epoch number.
        seqs, likelihood, _ = transfer_model.sample(1024)
        valid = 0
        for i, seq in enumerate(seqs.cpu().numpy()):
            smile = voc.decode(seq)
            if Chem.MolFromSmiles(smile):
                try:
                    AllChem.GetMorganFingerprintAsBitVect(
                        Chem.MolFromSmiles(smile), 2, 1024)
                    valid += 1
                    smi_lst.append(smile)
                    epoch_lst.append(epoch)
                # FIX: narrowed from a bare except, which would also swallow
                # KeyboardInterrupt/SystemExit.
                except Exception:
                    continue
    torch.save(transfer_model.rnn.state_dict(), tf_dir)
    transfer_process_df = pd.DataFrame(columns=['SMILES', 'Epoch'])
    transfer_process_df['SMILES'] = pd.Series(data=smi_lst)
    transfer_process_df['Epoch'] = pd.Series(data=epoch_lst)
    transfer_process_df.to_csv(tf_process_dir)
def main(batch_size, embed_size, num_hiddens, num_layers, ln_hidden, ln_output,
         rec_unit, learning_rate=1e-4, log_step=10, num_epochs=50,
         save_step=100, ngpu=1):
    """Train the CNN encoder + fcNet bridge + RNN decoder captioning pipeline.

    Every `log_step` steps the validation set is evaluated and one sample
    caption is printed; every `save_step` steps checkpoints are written.
    Training is interruptible: a final checkpoint is always saved.
    """
    # hyperparameters
    num_workers = 0
    checkpoint_dir = 'checkpoint'
    # Image Preprocessing
    transform = {
        'train': transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ]),
        'val': transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ]),
    }
    # load data
    vocab = build_vocab(path='relative_captions_shoes.json')
    train_data, train_loader = data_and_loader(
        path='relative_captions_shoes.json', mode='train', vocab=vocab,
        transform=transform['train'], batch_size=batch_size)
    val_data, val_loader = data_and_loader(
        path='relative_captions_shoes.json', mode='valid', vocab=vocab,
        transform=transform['val'], batch_size=batch_size)
    losses_val = []
    losses_train = []
    # Build the models
    initial_step = initial_epoch = 0
    encoder = CNN(embed_size)  # embed_size: power of 2
    middle = fcNet(embed_size, ln_hidden, ln_output)
    decoder = RNN(ln_output, num_hiddens, len(vocab), num_layers,
                  rec_unit=rec_unit, drop_out=0.1)
    # Loss, parameters & optimizer — only the encoder's linear/batchnorm
    # layers are fine-tuned, plus the full decoder.
    loss_fun = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.batchnorm.parameters())
    optimizer = torch.optim.Adam(params, lr=learning_rate)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()
    # Train the Models
    total_step = len(train_loader)
    try:
        for epoch in range(initial_epoch, num_epochs):
            print('Epoch: {}'.format(epoch))
            for step, (images, captions, lengths) in enumerate(train_loader, start=initial_step):
                # Set mini-batch dataset
                images = Variable(images)
                captions = Variable(captions)
                targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
                # Forward, Backward and Optimize
                decoder.zero_grad()
                middle.zero_grad()
                encoder.zero_grad()
                if ngpu > 1:
                    # run on multiple GPUs
                    features = nn.parallel.data_parallel(
                        encoder, images, range(ngpu))
                    rnn_input = nn.parallel.data_parallel(
                        middle, features, range(ngpu))
                    # BUG FIX: the decoder was fed `features`; the single-GPU
                    # path feeds it (rnn_input, captions, lengths), so the
                    # multi-GPU path must do the same.
                    outputs = nn.parallel.data_parallel(
                        decoder, (rnn_input, captions, lengths), range(ngpu))
                else:
                    # run on single GPU
                    features = encoder(images)
                    rnn_input = middle(features)
                    outputs = decoder(rnn_input, captions, lengths)
                train_loss = loss_fun(outputs, targets)
                losses_train.append(train_loss.item())
                train_loss.backward()
                optimizer.step()
                # Run validation set and predict
                if step % log_step == 0:
                    encoder.batchnorm.eval()
                    # run validation set
                    batch_loss_val = []
                    for val_step, (images, captions, lengths) in enumerate(val_loader):
                        images = Variable(images)
                        captions = Variable(captions)
                        targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
                        #features = encoder(target_images) - encoder(refer_images)
                        features = encoder(images)
                        rnn_input = middle(features)
                        outputs = decoder(rnn_input, captions, lengths)
                        val_loss = loss_fun(outputs, targets)
                        batch_loss_val.append(val_loss.item())
                    losses_val.append(np.mean(batch_loss_val))
                    # predict one caption from the last validation batch
                    sampled_ids = decoder.sample(rnn_input)
                    sampled_ids = sampled_ids.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(sampled_ids, vocab)
                    print('Sample:', sentence)
                    true_ids = captions.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(true_ids, vocab)
                    print('Target:', sentence)
                    print(
                        'Epoch: {} - Step: {} - Train Loss: {} - Eval Loss: {}'
                        .format(epoch, step, losses_train[-1], losses_val[-1]))
                    encoder.batchnorm.train()
                # Save the models
                # NOTE(review): the loop uses bare save_models/dump_losses while
                # the finally-block uses utils.-qualified names — confirm both
                # are importable in this module.
                if (step + 1) % save_step == 0:
                    save_models(encoder, middle, decoder, optimizer, step,
                                epoch, losses_train, losses_val, checkpoint_dir)
                    dump_losses(losses_train, losses_val,
                                os.path.join(checkpoint_dir, 'losses.pkl'))
    except KeyboardInterrupt:
        pass
    finally:
        # Do final save
        utils.save_models(encoder, middle, decoder, optimizer, step, epoch,
                          losses_train, losses_val, checkpoint_dir)
        utils.dump_losses(losses_train, losses_val,
                          os.path.join(checkpoint_dir, 'losses.pkl'))
def main():
    """Restore a trained character-level RNN, write sample text, plot
    log-likelihood histograms, and classify snippets by log-likelihood."""
    args = parse_args()
    config = parse_config(args)
    np.random.seed(config.seed)
    if args.gpu and torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.seed)
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    torch.manual_seed(config.seed)
    dataset = NIPS2015Dataset(batch_size=config.batch_size,
                              seq_len=config.seq_len,
                              data_folder=args.data_dir)
    rnn = RNN(vocab_size=dataset.voc_len,
              embedding_dim=config.embedding_dim,
              num_lstm_units=config.num_lstm_units,
              num_lstm_layers=config.num_lstm_layers,
              dataset=dataset,
              device=device)
    # Restore the trained weights onto the selected device.
    checkpoint = torch.load(os.path.join(args.checkpoint_dir, 'checkpoint.pth'),
                            map_location=device)
    rnn.load_state_dict(checkpoint['rnn'])
    print("# RNN weights restored.")
    # question 3)
    with open('samples.txt', 'w') as f:
        for i in range(5):
            text = 'sample {}: '.format(i + 1)
            sample = rnn.sample(SAMPLE_SEQ_LEN)
            # Map sampled indices back to characters.
            text += ''.join([dataset.idx2char[i] for i in sample])
            f.write(text + '\n')
    print("# Samples written to samples.txt.")
    # question 4)
    plot_log_p('random', dataset, rnn)
    plot_log_p('shakespeare', dataset, rnn)
    plot_log_p('nips', dataset, rnn)
    # question 5)
    with open('snippets.pkl', 'rb') as f:
        snippets = pkl.load(f)
    lbls = []
    for snippet in snippets:
        # Compute the log-likelihood of the current snippet
        ll = rnn.compute_prob(
            np.asarray([dataset.char2idx[c] for c in snippet]))
        ##### complete the code here #####
        # infer the label of the current snippet and append it to lbls.
        # If the snippet is generated randomly, append 0
        # If the snippet is from Shakespeare's work, append 1
        # If the snippet is retrieved from a NIPS paper, append 2
        #################################
        # compute label = f(ll)
        # lbls.append(label)
        #print("Snippet:")
        #print(snippet)
        label = 0
        #label categorization: depending on the histogram limits for each class, adjust label assignment
        # NOTE(review): the (-450,-210) and (-230,-100) ranges overlap — a ll
        # in (-230,-210) matches both and ends up labeled 2 because the later
        # `if` wins; confirm the thresholds against the histograms.
        if (ll > -800 and ll < -650):
            label = 0
        if (ll > -450 and ll < -210):
            label = 1
        if (ll > -230 and ll < -100):
            label = 2
        #print("label=")
        #print(label)
        lbls.append(label)
    with open("answers.pkl", 'wb') as f:
        pkl.dump(lbls, f, protocol=pkl.HIGHEST_PROTOCOL)
    print("# Answers written to answers.pkl.")
    return 0
def pretrain(restore_from=None, save_to="data/Prior.ckpt", data="data/mols_filtered.smi",
             voc_file="data/Voc", batch_size=128, learning_rate=0.001,
             n_epochs=5, store_loss_dir=None, embedding_size=32):
    """Trains the Prior RNN.

    Args:
        restore_from: optional checkpoint to resume from.
        save_to: path the trained checkpoint is written to.
        data: SMILES file the dataset is built from.
        voc_file: vocabulary file.
        batch_size / learning_rate / n_epochs: training hyperparameters.
        store_loss_dir: directory for loss.csv (cwd when None).
        embedding_size: embedding dimension of the RNN.
    """
    # Read vocabulary from a file
    voc = Vocabulary(init_from_file=voc_file)
    # Create a Dataset from a SMILES file
    moldata = MolData(data, voc)
    data = DataLoader(moldata, batch_size=batch_size, shuffle=True, drop_last=True,
                      collate_fn=MolData.collate_fn)
    Prior = RNN(voc, embedding_size)
    # Adding a file to log loss info
    if store_loss_dir is None:
        out_f = open("loss.csv", "w")
    else:
        out_f = open("{}/loss.csv".format(store_loss_dir.rstrip("/")), "w")
    out_f.write("Step,Loss\n")
    # Can restore from a saved RNN
    if restore_from:
        Prior.rnn.load_state_dict(torch.load(restore_from))
    # For later plotting the loss
    training_step_counter = 0
    n_logging = 100
    optimizer = torch.optim.Adam(Prior.rnn.parameters(), lr=learning_rate)
    for epoch in range(1, n_epochs + 1):
        # When training on a few million compounds, this model converges
        # in a few epochs or even faster. If model size is increased
        # it's probably a good idea to check loss against an external set of
        # validation SMILES to make sure we don't overfit too much.
        for step, batch in tqdm(enumerate(data), total=len(data)):
            # Sample from DataLoader
            seqs = batch.long()
            # Calculate loss
            log_p, _ = Prior.likelihood(seqs)
            loss = -log_p.mean()
            # Calculate gradients and take a step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Logging the loss to a file
            if training_step_counter % n_logging == 0:
                # FIX: log the scalar value, not the tensor repr.
                # NOTE(review): the per-epoch `step` is logged while the
                # global counter gates the logging — confirm which was meant.
                out_f.write("{},{}\n".format(step, loss.item()))
            training_step_counter += 1
            # Every 500 steps we decrease learning rate and print some information
            if step % 500 == 0 and step != 0:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write("*" * 50)
                tqdm.write(
                    "Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(
                        epoch, step, loss.item()))
                seqs, likelihood, _ = Prior.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    if i < 5:
                        tqdm.write(smile)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid / len(seqs)))
                tqdm.write("*" * 50 + "\n")
                torch.save(Prior.rnn.state_dict(), save_to)
    # Save the Prior
    torch.save(Prior.rnn.state_dict(), save_to)
    # BUG FIX: was `f_out.close()` — a NameError; the handle is `out_f`.
    out_f.close()
def train_agent(runname='celecoxib', priorname='chembl', scoring_function='Tanimoto',
                scoring_function_kwargs=None, save_dir=None, batch_size=64, n_steps=3000,
                num_processes=6, sigma=60, experience_replay=5, lr=0.0005):
    """REINVENT-style RL: fine-tune an Agent RNN against a scoring function,
    regularized toward a fixed Prior, with optional experience replay.
    Logs to TensorBoard and saves the Agent plus sampled SMILES at the end."""
    print("\nStarting run %s with prior %s ..." % (runname, priorname))
    start_time = time.time()
    voc = Vocabulary(init_from_file="data/Voc_%s" % priorname)
    prior = RNN(voc)
    agent = RNN(voc)
    writer = SummaryWriter('logs/%s' % runname)
    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we dont have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        prior.rnn.load_state_dict(torch.load('data/prior_%s.ckpt' % priorname))
        agent.rnn.load_state_dict(torch.load('data/prior_%s.ckpt' % priorname))
    else:
        prior.rnn.load_state_dict(
            torch.load('data/prior_%s.ckpt' % priorname,
                       map_location=lambda storage, loc: storage))
        agent.rnn.load_state_dict(
            torch.load('data/prior_%s.ckpt' % priorname,
                       map_location=lambda storage, loc: storage))
    # We dont need gradients with respect to Prior
    for param in prior.rnn.parameters():
        param.requires_grad = False
    optimizer = torch.optim.Adam(agent.rnn.parameters(), lr=lr)
    # Scoring_function
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **scoring_function_kwargs)
    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefor not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)
    print("Model initialized, starting training...")
    for step in range(n_steps):
        # Sample from Agent
        seqs, agent_likelihood, entropy = agent.sample(batch_size)
        # Remove duplicates, ie only consider unique seqs
        unique_ids = unique(seqs)
        seqs = seqs[unique_ids]
        agent_likelihood = agent_likelihood[unique_ids]
        entropy = entropy[unique_ids]
        # Get prior likelihood and score
        prior_likelihood, _ = prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)
        # Calculate augmented likelihood: prior likelihood shifted by the
        # sigma-weighted score is the regression target for the agent.
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)
        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = agent.likelihood(
                exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow(
                (Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat(
                (agent_likelihood, exp_agent_likelihood), 0)
        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        best_memory = experience.add_experience(new_experience)
        # Calculate loss
        loss = loss.mean()
        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p
        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()
        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print("\n Step {} Fraction valid SMILES: {:4.1f} Time elapsed: {:.2f}h Time left: {:.2f}h\n"
              .format(step, fraction_valid_smiles(smiles) * 100, time_elapsed, time_left))
        print(" Agent Prior Target Score SMILES")
        # Show the first 10 sampled molecules with their likelihoods/scores.
        for i in range(10):
            print(" {:6.2f} {:6.2f} {:6.2f} {:6.2f} {}".format(
                agent_likelihood[i], prior_likelihood[i],
                augmented_likelihood[i], score[i], smiles[i]))
        # Log
        writer.add_scalar('loss', loss.item(), step)
        writer.add_scalar('score', np.mean(score), step)
        writer.add_scalar('entropy', entropy.mean(), step)
        if best_memory:
            writer.add_scalar('best_memory', best_memory, step)
        # get 4 random valid smiles and scores for logging
        # NOTE(review): np.random.choice with replace=False raises if fewer
        # than 4 valid SMILES exist in the batch — confirm that cannot happen.
        val_ids = np.array(
            [i for i, s in enumerate(smiles) if is_valid_mol(s)])
        val_ids = np.random.choice(val_ids, 4, replace=False)
        smiles = np.array(smiles)[val_ids]
        score = ['%.3f' % s for s in np.array(score)[val_ids]]
        writer.add_image('generated_mols', mol_to_torchimage(smiles, score),
                         step)
    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experinence (which are the highest
    # scored sequences seen during training)
    if not save_dir:
        save_dir = 'results/%s' % runname + time.strftime(
            "%Y-%m-%d-%H_%M_%S", time.localtime())
    os.makedirs(save_dir)
    copyfile('agent.py', os.path.join(save_dir, "agent_%s.py" % runname))
    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(agent.rnn.state_dict(),
               os.path.join(save_dir, 'Agent_%s.ckpt' % runname))
    # Final sample from the trained agent, scored and written to disk.
    seqs, agent_likelihood, entropy = agent.sample(256)
    prior_likelihood, _ = prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled.txt"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smiles, score, prior_likelihood in zip(smiles, score, prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smiles, score, prior_likelihood))
    print("\nDONE! Whole run took %s" % datetime.timedelta(seconds=time.time() - start_time))
def sample_smiles(voc_dir, nums, outfn, tf_dir, until=False):
    """Sample SMILES from a transfer-learned RNN and write the valid ones to a file.

    Args:
        voc_dir: path to the vocabulary file used to build the Vocabulary.
        nums: number of sequences to sample, or — when ``until`` is True —
            the number of valid molecules to collect.
        outfn: output file path; one SMILES per line.
        tf_dir: path to the transferred model checkpoint.
        until: if True, sample one sequence at a time until ``nums`` valid
            molecules have been written.
    """
    voc = Vocabulary(init_from_file=voc_dir)
    transfer_model = RNN(voc)

    # Saved models may contain GPU tensors; remap to CPU when cuda is absent.
    if torch.cuda.is_available():
        transfer_model.rnn.load_state_dict(torch.load(tf_dir))
    else:
        transfer_model.rnn.load_state_dict(
            torch.load(tf_dir, map_location=lambda storage, loc: storage))

    # Inference only: no gradients needed.
    for param in transfer_model.rnn.parameters():
        param.requires_grad = False

    # BUG FIX: the output handle was opened eagerly and never closed in the
    # `until` branch (and leaked on any exception in both branches).  A
    # context manager guarantees the file is closed on every path.
    with open(outfn, 'w') as output:
        if not until:
            seqs, likelihood, _ = transfer_model.sample(nums)
            valid = 0
            # NOTE(review): the double-Br counting code was commented out
            # upstream, so this counter always reports 0.
            double_br = 0
            unique_idx = unique(seqs)
            seqs = seqs[unique_idx]
            for i, seq in enumerate(seqs.cpu().numpy()):
                smile = voc.decode(seq)
                if Chem.MolFromSmiles(smile):
                    # Some RDKit-parseable SMILES still fail fingerprinting;
                    # only count/write those that survive both checks.
                    try:
                        AllChem.GetMorganFingerprintAsBitVect(
                            Chem.MolFromSmiles(smile), 2, 1024)
                        valid += 1
                        output.write(smile + '\n')
                    except Exception:
                        continue
            tqdm.write(
                '\n{} molecules sampled, {} valid SMILES, {} with double Br'.
                format(nums, valid, double_br))
        else:
            valid = 0
            n_sample = 0
            # Keep sampling single sequences until enough valid molecules.
            while valid < nums:
                seq, likelihood, _ = transfer_model.sample(1)
                n_sample += 1
                seq = seq.cpu().numpy()[0]
                smile = voc.decode(seq)
                if Chem.MolFromSmiles(smile):
                    try:
                        AllChem.GetMorganFingerprintAsBitVect(
                            Chem.MolFromSmiles(smile), 2, 1024)
                        valid += 1
                        output.write(smile + '\n')
                    except Exception:
                        continue
            tqdm.write(
                '\n{} valid molecules sampled, with {} of total samples'.
                format(nums, n_sample))
# Log some network weights that can be dynamically plotted with the vizard bokeh app logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100], "init_weight_GRU_layer_2_w_ih") logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100], "init_weight_GRU_layer_2_w_hh") logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30], "init_weight_GRU_embedding") logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(), "init_weight_GRU_layer_2_b_ih") logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(), "init_weight_GRU_layer_2_b_hh") # Information for the logger step_score = [[], []] print('Model initialized, starting training...') for step in range(n_steps): # sample from agent seqs, agent_likelihood, entropy = Agent.sample(batch_size) # Remove duplicates, ie only consider unique seqs unique_idx = unique(seqs) seqs = seqs[unique_idx] agent_likelihood = agent_likelihood[unique_idx] entropy = entropy[unique_idx] # Get prior likelihood and score prior_likelihood, _ = Prior.likelihood(Variable(seqs)) smiles = seq_to_smiles(seqs, voc) score = scoring_function(smiles) # Calculate augmented likelihood augmented_likelihood = prior_likelihood + sigma * Variable(score) loss = torch.pow((augmented_likelihood - agent_likelihood), 2)
def train_agent(restore_prior_from='data/Prior.ckpt',
                restore_agent_from='data/Prior.ckpt',
                scoring_function='tanimoto',
                scoring_function_kwargs=None,
                save_dir=None,
                learning_rate=0.0005,
                batch_size=64,
                n_steps=3000,
                num_processes=0,
                sigma=60,
                experience_replay=0):
    """Train an Agent RNN against a frozen Prior with REINVENT-style policy RL.

    Each step samples ``batch_size`` sequences from the Agent, removes
    duplicates, scores the decoded SMILES, and minimises
    ``(prior_likelihood + sigma * score - agent_likelihood)^2``.  After
    training, the experience memory, final checkpoint and 256 sampled SMILES
    are written to ``save_dir``.

    Args:
        restore_prior_from: checkpoint for the frozen Prior RNN.
        restore_agent_from: checkpoint to initialise the Agent (defaults to
            the Prior itself).
        scoring_function: name understood by ``get_scoring_function``.
        scoring_function_kwargs: extra kwargs for the scoring function, or None.
        save_dir: output directory; a timestamped directory is created if None.
        learning_rate: Adam learning rate for the Agent.
        batch_size: sequences sampled per step.
        n_steps: number of RL steps.
        num_processes: worker processes for the scoring function.
        sigma: weight of the score in the augmented likelihood.
        experience_replay: if truthy, additionally train on 4 replayed
            high-scoring sequences each step.
    """
    voc = Vocabulary(init_from_file="data/Voc")
    start_time = time.time()
    Prior = RNN(voc)
    Agent = RNN(voc)
    logger = VizardLog('data/logs')

    # By default restore Agent to the same model as the Prior, but an already
    # trained Agent can be restored too.  Saved models are partially on the
    # GPU; without cuda we remap the tensors to the CPU.
    if torch.cuda.is_available():
        Prior.rnn.load_state_dict(torch.load(restore_prior_from))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Prior.rnn.load_state_dict(
            torch.load(restore_prior_from,
                       map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from,
                       map_location=lambda storage, loc: storage))

    # We don't need gradients with respect to the frozen Prior.
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    # BUG FIX: the learning rate was hard-coded to 0.0005, silently ignoring
    # the `learning_rate` parameter.
    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # BUG FIX: `scoring_function_kwargs` defaults to None and `**None`
    # raises TypeError -- fall back to an empty dict.
    scoring_function = get_scoring_function(
        scoring_function=scoring_function,
        num_processes=num_processes,
        **(scoring_function_kwargs or {}))

    # For policy based RL, we normally train on-policy and correct for the
    # fact that more likely actions occur more often (which means the agent
    # can get biased towards them).  Using experience replay is therefore not
    # as theoretically sound as it is for value based RL, but it seems to
    # work well.
    experience = Experience(voc)

    # Log some network weights that can be dynamically plotted with the
    # Vizard bokeh app.
    logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
               "init_weight_GRU_layer_2_w_ih")
    logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
               "init_weight_GRU_layer_2_w_hh")
    logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30],
               "init_weight_GRU_embedding")
    logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(),
               "init_weight_GRU_layer_2_b_ih")
    logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(),
               "init_weight_GRU_layer_2_b_hh")

    # Information for the logger
    step_score = [[], []]

    print("Model initialized, starting training...")

    for step in range(n_steps):
        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay -- first train on 4 replayed sequences...
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(
                exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow(
                (Variable(exp_augmented_likelihood) - exp_agent_likelihood),
                2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat(
                (agent_likelihood, exp_agent_likelihood), 0)

        # ...then add the new samples to the memory.
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire
        # sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print(
            "\n Step {} Fraction valid SMILES: {:4.1f} Time elapsed: {:.2f}h Time left: {:.2f}h"
            .format(step, fraction_valid_smiles(smiles) * 100, time_elapsed,
                    time_left))
        print(" Agent Prior Target Score SMILES")
        for i in range(10):
            print(" {:6.2f} {:6.2f} {:6.2f} {:6.2f} {}".format(
                agent_likelihood[i], prior_likelihood[i],
                augmented_likelihood[i], score[i], smiles[i]))

        # Need this for Vizard plotting
        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # Log some weights
        logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
                   "weight_GRU_layer_2_w_ih")
        logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
                   "weight_GRU_layer_2_w_hh")
        logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30],
                   "weight_GRU_embedding")
        logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(),
                   "weight_GRU_layer_2_b_ih")
        logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(),
                   "weight_GRU_layer_2_b_hh")
        logger.log("\n".join([smiles + "\t" + str(round(score, 2))
                              for smiles, score in zip(smiles[:12],
                                                       score[:12])]),
                   "SMILES", dtype="text", overwrite=True)
        logger.log(np.array(step_score), "Scores")

    # If the entire training finishes, we create a new folder where we save
    # this python file as well as some sampled sequences and the contents of
    # the experience (which are the highest scored sequences seen during
    # training).
    if not save_dir:
        save_dir = 'data/results/run_' + time.strftime(
            "%Y-%m-%d-%H_%M_%S", time.localtime())
    os.makedirs(save_dir)
    copyfile('train_agent.py', os.path.join(save_dir, "train_agent.py"))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'Agent.ckpt'))

    seqs, agent_likelihood, entropy = Agent.sample(256)
    prior_likelihood, _ = Prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smiles, score, prior_likelihood in zip(smiles, score,
                                                   prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smiles, score,
                                                  prior_likelihood))
def train_agent(restore_prior_from='data/Prior.ckpt',
                restore_agent_from='data/Prior.ckpt',
                voc_file='data/Voc',
                molscore_config=None,
                learning_rate=0.0005,
                batch_size=64,
                n_steps=3000,
                sigma=60,
                experience_replay=0):
    """Train an RNN Agent against a frozen Prior with REINVENT-style policy RL,
    using MolScore for scoring, logging and checkpoint bookkeeping.

    Each step samples `batch_size` sequences from the Agent, removes
    duplicates, scores the decoded SMILES via MolScore and minimises
    (prior_likelihood + sigma * score - agent_likelihood)^2.  Intermediate
    Agent checkpoints are saved every 250 steps; the final checkpoint and the
    MolScore dataframe are written when training completes.

    Args:
        restore_prior_from: checkpoint for the frozen Prior RNN.
        restore_agent_from: checkpoint to initialise the Agent (defaults to
            the Prior itself).
        voc_file: vocabulary file used to build the Vocabulary.
        molscore_config: configuration passed to MolScore.
        learning_rate: Adam learning rate for the Agent.
        batch_size: sequences sampled per step.
        n_steps: number of RL steps.
        sigma: weight of the score in the augmented likelihood.
        experience_replay: if truthy, additionally train on 4 replayed
            high-scoring sequences each step.
    """
    voc = Vocabulary(init_from_file=voc_file)
    start_time = time.time()
    # Scoring_function
    scoring_function = MolScore(molscore_config)
    scoring_function.log_parameters({'batch_size': batch_size, 'sigma': sigma})
    print("Building RNNs")
    Prior = RNN(voc)
    Agent = RNN(voc)
    # By default restore Agent to same model as Prior, but can restore from
    # already trained Agent too.  Saved models are partially on the GPU, but
    # if we dont have cuda enabled we can remap these to the CPU.
    if torch.cuda.is_available():
        print("Cuda available, loading prior & agent")
        Prior.rnn.load_state_dict(torch.load(restore_prior_from))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        print("Cuda not available, remapping to cpu")
        Prior.rnn.load_state_dict(torch.load(restore_prior_from, map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from, map_location=lambda storage, loc: storage))
    # We dont need gradients with respect to Prior
    for param in Prior.rnn.parameters():
        param.requires_grad = False
    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)
    # For logging purposes let's save some training parameters not captured by molscore
    with open(os.path.join(scoring_function.save_dir, 'reinvent_parameters.txt'), 'wt') as f:
        [f.write(f'{p}: {v}\n') for p, v in {'learning_rate': learning_rate,
                                             'batch_size': batch_size,
                                             'n_steps': n_steps,
                                             'sigma': sigma,
                                             'experience_replay': experience_replay}.items()]
    # For policy based RL, we normally train on-policy and correct for the
    # fact that more likely actions occur more often (which means the agent
    # can get biased towards them).  Using experience replay is therefore not
    # as theoretically sound as it is for value based RL, but it seems to
    # work well.
    experience = Experience(voc)
    print("Model initialized, starting training...")
    for step in range(n_steps):
        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)
        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]
        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        # Using molscore instead here
        # NOTE(review): bare `except` is deliberate here -- it performs
        # cleanup (dump SMILES, save checkpoint, flush scores, kill the dash
        # monitor) and then re-raises the original exception.
        try:
            score = scoring_function(smiles, step=step)
            augmented_likelihood = prior_likelihood + sigma * Variable(score)
        except:
            # If anything goes wrong with molscore, write scores and save
            # .ckpt and kill monitor
            with open(os.path.join(scoring_function.save_dir, f'failed_smiles_{scoring_function.step}.smi'), 'wt') as f:
                [f.write(f'{smi}\n') for smi in smiles]
            torch.save(Agent.rnn.state_dict(), os.path.join(scoring_function.save_dir, f'Agent_{step}.ckpt'))
            scoring_function.write_scores()
            scoring_function.kill_dash_monitor()
            raise
        # Calculate loss
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)
        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat((agent_likelihood, exp_agent_likelihood), 0)
        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        experience.add_experience(new_experience)
        # Calculate loss
        loss = loss.mean()
        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = - (1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p
        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()
        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print(f"\n Step {step} Fraction valid SMILES: {fraction_valid_smiles(smiles) * 100:4.1f}\ Time elapsed: {time_elapsed:.2f}h Time left: {time_left:.2f}h")
        print(" Agent Prior Target Score SMILES")
        for i in range(10):
            print(f" {agent_likelihood[i]:6.2f} {prior_likelihood[i]:6.2f} {augmented_likelihood[i]:6.2f} {score[i]:6.2f} {smiles[i]}")
        # Save the agent weights every 250 iterations ####
        if step % 250 == 0 and step != 0:
            torch.save(Agent.rnn.state_dict(), os.path.join(scoring_function.save_dir, f'Agent_{step}.ckpt'))
    # If the entire training finishes, write out MolScore dataframe, kill
    # dash_utils monitor and save the final Agent.ckpt
    torch.save(Agent.rnn.state_dict(), os.path.join(scoring_function.save_dir, f'Agent_{n_steps}.ckpt'))
    scoring_function.write_scores()
    scoring_function.kill_dash_monitor()
    return
def hill_climbing(pattern=None,
                  restore_agent_from='data/Prior.ckpt',
                  scoring_function='tanimoto',
                  scoring_function_kwargs=None,
                  save_dir=None,
                  learning_rate=0.0005,
                  batch_size=64,
                  n_steps=10,
                  num_processes=0,
                  use_custom_voc="data/Voc"):
    """Optimise an RNN generator by hill-climbing.

    At each step, a batch is sampled (optionally scaffold-constrained by
    ``pattern``), the unique SMILES are scored, and the Agent is fine-tuned
    for 10 epochs by maximum likelihood on (up to) the 51 best-scoring unique
    sequences.  Results (memory, checkpoint, deduplicated memory SMILES) are
    written to ``save_dir``.

    Args:
        pattern: optional scaffold pattern; when given a
            scaffold_constrained_RNN is used and sampling is conditioned on it.
        restore_agent_from: checkpoint used to initialise the Agent.
        scoring_function: name understood by ``get_scoring_function``.
        scoring_function_kwargs: extra kwargs for the scoring function, or None.
        save_dir: output directory; a timestamped directory is used if None.
        learning_rate: Adam learning rate.
        batch_size: sequences sampled per step.
        n_steps: number of hill-climbing iterations.
        num_processes: worker processes for the scoring function.
        use_custom_voc: vocabulary file path.
    """
    voc = Vocabulary(init_from_file=use_custom_voc)
    start_time = time.time()

    if pattern:
        Agent = scaffold_constrained_RNN(voc)
    else:
        Agent = RNN(voc)

    logger = VizardLog('data/logs')

    # Checkpoints may hold GPU tensors; remap to CPU when cuda is absent.
    if torch.cuda.is_available():
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from,
                       map_location=lambda storage, loc: storage))

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # BUG FIX: `scoring_function_kwargs` defaults to None and `**None`
    # raises TypeError -- fall back to an empty dict.
    scoring_function = get_scoring_function(
        scoring_function=scoring_function,
        num_processes=num_processes,
        **(scoring_function_kwargs or {}))

    # For policy based RL, we normally train on-policy and correct for the
    # fact that more likely actions occur more often (which means the agent
    # can get biased towards them).  Using experience replay is therefore not
    # as theoretically sound as it is for value based RL, but it seems to
    # work well.
    experience = Experience(voc)

    # Log some network weights that can be dynamically plotted with the
    # Vizard bokeh app.
    logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
               "init_weight_GRU_layer_2_w_ih")
    logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
               "init_weight_GRU_layer_2_w_hh")
    logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30],
               "init_weight_GRU_embedding")
    logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(),
               "init_weight_GRU_layer_2_b_ih")
    logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(),
               "init_weight_GRU_layer_2_b_hh")

    # Information for the logger
    step_score = [[], []]

    print("Model initialized, starting training...")

    for step in range(n_steps):
        # Sample from Agent (conditioned on the scaffold pattern if given).
        if pattern:
            seqs, agent_likelihood, entropy = Agent.sample(pattern, batch_size)
        else:
            seqs, agent_likelihood, entropy = Agent.sample(batch_size)
        gc.collect()

        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)
        new_experience = zip(smiles, score, agent_likelihood)
        experience.add_experience(new_experience)

        # Sample indices in descending score order.
        indexes = np.flip(np.argsort(np.array(score)))

        # Train the agent for 10 epochs on hill-climbing procedure
        for epoch in range(10):
            loss = Variable(torch.zeros(1))
            counter = 0
            seen_seqs = set()  # set for O(1) membership (was a list)
            for j in indexes:
                if counter > 50:
                    break
                seq = seqs[j]
                s = smiles[j]
                if s not in seen_seqs:
                    seen_seqs.add(s)
                    log_p, _ = Agent.likelihood(Variable(seq).view(1, -1))
                    loss -= log_p.mean()
                    counter += 1
            # BUG FIX: guard against ZeroDivisionError when no sequence was
            # selected.
            loss /= max(counter, 1)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print(
            "\n Step {} Fraction valid SMILES: {:4.1f} Time elapsed: {:.2f}h Time left: {:.2f}h"
            .format(step, fraction_valid_smiles(smiles) * 100, time_elapsed,
                    time_left))
        # BUG FIX: the old header advertised Agent/Prior/Target columns that
        # are never printed in this loop.
        print(" Score SMILES")
        for i in range(10):
            print(" {:6.2f} {}".format(score[i], smiles[i]))

        # Need this for Vizard plotting
        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # Log some weights
        logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
                   "weight_GRU_layer_2_w_ih")
        logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
                   "weight_GRU_layer_2_w_hh")
        logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30],
                   "weight_GRU_embedding")
        logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(),
                   "weight_GRU_layer_2_b_ih")
        logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(),
                   "weight_GRU_layer_2_b_hh")
        logger.log("\n".join([smiles + "\t" + str(round(score, 2))
                              for smiles, score in zip(smiles[:12],
                                                       score[:12])]),
                   "SMILES", dtype="text", overwrite=True)
        logger.log(np.array(step_score), "Scores")

    # If the entire training finishes, we create a new folder where we save
    # this python file as well as some sampled sequences and the contents of
    # the experience (which are the highest scored sequences seen during
    # training).
    if not save_dir:
        save_dir = 'data/results/run_' + time.strftime(
            "%Y-%m-%d-%H_%M_%S", time.localtime())
    try:
        os.makedirs(save_dir)
    except OSError:
        print("Folder already existing... overwriting previous results")
    copyfile('train_agent.py', os.path.join(save_dir, "train_agent.py"))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'Agent.ckpt'))

    # Write the memory SMILES, deduplicated on their stereo-stripped
    # canonical form.
    previous_smiles = set()
    with open(os.path.join(save_dir, "memory.smi"), 'w') as f:
        for exp in experience.memory:
            mol = Chem.MolFromSmiles(exp[0])
            if mol is None:
                continue
            # BUG FIX: RemoveStereochemistry modifies the mol IN PLACE and
            # returns None.  The old code passed its return value straight to
            # MolToSmiles, which always raised, and the surrounding
            # `except: pass` silently swallowed it -- so memory.smi was
            # always left empty.
            Chem.rdmolops.RemoveStereochemistry(mol)
            canonical = Chem.MolToSmiles(mol)
            if canonical not in previous_smiles:
                f.write("{}\n".format(exp[0]))
                previous_smiles.add(canonical)
def model(data='input.txt', hidden_size=256, seq_length=100, depth_size=3,
          batch_size=10, drop_rate=0.1, num_iteration=100, learning_rate=0.01,
          img_name='Figure'):
    """Train a character-level RNN language model on a text file.

    Every 100 optimisation steps (and on the first step) the current loss is
    recorded, a 200-character sample is printed, and the loss curve is
    re-plotted.

    Args:
        data: file name under ./data/ containing the training text.
        hidden_size: RNN hidden state size.
        seq_length: characters per training sub-sequence.
        depth_size: number of stacked RNN layers.
        batch_size: number of parallel text streams per mini-batch.
        drop_rate: dropout rate.
        num_iteration: number of passes over the text.
        learning_rate: optimiser learning rate.
        img_name: file name for the loss-curve image.

    Returns:
        (model, ch2ix, ix2ch): the trained model and the char/index maps.
    """
    # BUG FIX: the file handle was opened inline and never closed; a context
    # manager releases it even if decoding raises.
    with open('./data/' + data, 'rb') as fh:
        data = fh.read().decode('UTF-8')
    chars = sorted(set(data))
    data_size, vocab_size = len(data), len(chars)
    print('Data has %d total characters, %d unique characters.' %
          (data_size, vocab_size))

    # Make a dictionary that maps {character:index} and {index:character}
    ch2ix, ix2ch = char_to_ix(chars), ix_to_char(chars)

    # Set RNN model
    model = RNN(vocab_size, vocab_size, hidden_size, seq_length, depth_size,
                batch_size, drop_rate)

    cnt = 0
    losses = {}
    graph = Graph('Iteration', 'Loss')

    # Optimize model
    start = timeit.default_timer()
    for n in range(num_iteration):
        model.initialize_hidden_state()
        model.initialize_optimizer()
        # Split text by mini-batch with batch_size: each of the batch_size
        # streams is a contiguous slab of the text of length batch_length.
        batch_length = data_size // batch_size
        for i in range(0, batch_length - seq_length, seq_length):
            mini_batch_X, mini_batch_Y = [], []
            for j in range(0, data_size - batch_length + 1, batch_length):
                mini_batch_X.append(
                    one_hot(data[j + i:j + i + seq_length], ch2ix))
                # Targets are the inputs shifted by one character.
                mini_batch_Y.append(
                    [ch2ix[ch] for ch in data[j + i + 1:j + i + seq_length + 1]])
            mini_batch_X = np.array(mini_batch_X)
            mini_batch_Y = np.array(mini_batch_Y)
            model.optimize(mini_batch_X, mini_batch_Y,
                           learning_rate=learning_rate)
            cnt += 1
            # Periodic progress report: loss, timing, a text sample and an
            # updated loss plot.
            if cnt % 100 == 0 or cnt == 1:
                stop = timeit.default_timer()
                loss = model.loss()
                losses[cnt] = loss
                print("\n######################################")
                print("Total iteration: %d" % (n + 1))
                print("Iteration: %d" % cnt)
                print("Loss: %f" % loss)
                print("Time: %f" % (stop - start))
                ix = np.random.randint(0, vocab_size)
                sample_ixes = model.sample(ix, 200)
                txt = ''.join(ix2ch[ix] for ix in sample_ixes)
                print("\n### Starts Here ###\n\n" + txt.rstrip() +
                      "\n\n### Ends Here ###")
                print("######################################")
                graph_x = np.array(sorted(losses))
                graph_y = np.array([losses[key] for key in graph_x])
                graph.update(graph_x, graph_y, img_name=img_name)
    return model, ch2ix, ix2ch