Exemple #1
0
def pretrain(restore_from=None):
    """Trains the Prior RNN"""

    # Read vocabulary from a file
    voc = Vocabulary(init_from_file="data/Voc")

    # Create a Dataset from a SMILES file
    moldata = MolData("data/mols_filtered.smi", voc)
    data = DataLoader(moldata,
                      batch_size=128,
                      shuffle=True,
                      drop_last=True,
                      collate_fn=MolData.collate_fn)

    Prior = RNN(voc)

    # Can restore from a saved RNN
    if restore_from:
        Prior.rnn.load_state_dict(torch.load(restore_from))

    optimizer = torch.optim.Adam(Prior.rnn.parameters(), lr=0.001)
    for epoch in range(1, 6):
        # When training on a few million compounds, this model converges
        # in a few of epochs or even faster. If model sized is increased
        # its probably a good idea to check loss against an external set of
        # validation SMILES to make sure we dont overfit too much.
        for step, batch in tqdm(enumerate(data), total=len(data)):

            # Sample from DataLoader
            seqs = batch.long()

            # Calculate loss
            log_p, _ = Prior.likelihood(seqs)
            loss = -log_p.mean()

            # Calculate gradients and take a step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Every 500 steps we decrease learning rate and print some information
            if step % 500 == 0 and step != 0 and False:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write("*" * 50)
                tqdm.write(
                    "Epoch {:3d}   step {:3d}    loss: {:5.2f}\n".format(
                        epoch, step, loss.data[0]))
                seqs, likelihood, _ = Prior.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    if i < 5:
                        tqdm.write(smile)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid /
                                                             len(seqs)))
                tqdm.write("*" * 50 + "\n")
Exemple #2
0
def train_model():
    """Do transfer learning for generating SMILES"""
    voc = Vocabulary(init_from_file='data/Voc')
    cano_smi_file('refined_smii.csv', 'refined_smii_cano.csv')
    moldata = MolData('refined_smii_cano.csv', voc)
    # Monomers 67 and 180 were removed because of the unseen [C-] in voc
    # DAs containing [se] [SiH2] [n] removed: 38 molecules
    data = DataLoader(moldata,
                      batch_size=64,
                      shuffle=True,
                      drop_last=False,
                      collate_fn=MolData.collate_fn)
    transfer_model = RNN(voc)

    if torch.cuda.is_available():
        transfer_model.rnn.load_state_dict(torch.load('data/Prior.ckpt'))
    else:
        transfer_model.rnn.load_state_dict(
            torch.load('data/Prior.ckpt',
                       map_location=lambda storage, loc: storage))

    # for param in transfer_model.rnn.parameters():
    #     param.requires_grad = False
    optimizer = torch.optim.Adam(transfer_model.rnn.parameters(), lr=0.001)

    for epoch in range(1, 10):

        for step, batch in tqdm(enumerate(data), total=len(data)):
            seqs = batch.long()
            log_p, _ = transfer_model.likelihood(seqs)
            loss = -log_p.mean()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 5 == 0 and step != 0:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write('*' * 50)
                tqdm.write(
                    "Epoch {:3d}   step {:3d}    loss: {:5.2f}\n".format(
                        epoch, step, loss.data[0]))
                seqs, likelihood, _ = transfer_model.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    if i < 5:
                        tqdm.write(smile)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid /
                                                             len(seqs)))
                tqdm.write("*" * 50 + '\n')
                torch.save(transfer_model.rnn.state_dict(),
                           "data/transfer_model2.ckpt")

        torch.save(transfer_model.rnn.state_dict(),
                   "data/transfer_modelw.ckpt")
Exemple #3
0
def Transfer(restore_from = None):
    """Trains the Prior RNN"""

    voc = Vocabulary(init_from_file="./Voc")
    moldata = MolData("tl_filtered.smi", voc)
    data = DataLoader(moldata, batch_size=32, shuffle=True, drop_last=True,
                      collate_fn=MolData.collate_fn)
    
    Prior = RNN(voc)
    
    # Can restore from a saved RNN
    if restore_from:
        Prior.rnn.load_state_dict(torch.load(restore_from))

    optimizer = torch.optim.Adam(Prior.rnn.parameters(), lr = 0.001)
    for epoch in range(1, 101):
        for step, batch in tqdm(enumerate(data), total=len(data)):

            # Sample from DataLoader
            seqs = batch.long()

            # Calculate loss
            log_p, _ = Prior.likelihood(seqs)
            loss = - log_p.mean()

            # Calculate gradients and take a step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Every 2 epoch we decrease learning rate and print some information
            if epoch % 2 == 0 and step == 1:
                #decrease_learning_rate(optimizer, decrease_by=0.03)
                decrease_learning_rate(optimizer, decrease_by=0.03)
            if epoch % 10 == 0 and step == 1:
                tqdm.write("*" * 50)
                tqdm.write("Epoch {:3d}   step {:3d}    loss: {:5.2f}\n".format(epoch, step, loss.data[0]))
                seqs, likelihood, _ = Prior.sample(100)
                valid = 0
                f = open('tran_output.smi', 'a')
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                        f.write(smile + "\n")
                    if i < 10:
                        tqdm.write(smile)
                f.close()
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid / len(seqs)))
                tqdm.write("*" * 50 + "\n")               
        # Save the Prior
        torch.save(Prior.rnn.state_dict(), "data/100_epochs_transfer.ckpt")
Exemple #4
0
def hill_climbing(pattern=None,
                  restore_agent_from='data/Prior.ckpt',
                  scoring_function='tanimoto',
                  scoring_function_kwargs=None,
                  save_dir=None,
                  learning_rate=0.0005,
                  batch_size=64,
                  n_steps=10,
                  num_processes=0,
                  use_custom_voc="data/Voc"):

    voc = Vocabulary(init_from_file=use_custom_voc)

    start_time = time.time()
    if pattern:
        Agent = scaffold_constrained_RNN(voc)
    else:
        Agent = RNN(voc)

    logger = VizardLog('data/logs')

    if torch.cuda.is_available():
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from,
                       map_location=lambda storage, loc: storage))

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # Scoring_function
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **scoring_function_kwargs)

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefor not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    # Log some network weights that can be dynamically plotted with the Vizard bokeh app
    logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
               "init_weight_GRU_layer_2_w_ih")
    logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
               "init_weight_GRU_layer_2_w_hh")
    logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30],
               "init_weight_GRU_embedding")
    logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(),
               "init_weight_GRU_layer_2_b_ih")
    logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(),
               "init_weight_GRU_layer_2_b_hh")

    # Information for the logger
    step_score = [[], []]

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from Agent
        if pattern:
            seqs, agent_likelihood, entropy = Agent.sample(pattern, batch_size)
        else:
            seqs, agent_likelihood, entropy = Agent.sample(batch_size)
        gc.collect()
        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        new_experience = zip(smiles, score, agent_likelihood)
        experience.add_experience(new_experience)

        indexes = np.flip(np.argsort(np.array(score)))
        # Train the agent for 10 epochs on hill-climbing procedure
        for epoch in range(10):
            loss = Variable(torch.zeros(1))
            counter = 0
            seen_seqs = []
            for j in indexes:
                if counter > 50:
                    break
                seq = seqs[j]
                s = smiles[j]
                if s not in seen_seqs:
                    seen_seqs.append(s)
                    log_p, _ = Agent.likelihood(Variable(seq).view(1, -1))
                    loss -= log_p.mean()
                    counter += 1
            loss /= counter
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print(
            "\n       Step {}   Fraction valid SMILES: {:4.1f}  Time elapsed: {:.2f}h Time left: {:.2f}h"
            .format(step,
                    fraction_valid_smiles(smiles) * 100, time_elapsed,
                    time_left))
        print("  Agent    Prior   Target   Score             SMILES")
        for i in range(10):
            print(" {:6.2f}     {}".format(score[i], smiles[i]))
        # Need this for Vizard plotting
        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # Log some weights
        logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
                   "weight_GRU_layer_2_w_ih")
        logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
                   "weight_GRU_layer_2_w_hh")
        logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30],
                   "weight_GRU_embedding")
        logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(),
                   "weight_GRU_layer_2_b_ih")
        logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(),
                   "weight_GRU_layer_2_b_hh")
        logger.log("\n".join([smiles + "\t" + str(round(score, 2)) for smiles, score in zip \
                            (smiles[:12], score[:12])]), "SMILES", dtype="text", overwrite=True)
        logger.log(np.array(step_score), "Scores")

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experinence (which are the highest
    # scored sequences seen during training)
    if not save_dir:
        save_dir = 'data/results/run_' + time.strftime("%Y-%m-%d-%H_%M_%S",
                                                       time.localtime())
    try:
        os.makedirs(save_dir)
    except:
        print("Folder already existing... overwriting previous results")

    copyfile('train_agent.py', os.path.join(save_dir, "train_agent.py"))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'Agent.ckpt'))
    previous_smiles = []
    with open(os.path.join(save_dir, "memory.smi"), 'w') as f:
        for i, exp in enumerate(experience.memory):
            try:
                if Chem.MolToSmiles(
                        Chem.rdmolops.RemoveStereochemistry(
                            Chem.MolFromSmiles(
                                exp[0]))) not in previous_smiles:
                    f.write("{}\n".format(exp[0]))
                    previous_smiles.append(
                        Chem.MolToSmiles(
                            Chem.rdmolops.RemoveStereochemistry(
                                Chem.MolFromSmiles(exp[0]))))
            except:
                pass
Exemple #5
0
def train_agent(restore_prior_from='data/Prior.ckpt',
                restore_agent_from='data/Prior.ckpt',
                scoring_function='tanimoto',
                scoring_function_kwargs=None,
                save_dir=None,
                learning_rate=0.0005,
                batch_size=64,
                n_steps=3000,
                num_processes=0,
                sigma=60,
                experience_replay=0):

    voc = Vocabulary(init_from_file="data/Voc")

    start_time = time.time()

    Prior = RNN(voc)
    Agent = RNN(voc)

    logger = VizardLog('data/logs')

    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we dont have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        Prior.rnn.load_state_dict(torch.load(restore_prior_from))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Prior.rnn.load_state_dict(
            torch.load(restore_prior_from,
                       map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from,
                       map_location=lambda storage, loc: storage))

    # We dont need gradients with respect to Prior
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=0.0005)

    # Scoring_function
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **scoring_function_kwargs)

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefor not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    # Log some network weights that can be dynamically plotted with the Vizard bokeh app
    logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
               "init_weight_GRU_layer_2_w_ih")
    logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
               "init_weight_GRU_layer_2_w_hh")
    logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30],
               "init_weight_GRU_embedding")
    logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(),
               "init_weight_GRU_layer_2_b_ih")
    logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(),
               "init_weight_GRU_layer_2_b_hh")

    # Information for the logger
    step_score = [[], []]

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(
                exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow(
                (Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat(
                (agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print(
            "\n       Step {}   Fraction valid SMILES: {:4.1f}  Time elapsed: {:.2f}h Time left: {:.2f}h"
            .format(step,
                    fraction_valid_smiles(smiles) * 100, time_elapsed,
                    time_left))
        print("  Agent    Prior   Target   Score             SMILES")
        for i in range(10):
            print(" {:6.2f}   {:6.2f}  {:6.2f}  {:6.2f}     {}".format(
                agent_likelihood[i], prior_likelihood[i],
                augmented_likelihood[i], score[i], smiles[i]))
        # Need this for Vizard plotting
        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # Log some weights
        logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
                   "weight_GRU_layer_2_w_ih")
        logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
                   "weight_GRU_layer_2_w_hh")
        logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30],
                   "weight_GRU_embedding")
        logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(),
                   "weight_GRU_layer_2_b_ih")
        logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(),
                   "weight_GRU_layer_2_b_hh")
        logger.log("\n".join([smiles + "\t" + str(round(score, 2)) for smiles, score in zip \
                            (smiles[:12], score[:12])]), "SMILES", dtype="text", overwrite=True)
        logger.log(np.array(step_score), "Scores")

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experinence (which are the highest
    # scored sequences seen during training)
    if not save_dir:
        save_dir = 'data/results/run_' + time.strftime("%Y-%m-%d-%H_%M_%S",
                                                       time.localtime())
    os.makedirs(save_dir)
    copyfile('train_agent.py', os.path.join(save_dir, "train_agent.py"))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'Agent.ckpt'))

    seqs, agent_likelihood, entropy = Agent.sample(256)
    prior_likelihood, _ = Prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smiles, score, prior_likelihood in zip(smiles, score,
                                                   prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smiles, score,
                                                  prior_likelihood))
def train_model(voc_dir,
                smi_dir,
                prior_dir,
                tf_dir,
                tf_process_dir,
                freeze=False):
    """
    Transfer learning on target molecules using the SMILES structures
    Args:
        voc_dir: location of the vocabulary
        smi_dir: location of the SMILES file used for transfer learning
        prior_dir: location of prior trained model to initialize transfer learning
        tf_dir: location to save the transfer learning model
        tf_process_dir: location to save the SMILES sampled while doing transfer learning
        freeze: Bool. If true, all parameters in the RNN will be frozen except for the last linear layer during
        transfer learning.

    Returns: None

    """
    voc = Vocabulary(init_from_file=voc_dir)
    #cano_smi_file('all_smi_refined.csv', 'all_smi_refined_cano.csv')
    moldata = MolData(smi_dir, voc)
    # Monomers 67 and 180 were removed because of the unseen [C-] in voc
    # DAs containing [C] removed: 43 molecules in 5356; Ge removed: 154 in 5356; [c] removed 4 in 5356
    # [S] 1 molecule in 5356
    data = DataLoader(moldata,
                      batch_size=64,
                      shuffle=True,
                      drop_last=False,
                      collate_fn=MolData.collate_fn)
    transfer_model = RNN(voc)
    # if freeze=True, freeze all parameters except those in the linear layer
    if freeze:
        for param in transfer_model.rnn.parameters():
            param.requires_grad = False
        transfer_model.rnn.linear = nn.Linear(512, voc.vocab_size)
    if torch.cuda.is_available():
        transfer_model.rnn.load_state_dict(torch.load(prior_dir))
    else:
        transfer_model.rnn.load_state_dict(
            torch.load(prior_dir, map_location=lambda storage, loc: storage))

    optimizer = torch.optim.Adam(transfer_model.rnn.parameters(), lr=0.0005)

    smi_lst = []
    epoch_lst = []
    for epoch in range(1, 11):

        for step, batch in tqdm(enumerate(data), total=len(data)):
            seqs = batch.long()
            log_p, _ = transfer_model.likelihood(seqs)
            loss = -log_p.mean()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 80 == 0 and step != 0:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write('*' * 50)
                tqdm.write(
                    "Epoch {:3d}   step {:3d}    loss: {:5.2f}\n".format(
                        epoch, step, loss.data[0]))
                seqs, likelihood, _ = transfer_model.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    if i < 5:
                        tqdm.write(smile)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid /
                                                             len(seqs)))
                tqdm.write("*" * 50 + '\n')
                torch.save(transfer_model.rnn.state_dict(), tf_dir)
        seqs, likelihood, _ = transfer_model.sample(1024)
        valid = 0
        #valid_smis = []
        for i, seq in enumerate(seqs.cpu().numpy()):
            smile = voc.decode(seq)
            if Chem.MolFromSmiles(smile):
                try:
                    AllChem.GetMorganFingerprintAsBitVect(
                        Chem.MolFromSmiles(smile), 2, 1024)
                    valid += 1
                    smi_lst.append(smile)
                    epoch_lst.append(epoch)
                except:
                    continue

        torch.save(transfer_model.rnn.state_dict(), tf_dir)

    transfer_process_df = pd.DataFrame(columns=['SMILES', 'Epoch'])
    transfer_process_df['SMILES'] = pd.Series(data=smi_lst)
    transfer_process_df['Epoch'] = pd.Series(data=epoch_lst)
    transfer_process_df.to_csv(tf_process_dir)
Exemple #7
0
def pretrain(runname='chembl', restore_from=None):
    """Trains the prior RNN"""

    writer = SummaryWriter('logs/%s' % runname)

    # Read vocabulary from a file
    voc = Vocabulary(init_from_file="data/Voc_%s" % runname)

    # Create a Dataset from a SMILES file
    moldata = MolData("data/mols_%s_filtered.smi" % runname, voc)
    data = DataLoader(moldata, batch_size=128, shuffle=True, drop_last=True, collate_fn=MolData.collate_fn)

    prior = RNN(voc)
    # writer.add_graph(prior.rnn, data.dataset[0])

    # Can restore from a saved RNN
    if restore_from:
        prior.rnn.load_state_dict(torch.load(restore_from))

    optimizer = torch.optim.Adam(prior.rnn.parameters(), lr=0.001)

    running_loss = 0.0
    for epoch in range(1, 6):
        # When training on a few million compounds, this model converges
        # in a few of epochs or even faster. If model sized is increased
        # its probably a good idea to check loss against an external set of
        # validation SMILES to make sure we don't overfit too much.
        for step, batch in tqdm(enumerate(data), total=len(data)):

            # Sample from DataLoader
            seqs = batch.long()

            # Calculate loss
            log_p, entropy = prior.likelihood(seqs)
            loss = -log_p.mean()
            running_loss += loss.item()

            # Calculate gradients and take a step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Every 250 steps we decrease learning rate and print some information
            if step % 250 == 249 and step != 0:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                seqs, likelihood, _ = prior.sample(128)
                valid = 0
                smiles = list()
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                        if valid < 5:
                            tqdm.write(smile)
                            smiles.append(smile.strip())

                tqdm.write("*" * 50)
                tqdm.write("Epoch {:3d}   step {:3d}    loss: {:5.2f}\n".format(epoch, step, running_loss / step))
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid / len(seqs)))
                tqdm.write("*" * 50 + "\n")
                torch.save(prior.rnn.state_dict(), "data/prior_%s.ckpt" % runname)
                writer.add_scalar('training loss', running_loss / 250, epoch * len(data) + step)
                writer.add_scalar('valid_smiles', 100 * valid / len(seqs), epoch * len(data) + step)
                writer.add_image('sampled_mols', mol_to_torchimage(smiles))
                running_loss = 0.0

        # Save the prior and close writer
        torch.save(prior.rnn.state_dict(), "data/prior_%s.ckpt" % runname)
        writer.close()
Exemple #8
0
def train_agent(restore_prior_from='data/Prior.ckpt',
                restore_agent_from='data/Prior.ckpt',
                voc_file='data/Voc',
                molscore_config=None,
                learning_rate=0.0005,
                batch_size=64, n_steps=3000, sigma=60,
                experience_replay=0):

    voc = Vocabulary(init_from_file=voc_file)

    start_time = time.time()

    # Scoring_function
    scoring_function = MolScore(molscore_config)
    scoring_function.log_parameters({'batch_size': batch_size, 'sigma': sigma})

    print("Building RNNs")

    Prior = RNN(voc)
    Agent = RNN(voc)

    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we dont have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        print("Cuda available, loading prior & agent")
        Prior.rnn.load_state_dict(torch.load(restore_prior_from))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        print("Cuda not available, remapping to cpu")
        Prior.rnn.load_state_dict(torch.load(restore_prior_from, map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from, map_location=lambda storage, loc: storage))

    # We dont need gradients with respect to Prior
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # For logging purposes let's save some training parameters not captured by molscore
    with open(os.path.join(scoring_function.save_dir, 'reinvent_parameters.txt'), 'wt') as f:
        [f.write(f'{p}: {v}\n') for p, v in {'learning_rate': learning_rate, 'batch_size': batch_size,
                                           'n_steps': n_steps, 'sigma': sigma,
                                           'experience_replay': experience_replay}.items()]

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefore not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)

        # Using molscore instead here
        try:
            score = scoring_function(smiles, step=step)
            augmented_likelihood = prior_likelihood + sigma * Variable(score)
        except:  # If anything goes wrong with molscore, write scores and save .ckpt and kill monitor
            with open(os.path.join(scoring_function.save_dir,
                                 f'failed_smiles_{scoring_function.step}.smi'), 'wt') as f:
                [f.write(f'{smi}\n') for smi in smiles]
            torch.save(Agent.rnn.state_dict(),
                       os.path.join(scoring_function.save_dir, f'Agent_{step}.ckpt'))
            scoring_function.write_scores()
            scoring_function.kill_dash_monitor()
            raise

        # Calculate loss
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience)>4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat((agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = - (1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print(f"\n       Step {step}   Fraction valid SMILES: {fraction_valid_smiles(smiles) * 100:4.1f}\
          Time elapsed: {time_elapsed:.2f}h Time left: {time_left:.2f}h")
        print("  Agent   Prior   Target   Score             SMILES")
        for i in range(10):
            print(f" {agent_likelihood[i]:6.2f}   {prior_likelihood[i]:6.2f}  {augmented_likelihood[i]:6.2f}  {score[i]:6.2f}     {smiles[i]}")

        # Save the agent weights every 250 iterations  ####
        if step % 250 == 0 and step != 0:
            torch.save(Agent.rnn.state_dict(),
                       os.path.join(scoring_function.save_dir, f'Agent_{step}.ckpt'))

    # If the entire training finishes, write out MolScore dataframe, kill dash_utils monitor and
    # save the final Agent.ckpt
    torch.save(Agent.rnn.state_dict(), os.path.join(scoring_function.save_dir, f'Agent_{n_steps}.ckpt'))
    scoring_function.write_scores()
    scoring_function.kill_dash_monitor()
    
    return
    step_score = [[], []]

    print('Model initialized, starting training...')

    for step in range(n_steps):
        # sample from agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_idx = unique(seqs)
        seqs = seqs[unique_idx]
        agent_likelihood = agent_likelihood[unique_idx]
        entropy = entropy[unique_idx]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience replay
        # First example
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
Exemple #10
0
def pretrain(restore_from=None,
             save_to="data/Prior.ckpt",
             data="data/mols_filtered.smi",
             voc_file="data/Voc",
             batch_size=128,
             learning_rate=0.001,
             n_epochs=5,
             store_loss_dir=None,
             embedding_size=32):
    """Trains the Prior RNN"""

    # Read vocabulary from a file
    voc = Vocabulary(init_from_file=voc_file)

    # Create a Dataset from a SMILES file
    moldata = MolData(data, voc)
    data = DataLoader(moldata,
                      batch_size=batch_size,
                      shuffle=True,
                      drop_last=True,
                      collate_fn=MolData.collate_fn)

    Prior = RNN(voc, embedding_size)

    # Adding a file to log loss info
    if store_loss_dir is None:
        out_f = open("loss.csv", "w")
    else:
        out_f = open("{}/loss.csv".format(store_loss_dir.rstrip("/")), "w")

    out_f.write("Step,Loss\n")

    # Can restore from a saved RNN
    if restore_from:
        Prior.rnn.load_state_dict(torch.load(restore_from))

    # For later plotting the loss
    training_step_counter = 0
    n_logging = 100

    optimizer = torch.optim.Adam(Prior.rnn.parameters(), lr=learning_rate)
    for epoch in range(1, n_epochs + 1):
        # When training on a few million compounds, this model converges
        # in a few of epochs or even faster. If model sized is increased
        # its probably a good idea to check loss against an external set of
        # validation SMILES to make sure we dont overfit too much.
        for step, batch in tqdm(enumerate(data), total=len(data)):

            # Sample from DataLoader
            seqs = batch.long()

            # Calculate loss
            log_p, _ = Prior.likelihood(seqs)
            loss = -log_p.mean()

            # Calculate gradients and take a step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging the loss to a file
            if training_step_counter % n_logging == 0:
                out_f.write("{},{}\n".format(step, loss))
            training_step_counter += 1

            # Every 500 steps we decrease learning rate and print some information
            if step % 500 == 0 and step != 0:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write("*" * 50)
                tqdm.write(
                    "Epoch {:3d}   step {:3d}    loss: {:5.2f}\n".format(
                        epoch, step, loss.data))
                seqs, likelihood, _ = Prior.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    if i < 5:
                        tqdm.write(smile)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid /
                                                             len(seqs)))
                tqdm.write("*" * 50 + "\n")
                torch.save(Prior.rnn.state_dict(), save_to)

        # Save the Prior
        torch.save(Prior.rnn.state_dict(), save_to)

    f_out.close()
Exemple #11
0
def train_agent(runname='celecoxib',
                priorname='chembl',
                scoring_function='Tanimoto',
                scoring_function_kwargs=None,
                save_dir=None,
                batch_size=64,
                n_steps=3000,
                num_processes=6,
                sigma=60,
                experience_replay=5,
                lr=0.0005):
    print("\nStarting run %s with prior %s ..." % (runname, priorname))
    start_time = time.time()

    voc = Vocabulary(init_from_file="data/Voc_%s" % priorname)

    prior = RNN(voc)
    agent = RNN(voc)

    writer = SummaryWriter('logs/%s' % runname)

    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we dont have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        prior.rnn.load_state_dict(torch.load('data/prior_%s.ckpt' % priorname))
        agent.rnn.load_state_dict(torch.load('data/prior_%s.ckpt' % priorname))
    else:
        prior.rnn.load_state_dict(
            torch.load('data/prior_%s.ckpt' % priorname,
                       map_location=lambda storage, loc: storage))
        agent.rnn.load_state_dict(
            torch.load('data/prior_%s.ckpt' % priorname,
                       map_location=lambda storage, loc: storage))

    # We dont need gradients with respect to Prior
    for param in prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(agent.rnn.parameters(), lr=lr)

    # Scoring_function
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **scoring_function_kwargs)

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefor not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    print("Model initialized, starting training...")

    for step in range(n_steps):
        # Sample from Agent
        seqs, agent_likelihood, entropy = agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_ids = unique(seqs)
        seqs = seqs[unique_ids]
        agent_likelihood = agent_likelihood[unique_ids]
        entropy = entropy[unique_ids]

        # Get prior likelihood and score
        prior_likelihood, _ = prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = agent.likelihood(
                exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow(
                (Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat(
                (agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        best_memory = experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print(
            "\n       Step {}   Fraction valid SMILES: {:4.1f}  Time elapsed: {:.2f}h Time left: {:.2f}h\n"
            .format(step,
                    fraction_valid_smiles(smiles) * 100, time_elapsed,
                    time_left))
        print("  Agent    Prior   Target   Score             SMILES")
        for i in range(10):
            print(" {:6.2f}   {:6.2f}  {:6.2f}  {:6.2f}     {}".format(
                agent_likelihood[i], prior_likelihood[i],
                augmented_likelihood[i], score[i], smiles[i]))

        # Log
        writer.add_scalar('loss', loss.item(), step)
        writer.add_scalar('score', np.mean(score), step)
        writer.add_scalar('entropy', entropy.mean(), step)
        if best_memory:
            writer.add_scalar('best_memory', best_memory, step)

        # get 4 random valid smiles and scores for logging
        val_ids = np.array(
            [i for i, s in enumerate(smiles) if is_valid_mol(s)])
        val_ids = np.random.choice(val_ids, 4, replace=False)
        smiles = np.array(smiles)[val_ids]
        score = ['%.3f' % s for s in np.array(score)[val_ids]]
        writer.add_image('generated_mols', mol_to_torchimage(smiles, score),
                         step)

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experinence (which are the highest
    # scored sequences seen during training)
    if not save_dir:
        save_dir = 'results/%s' % runname + time.strftime(
            "%Y-%m-%d-%H_%M_%S", time.localtime())
    os.makedirs(save_dir)
    copyfile('agent.py', os.path.join(save_dir, "agent_%s.py" % runname))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(agent.rnn.state_dict(),
               os.path.join(save_dir, 'Agent_%s.ckpt' % runname))

    seqs, agent_likelihood, entropy = agent.sample(256)
    prior_likelihood, _ = prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled.txt"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smiles, score, prior_likelihood in zip(smiles, score,
                                                   prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smiles, score,
                                                  prior_likelihood))

    print("\nDONE! Whole run took %s" %
          datetime.timedelta(seconds=time.time() - start_time))