Esempio n. 1
0
def main(voc_file='data/Voc',
         restore_model_from='data/Prior.ckpt',
         output_file='data/Prior_10k.smi',
         sample_size=10000):
    """Sample SMILES from a trained RNN and write them to a file, one per line.

    Sequences are drawn in batches of 100 and duplicates inside each batch are
    dropped, so the output can contain fewer than ``sample_size`` lines.

    Args:
        voc_file: Path handed to ``Vocabulary(init_from_file=...)``.
        restore_model_from: Checkpoint loaded into ``Agent.rnn``.
        output_file: Destination file; its parent directory is created when
            it does not exist yet.
        sample_size: Requested number of samples; ``sample_size // 100``
            batches are drawn.

    Raises:
        RuntimeError: If CUDA is not available.
    """
    batch_size = 100

    voc = Vocabulary(init_from_file=voc_file)
    print("Setting up networks")
    Agent = RNN(voc)

    if not torch.cuda.is_available():
        # The original raised a plain string, which is itself a TypeError in
        # Python 3; raise a real exception carrying the same message.
        raise RuntimeError('Cuda not available')
    print("Cuda available, loading prior & agent")
    Agent.rnn.load_state_dict(torch.load(restore_model_from))

    SMILES = []
    n_batches = sample_size // batch_size
    for _ in tqdm(range(n_batches), total=n_batches):
        # Sample from Agent; likelihood and entropy are not needed here
        seqs, _, _ = Agent.sample(batch_size)
        # Remove duplicates, ie only consider unique seqs
        seqs = seqs[unique(seqs)]
        SMILES += seq_to_smiles(seqs, voc)

    # dirname is '' for a bare file name, and os.makedirs('') would fail
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output_file, "wt") as f:
        f.writelines(smi + '\n' for smi in SMILES)

    return
Esempio n. 2
0
def generate_smiles(n_smiles=500,
                    restore_from="data/Prior.ckpt",
                    voc_file="data/Voc",
                    embedding_size=128):
    """Generate SMILES strings from a trained RNN checkpoint.

    The requested count is rounded down to a multiple of 32 (the sampling
    batch size), so fewer than ``n_smiles`` strings may be returned.

    Args:
        n_smiles: Requested number of SMILES strings.
        restore_from: Checkpoint loaded into ``generator.rnn``.
        voc_file: Path handed to ``Vocabulary(init_from_file=...)``.
        embedding_size: Second positional argument given to ``RNN``.

    Returns:
        A list with the SMILES strings of all sampled batches.
    """
    per_batch = 32
    n_smiles -= n_smiles % per_batch
    print("Generating %i smiles" % n_smiles)

    voc = Vocabulary(init_from_file=voc_file)
    generator = RNN(voc, embedding_size)

    # Without CUDA the stored tensors are remapped to the CPU while loading
    if torch.cuda.is_available():
        state = torch.load(restore_from)
    else:
        state = torch.load(restore_from,
                           map_location=lambda storage, loc: storage)
    generator.rnn.load_state_dict(state)

    all_smiles = []
    n_batches = int(n_smiles / per_batch)
    for _ in range(n_batches):
        sequences, _, _ = generator.sample(per_batch)
        all_smiles += seq_to_smiles(sequences, voc)

    # Drop the model and release cached GPU memory before returning
    del generator
    torch.cuda.empty_cache()

    return all_smiles
Esempio n. 3
0
def black_box(load_weights='./data/Prior.ckpt', batch_size=1):
    """Sample SMILES conditioned on one hard-coded, normalised descriptor vector.

    The vector is standardised with the moments returned by
    ``get_latent_vector`` for "data/vecs.dat", the RNN is sampled 100 times,
    and the first SMILES of every sample that RDKit can parse is printed,
    followed by the percentage of samples that were valid.

    Args:
        load_weights: Checkpoint loaded into ``Prior.rnn``.
        batch_size: Batch size handed to ``Prior.sample``; only the first
            sequence of each batch is inspected.
    """
    # Read vocabulary from a file
    voc = Vocabulary(init_from_file="data/Voc")

    _, mew, std = get_latent_vector(None, "data/vecs.dat", moments=True)
    vector = np.array([4.2619, 214.96, 512.07, 0.0, 1.0, 0.088, 7.0, 5.0, 100.01, 60.95, 7.0, 5.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 9.0, 10.0, 0.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 23.0, 0.0, 0.0, 25.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 34.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 14.0, 8.0, 0.0, 0.0, 2.0, 3.0, 0.0, 2.0, 0.0, 9.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0, 0.0, 8.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 6.0, 0.0, 2.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0, 3.0, 28.0, 1.0, 5.0, 0.0, 2.0, 10.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 0.0, 2.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 1.0, 0.0, 4.0, 0.0, 0.0, 0.0, 2.0, 5.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 5.0, 0.0, 5.0, 7.0, 4.0, 2.0, 0.0, 16.0, 20.0, 43.0, 83.0, 90.0, 23.0, 8.0, 37.0, 5.0, 24.0, 5.0, 4.0, 16.0, 5.0, 25.0, 93.0, 92.0, 38.0, 0.0, 0.0, 0.0, 4.0])
    # Standardise the raw vector with the stored moments
    data = [(vector - mew) / std]

    Prior = RNN(voc, len(data[0]))

    # Saved models are partially on the GPU; without CUDA they are remapped
    # to the CPU while loading.
    if torch.cuda.is_available():
        weights = torch.load(load_weights)
    else:
        weights = torch.load(load_weights,
                             map_location=lambda storage, loc: storage)
    Prior.rnn.load_state_dict(weights)

    num_smi = 100
    for test_vec in data:
        print('Test vector {}'.format(test_vec))
        test_vec = Variable(test_vec).float()
        all_smi = []
        for _ in range(num_smi):
            seqs, prior_likelihood, entropy = Prior.sample(batch_size, test_vec)
            smiles = seq_to_smiles(seqs, voc)[0]
            # Keep only strings RDKit can turn into a molecule
            if Chem.MolFromSmiles(smiles):
                all_smi.append(smiles)
        valid = len(all_smi)

        for smi in all_smi:
            print(smi)
        print("\n{:>4.1f}% valid SMILES".format(100 * valid / num_smi))
Esempio n. 4
0
def hill_climbing(pattern=None,
                  restore_agent_from='data/Prior.ckpt',
                  scoring_function='tanimoto',
                  scoring_function_kwargs=None,
                  save_dir=None,
                  learning_rate=0.0005,
                  batch_size=64,
                  n_steps=10,
                  num_processes=0,
                  use_custom_voc="data/Voc"):
    """Fine-tune an agent RNN by hill climbing on its own best-scored samples.

    Every step samples a batch, scores the unique SMILES, and then trains the
    agent for 10 epochs to maximise the likelihood of the top-scored
    sequences. After the last step the agent weights and the experience
    memory are written to ``save_dir``.

    Args:
        pattern: If truthy, a ``scaffold_constrained_RNN`` is used and the
            pattern is passed to ``Agent.sample``; otherwise a plain ``RNN``.
        restore_agent_from: Checkpoint loaded into ``Agent.rnn``.
        scoring_function: Name passed to ``get_scoring_function``.
        scoring_function_kwargs: Extra keyword arguments for
            ``get_scoring_function``; ``None`` means no extra arguments.
        save_dir: Output directory; defaults to a time-stamped folder below
            ``data/results``.
        learning_rate: Learning rate of the Adam optimizer.
        batch_size: Number of sequences sampled per step.
        n_steps: Number of sample/score/train steps.
        num_processes: Passed through to ``get_scoring_function``.
        use_custom_voc: Path handed to ``Vocabulary(init_from_file=...)``.
    """
    voc = Vocabulary(init_from_file=use_custom_voc)

    start_time = time.time()
    if pattern:
        Agent = scaffold_constrained_RNN(voc)
    else:
        Agent = RNN(voc)

    logger = VizardLog('data/logs')

    # Without CUDA the stored tensors are remapped to the CPU while loading
    if torch.cuda.is_available():
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from,
                       map_location=lambda storage, loc: storage))

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # Scoring_function; `**None` would raise, so fall back to an empty dict
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **(scoring_function_kwargs or {}))

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefore not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    def log_weights(prefix):
        """Log a subsample of the agent weights under ``prefix`` + name."""
        rnn = Agent.rnn
        logger.log(rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
                   prefix + "weight_GRU_layer_2_w_ih")
        logger.log(rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
                   prefix + "weight_GRU_layer_2_w_hh")
        logger.log(rnn.embedding.weight.cpu().data.numpy()[::30],
                   prefix + "weight_GRU_embedding")
        logger.log(rnn.gru_2.bias_ih.cpu().data.numpy(),
                   prefix + "weight_GRU_layer_2_b_ih")
        logger.log(rnn.gru_2.bias_hh.cpu().data.numpy(),
                   prefix + "weight_GRU_layer_2_b_hh")

    # Log some network weights that can be dynamically plotted with the Vizard bokeh app
    log_weights("init_")

    # Information for the logger
    step_score = [[], []]

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from Agent
        if pattern:
            seqs, agent_likelihood, entropy = Agent.sample(pattern, batch_size)
        else:
            seqs, agent_likelihood, entropy = Agent.sample(batch_size)
        gc.collect()
        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Score the sampled SMILES
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        new_experience = zip(smiles, score, agent_likelihood)
        experience.add_experience(new_experience)

        # Indices sorted by descending score
        indexes = np.flip(np.argsort(np.array(score)))
        # Train the agent for 10 epochs on hill-climbing procedure
        for epoch in range(10):
            loss = Variable(torch.zeros(1))
            counter = 0
            seen_smiles = set()
            for j in indexes:
                # Stops once 51 distinct SMILES were used (check is `> 50`)
                if counter > 50:
                    break
                s = smiles[j]
                if s in seen_smiles:
                    continue
                seen_smiles.add(s)
                log_p, _ = Agent.likelihood(Variable(seqs[j]).view(1, -1))
                loss -= log_p.mean()
                counter += 1
            if counter == 0:
                # Nothing was sampled; avoid dividing by zero and a
                # backward pass on a loss without gradient history
                break
            loss /= counter
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        # step + 1 steps are done, n_steps - step - 1 remain
        time_left = time_elapsed * ((n_steps - step - 1) / (step + 1))
        print(
            "\n       Step {}   Fraction valid SMILES: {:4.1f}  Time elapsed: {:.2f}h Time left: {:.2f}h"
            .format(step,
                    fraction_valid_smiles(smiles) * 100, time_elapsed,
                    time_left))
        print("  Agent    Prior   Target   Score             SMILES")
        # Fewer than 10 unique sequences may remain after de-duplication
        for i in range(min(10, len(smiles))):
            print(" {:6.2f}     {}".format(score[i], smiles[i]))
        # Need this for Vizard plotting
        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # Log some weights
        log_weights("")
        logger.log("\n".join([smi + "\t" + str(round(sc, 2))
                              for smi, sc in zip(smiles[:12], score[:12])]),
                   "SMILES", dtype="text", overwrite=True)
        logger.log(np.array(step_score), "Scores")

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experience (which are the highest
    # scored sequences seen during training)
    if not save_dir:
        save_dir = 'data/results/run_' + time.strftime("%Y-%m-%d-%H_%M_%S",
                                                       time.localtime())
    try:
        os.makedirs(save_dir)
    except FileExistsError:
        print("Folder already existing... overwriting previous results")

    copyfile('train_agent.py', os.path.join(save_dir, "train_agent.py"))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'Agent.ckpt'))

    # Write each remembered SMILES once, treating stereoisomers as duplicates.
    previous_smiles = set()
    with open(os.path.join(save_dir, "memory.smi"), 'w') as f:
        for exp in experience.memory:
            mol = Chem.MolFromSmiles(exp[0])
            if mol is None:
                # Unparsable SMILES are skipped
                continue
            # RemoveStereochemistry edits the molecule in place and returns
            # None, so its result must not be passed on to MolToSmiles.
            Chem.rdmolops.RemoveStereochemistry(mol)
            flat_smiles = Chem.MolToSmiles(mol)
            if flat_smiles not in previous_smiles:
                previous_smiles.add(flat_smiles)
                f.write("{}\n".format(exp[0]))
Esempio n. 5
0
def train_agent(restore_prior_from='data/Prior.ckpt',
                restore_agent_from='data/Prior.ckpt',
                scoring_function='tanimoto',
                scoring_function_kwargs=None,
                save_dir=None,
                learning_rate=0.0005,
                batch_size=64,
                n_steps=3000,
                num_processes=0,
                sigma=60,
                experience_replay=0):
    """Train an agent RNN against a fixed prior using a scoring function.

    Each step samples sequences from the agent, scores them, and minimises the
    squared difference between the agent likelihood and the augmented
    likelihood ``prior_likelihood + sigma * score``. After training, the agent
    weights, the experience memory and 256 scored samples are written to
    ``save_dir``.

    Args:
        restore_prior_from: Checkpoint loaded into ``Prior.rnn``.
        restore_agent_from: Checkpoint loaded into ``Agent.rnn``.
        scoring_function: Name passed to ``get_scoring_function``.
        scoring_function_kwargs: Extra keyword arguments for
            ``get_scoring_function``; ``None`` means no extra arguments.
        save_dir: Output directory; defaults to a time-stamped folder below
            ``data/results``. An existing directory is reused.
        learning_rate: Learning rate of the Adam optimizer.
        batch_size: Number of sequences sampled per step.
        n_steps: Number of training steps.
        num_processes: Passed through to ``get_scoring_function``.
        sigma: Weight of the score in the augmented likelihood.
        experience_replay: If truthy and the experience holds more than 4
            entries, 4 stored sequences are added to every update.
    """
    voc = Vocabulary(init_from_file="data/Voc")

    start_time = time.time()

    Prior = RNN(voc)
    Agent = RNN(voc)

    logger = VizardLog('data/logs')

    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we dont have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        Prior.rnn.load_state_dict(torch.load(restore_prior_from))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Prior.rnn.load_state_dict(
            torch.load(restore_prior_from,
                       map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from,
                       map_location=lambda storage, loc: storage))

    # We dont need gradients with respect to Prior
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    # Honour the learning_rate argument (it used to be ignored in favour of
    # a hard-coded 0.0005, which is also the default).
    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # Scoring_function; `**None` would raise, so fall back to an empty dict
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **(scoring_function_kwargs or {}))

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefore not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    def log_weights(prefix):
        """Log a subsample of the agent weights under ``prefix`` + name."""
        rnn = Agent.rnn
        logger.log(rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
                   prefix + "weight_GRU_layer_2_w_ih")
        logger.log(rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
                   prefix + "weight_GRU_layer_2_w_hh")
        logger.log(rnn.embedding.weight.cpu().data.numpy()[::30],
                   prefix + "weight_GRU_embedding")
        logger.log(rnn.gru_2.bias_ih.cpu().data.numpy(),
                   prefix + "weight_GRU_layer_2_b_ih")
        logger.log(rnn.gru_2.bias_hh.cpu().data.numpy(),
                   prefix + "weight_GRU_layer_2_b_hh")

    # Log some network weights that can be dynamically plotted with the Vizard bokeh app
    log_weights("init_")

    # Information for the logger
    step_score = [[], []]

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(
                exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow(
                (Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat(
                (agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        # step + 1 steps are done, n_steps - step - 1 remain
        time_left = time_elapsed * ((n_steps - step - 1) / (step + 1))
        print(
            "\n       Step {}   Fraction valid SMILES: {:4.1f}  Time elapsed: {:.2f}h Time left: {:.2f}h"
            .format(step,
                    fraction_valid_smiles(smiles) * 100, time_elapsed,
                    time_left))
        print("  Agent    Prior   Target   Score             SMILES")
        # Fewer than 10 unique sequences may remain after de-duplication
        for i in range(min(10, len(smiles))):
            print(" {:6.2f}   {:6.2f}  {:6.2f}  {:6.2f}     {}".format(
                agent_likelihood[i], prior_likelihood[i],
                augmented_likelihood[i], score[i], smiles[i]))
        # Need this for Vizard plotting
        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # Log some weights
        log_weights("")
        logger.log("\n".join([smi + "\t" + str(round(sc, 2))
                              for smi, sc in zip(smiles[:12], score[:12])]),
                   "SMILES", dtype="text", overwrite=True)
        logger.log(np.array(step_score), "Scores")

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experience (which are the highest
    # scored sequences seen during training)
    if not save_dir:
        save_dir = 'data/results/run_' + time.strftime("%Y-%m-%d-%H_%M_%S",
                                                       time.localtime())
    # Do not throw away a finished run because the folder already exists
    os.makedirs(save_dir, exist_ok=True)
    copyfile('train_agent.py', os.path.join(save_dir, "train_agent.py"))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'Agent.ckpt'))

    # Draw a final batch of 256 sequences and store them with their scores
    seqs, agent_likelihood, entropy = Agent.sample(256)
    prior_likelihood, _ = Prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smi, sc, prior_lp in zip(smiles, score, prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smi, sc, prior_lp))
Esempio n. 6
0
def train_agent(restore_prior_from='data/Prior.ckpt',
                restore_agent_from='data/Prior.ckpt',
                voc_file='data/Voc',
                molscore_config=None,
                learning_rate=0.0005,
                batch_size=64, n_steps=3000, sigma=60,
                experience_replay=0):
    """Train an agent RNN against a fixed prior, scored by MolScore.

    Each step samples sequences from the agent, scores them, and minimises the
    squared difference between the agent likelihood and the augmented
    likelihood ``prior_likelihood + sigma * score``. Checkpoints and run
    parameters are written below ``scoring_function.save_dir``.

    Args:
        restore_prior_from: Checkpoint loaded into ``Prior.rnn``.
        restore_agent_from: Checkpoint loaded into ``Agent.rnn``.
        voc_file: Path handed to ``Vocabulary(init_from_file=...)``.
        molscore_config: Argument passed to ``MolScore``.
        learning_rate: Learning rate of the Adam optimizer.
        batch_size: Number of sequences sampled per step.
        n_steps: Number of training steps.
        sigma: Weight of the score in the augmented likelihood.
        experience_replay: If truthy and the experience holds more than 4
            entries, 4 stored sequences are added to every update.

    Raises:
        Whatever the scoring step raises; before re-raising, the sampled
        SMILES, the agent weights and the MolScore scores are saved.
    """
    voc = Vocabulary(init_from_file=voc_file)

    start_time = time.time()

    # Scoring_function
    scoring_function = MolScore(molscore_config)
    scoring_function.log_parameters({'batch_size': batch_size, 'sigma': sigma})

    print("Building RNNs")

    Prior = RNN(voc)
    Agent = RNN(voc)

    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we dont have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        print("Cuda available, loading prior & agent")
        Prior.rnn.load_state_dict(torch.load(restore_prior_from))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        print("Cuda not available, remapping to cpu")
        Prior.rnn.load_state_dict(torch.load(restore_prior_from, map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from, map_location=lambda storage, loc: storage))

    # We dont need gradients with respect to Prior
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # For logging purposes let's save some training parameters not captured by molscore
    run_parameters = {'learning_rate': learning_rate, 'batch_size': batch_size,
                      'n_steps': n_steps, 'sigma': sigma,
                      'experience_replay': experience_replay}
    with open(os.path.join(scoring_function.save_dir, 'reinvent_parameters.txt'), 'wt') as f:
        f.writelines(f'{p}: {v}\n' for p, v in run_parameters.items())

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefore not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)

        # Using molscore instead here
        try:
            score = scoring_function(smiles, step=step)
            augmented_likelihood = prior_likelihood + sigma * Variable(score)
        except BaseException:
            # If anything goes wrong with molscore (this includes
            # KeyboardInterrupt), write scores, save .ckpt and kill the
            # monitor; the exception is always re-raised.
            with open(os.path.join(scoring_function.save_dir,
                                   f'failed_smiles_{scoring_function.step}.smi'), 'wt') as f:
                f.writelines(f'{smi}\n' for smi in smiles)
            torch.save(Agent.rnn.state_dict(),
                       os.path.join(scoring_function.save_dir, f'Agent_{step}.ckpt'))
            scoring_function.write_scores()
            scoring_function.kill_dash_monitor()
            raise

        # Calculate loss
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat((agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = - (1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        # step + 1 steps are done, n_steps - step - 1 remain
        time_left = time_elapsed * ((n_steps - step - 1) / (step + 1))
        # The run of spaces before "Time elapsed" reproduces the text the
        # original line-continued literal emitted.
        print(f"\n       Step {step}   Fraction valid SMILES: {fraction_valid_smiles(smiles) * 100:4.1f}"
              f"          Time elapsed: {time_elapsed:.2f}h Time left: {time_left:.2f}h")
        print("  Agent   Prior   Target   Score             SMILES")
        # Fewer than 10 unique sequences may remain after de-duplication
        for i in range(min(10, len(smiles))):
            print(f" {agent_likelihood[i]:6.2f}   {prior_likelihood[i]:6.2f}  {augmented_likelihood[i]:6.2f}  {score[i]:6.2f}     {smiles[i]}")

        # Save the agent weights every 250 iterations
        if step % 250 == 0 and step != 0:
            torch.save(Agent.rnn.state_dict(),
                       os.path.join(scoring_function.save_dir, f'Agent_{step}.ckpt'))

    # If the entire training finishes, write out MolScore dataframe, kill dash_utils monitor and
    # save the final Agent.ckpt
    torch.save(Agent.rnn.state_dict(), os.path.join(scoring_function.save_dir, f'Agent_{n_steps}.ckpt'))
    scoring_function.write_scores()
    scoring_function.kill_dash_monitor()

    return
Esempio n. 7
0
def train_agent(runname='celecoxib',
                priorname='chembl',
                scoring_function='Tanimoto',
                scoring_function_kwargs=None,
                save_dir=None,
                batch_size=64,
                n_steps=3000,
                num_processes=6,
                sigma=60,
                experience_replay=5,
                lr=0.0005):
    """Train an agent RNN against a fixed prior and log progress via a SummaryWriter.

    Prior and agent both start from ``data/prior_<priorname>.ckpt``. Each step
    samples sequences from the agent, scores them, and minimises the squared
    difference between the agent likelihood and the augmented likelihood
    ``prior_likelihood + sigma * score``. After training, the agent weights,
    the experience memory and 256 scored samples are written to ``save_dir``.

    Args:
        runname: Name used for the log directory and the output file names.
        priorname: Selects ``data/Voc_<priorname>`` and
            ``data/prior_<priorname>.ckpt``.
        scoring_function: Name passed to ``get_scoring_function``.
        scoring_function_kwargs: Extra keyword arguments for
            ``get_scoring_function``; ``None`` means no extra arguments.
        save_dir: Output directory; defaults to ``results/<runname>`` followed
            by a time stamp. An existing directory is reused.
        batch_size: Number of sequences sampled per step.
        n_steps: Number of training steps.
        num_processes: Passed through to ``get_scoring_function``.
        sigma: Weight of the score in the augmented likelihood.
        experience_replay: If truthy and the experience holds more than 4
            entries, 4 stored sequences are added to every update.
        lr: Learning rate of the Adam optimizer.
    """
    print("\nStarting run %s with prior %s ..." % (runname, priorname))
    start_time = time.time()

    voc = Vocabulary(init_from_file="data/Voc_%s" % priorname)

    prior = RNN(voc)
    agent = RNN(voc)

    writer = SummaryWriter('logs/%s' % runname)

    # Prior and agent are both restored from the same prior checkpoint.
    # Saved models are partially on the GPU, but if we dont have cuda enabled we can remap these
    # to the CPU.
    prior_ckpt = 'data/prior_%s.ckpt' % priorname
    if torch.cuda.is_available():
        prior.rnn.load_state_dict(torch.load(prior_ckpt))
        agent.rnn.load_state_dict(torch.load(prior_ckpt))
    else:
        prior.rnn.load_state_dict(
            torch.load(prior_ckpt,
                       map_location=lambda storage, loc: storage))
        agent.rnn.load_state_dict(
            torch.load(prior_ckpt,
                       map_location=lambda storage, loc: storage))

    # We dont need gradients with respect to Prior
    for param in prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(agent.rnn.parameters(), lr=lr)

    # Scoring_function; `**None` would raise, so fall back to an empty dict
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **(scoring_function_kwargs or {}))

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefore not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    print("Model initialized, starting training...")

    for step in range(n_steps):
        # Sample from Agent
        seqs, agent_likelihood, entropy = agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_ids = unique(seqs)
        seqs = seqs[unique_ids]
        agent_likelihood = agent_likelihood[unique_ids]
        entropy = entropy[unique_ids]

        # Get prior likelihood and score
        prior_likelihood, _ = prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = agent.likelihood(
                exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow(
                (Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat(
                (agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        best_memory = experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        # step + 1 steps are done, n_steps - step - 1 remain
        time_left = time_elapsed * ((n_steps - step - 1) / (step + 1))
        print(
            "\n       Step {}   Fraction valid SMILES: {:4.1f}  Time elapsed: {:.2f}h Time left: {:.2f}h\n"
            .format(step,
                    fraction_valid_smiles(smiles) * 100, time_elapsed,
                    time_left))
        print("  Agent    Prior   Target   Score             SMILES")
        # Fewer than 10 unique sequences may remain after de-duplication
        for i in range(min(10, len(smiles))):
            print(" {:6.2f}   {:6.2f}  {:6.2f}  {:6.2f}     {}".format(
                agent_likelihood[i], prior_likelihood[i],
                augmented_likelihood[i], score[i], smiles[i]))

        # Log
        writer.add_scalar('loss', loss.item(), step)
        writer.add_scalar('score', np.mean(score), step)
        writer.add_scalar('entropy', entropy.mean(), step)
        if best_memory:
            writer.add_scalar('best_memory', best_memory, step)

        # Log up to 4 random valid smiles with their scores. Sampling 4
        # without replacement raises when fewer valid molecules exist, so
        # the sample size is capped and the image is skipped when none are
        # valid.
        # NOTE(review): assumes mol_to_torchimage accepts fewer than 4
        # molecules - confirm against its implementation.
        val_ids = [i for i, s in enumerate(smiles) if is_valid_mol(s)]
        if val_ids:
            picked = np.random.choice(val_ids, min(4, len(val_ids)),
                                      replace=False)
            image_smiles = np.array(smiles)[picked]
            image_scores = ['%.3f' % s for s in np.array(score)[picked]]
            writer.add_image('generated_mols',
                             mol_to_torchimage(image_smiles, image_scores),
                             step)

    # Make sure pending log events reach disk
    writer.close()

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experience (which are the highest
    # scored sequences seen during training)
    if not save_dir:
        save_dir = 'results/%s' % runname + time.strftime(
            "%Y-%m-%d-%H_%M_%S", time.localtime())
    # Do not throw away a finished run because the folder already exists
    os.makedirs(save_dir, exist_ok=True)
    copyfile('agent.py', os.path.join(save_dir, "agent_%s.py" % runname))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(agent.rnn.state_dict(),
               os.path.join(save_dir, 'Agent_%s.ckpt' % runname))

    # Draw a final batch of 256 sequences and store them with their scores
    seqs, agent_likelihood, entropy = agent.sample(256)
    prior_likelihood, _ = prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled.txt"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smi, sc, prior_lp in zip(smiles, score, prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smi, sc, prior_lp))

    print("\nDONE! Whole run took %s" %
          datetime.timedelta(seconds=time.time() - start_time))