Code example #1
def train_agent(restore_prior_from='data/Prior.ckpt',
                restore_agent_from='data/Prior.ckpt',
                voc_file='data/Voc',
                molscore_config=None,
                learning_rate=0.0005,
                batch_size=64, n_steps=3000, sigma=60,
                experience_replay=0):

    voc = Vocabulary(init_from_file=voc_file)

    start_time = time.time()

    # Scoring_function
    scoring_function = MolScore(molscore_config)
    scoring_function.log_parameters({'batch_size': batch_size, 'sigma': sigma})

    print("Building RNNs")

    Prior = RNN(voc)
    Agent = RNN(voc)

    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we dont have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        print("Cuda available, loading prior & agent")
        Prior.rnn.load_state_dict(torch.load(restore_prior_from))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        print("Cuda not available, remapping to cpu")
        Prior.rnn.load_state_dict(torch.load(restore_prior_from, map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from, map_location=lambda storage, loc: storage))

    # We dont need gradients with respect to Prior
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # For logging purposes let's save some training parameters not captured by molscore
    with open(os.path.join(scoring_function.save_dir, 'reinvent_parameters.txt'), 'wt') as f:
        for p, v in {'learning_rate': learning_rate, 'batch_size': batch_size,
                     'n_steps': n_steps, 'sigma': sigma,
                     'experience_replay': experience_replay}.items():
            f.write(f'{p}: {v}\n')

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefore not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)

        # Using molscore instead here
        try:
            score = scoring_function(smiles, step=step)
            augmented_likelihood = prior_likelihood + sigma * Variable(score)
        except Exception:  # If anything goes wrong with molscore, write scores, save a .ckpt, kill the monitor and re-raise
            with open(os.path.join(scoring_function.save_dir,
                                 f'failed_smiles_{scoring_function.step}.smi'), 'wt') as f:
                for smi in smiles:
                    f.write(f'{smi}\n')
            torch.save(Agent.rnn.state_dict(),
                       os.path.join(scoring_function.save_dir, f'Agent_{step}.ckpt'))
            scoring_function.write_scores()
            scoring_function.kill_dash_monitor()
            raise

        # Calculate loss
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience)>4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat((agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = - (1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print(f"\n       Step {step}   Fraction valid SMILES: {fraction_valid_smiles(smiles) * 100:4.1f}\
          Time elapsed: {time_elapsed:.2f}h Time left: {time_left:.2f}h")
        print("  Agent   Prior   Target   Score             SMILES")
        for i in range(10):
            print(f" {agent_likelihood[i]:6.2f}   {prior_likelihood[i]:6.2f}  {augmented_likelihood[i]:6.2f}  {score[i]:6.2f}     {smiles[i]}")

        # Save the agent weights every 250 iterations  ####
        if step % 250 == 0 and step != 0:
            torch.save(Agent.rnn.state_dict(),
                       os.path.join(scoring_function.save_dir, f'Agent_{step}.ckpt'))

    # If the entire training finishes, write out MolScore dataframe, kill dash_utils monitor and
    # save the final Agent.ckpt
    torch.save(Agent.rnn.state_dict(), os.path.join(scoring_function.save_dir, f'Agent_{n_steps}.ckpt'))
    scoring_function.write_scores()
    scoring_function.kill_dash_monitor()
    
    return
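As a usage sketch (the configuration path and step count below are placeholders, not taken from the original project), the MolScore-driven train_agent above could be invoked like this:

# Hypothetical invocation; 'configs/my_task.json' is a placeholder MolScore config path.
if __name__ == '__main__':
    train_agent(molscore_config='configs/my_task.json',
                batch_size=64,
                n_steps=500,
                sigma=60,
                experience_replay=1)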
Code example #2
File: model_rnn.py  Project: jkwang93/MCMG
 def init_h(self, batch_size):
     # Initial cell state is zero
     return Variable(torch.zeros(3, batch_size, 512))
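The (3, batch_size, 512) shape matches PyTorch's (num_layers, batch, hidden_size) layout for a stacked recurrent network's hidden state. A self-contained sketch using a 3-layer GRU with assumed input and batch sizes:

import torch
import torch.nn as nn

# A 3-layer GRU whose hidden state has shape (num_layers, batch, hidden_size),
# matching the (3, batch_size, 512) tensor returned by init_h above.
gru = nn.GRU(input_size=128, hidden_size=512, num_layers=3, batch_first=True)
x = torch.randn(4, 10, 128)   # (batch, seq_len, input_size)
h0 = torch.zeros(3, 4, 512)   # same layout as init_h for batch_size=4
out, hn = gru(x, h0)
print(out.shape, hn.shape)    # torch.Size([4, 10, 512]) torch.Size([3, 4, 512])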
Code example #3
    kwargs = {'num_workers': 0, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batchsize,
                                               shuffle=False,
                                               **kwargs)

    conf_thresh = 0.005
    nms_thresh = 0.45
    metrics = []
    labels = []

    for batch_idx, (data, targets) in enumerate(
            tqdm.tqdm(valid_loader, desc="Detecting objects")):
        data = data.cuda()
        data = Variable(data, volatile=True)
        output = m(data).data
        batch_boxes = get_region_boxes(output, conf_thresh, m.num_classes,
                                       m.anchors, m.num_anchors, 0, 1)
        for i in range(output.size(0)):
            targets_i = targets[i]
            boxes = batch_boxes[i]
            boxes = nms(boxes, nms_thresh)
            width, height = get_image_size(valid_files[i])
            targets_i = rescale_target(targets_i, width, height)
            labels += targets_i[:, 0].tolist()
            prediction = boxes_to_prediction(boxes, width, height)
            metrics += get_prediction_metrics(prediction, targets_i)

    show_eval_result(metrics, labels)
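The nms call above (non-maximum suppression) is defined elsewhere in that project. A minimal greedy IoU-based sketch, assuming each box is an (x1, y1, x2, y2, score) tuple (the project's actual box format may differ), could look like:

def nms_sketch(boxes, iou_thresh):
    # Greedy NMS: keep the highest-scoring box, drop boxes that overlap it too much, repeat.
    # boxes: list of (x1, y1, x2, y2, score) tuples -- an assumed format for illustration.
    def iou(a, b):
        ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
        ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
        inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
        area_a = (a[2] - a[0]) * (a[3] - a[1])
        area_b = (b[2] - b[0]) * (b[3] - b[1])
        return inter / (area_a + area_b - inter + 1e-9)

    kept = []
    for box in sorted(boxes, key=lambda b: b[4], reverse=True):
        if all(iou(box, k) < iou_thresh for k in kept):
            kept.append(box)
    return kept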
Code example #4
File: model.py  Project: adw62/REINVENT
 def init_h(self, batch_size, latent_vectors):
     # Initial cell state is zero
     #return Variable(torch.zeros(3, batch_size, 330))
     # or Initial cell state is latent vector
     return Variable(latent_vectors.repeat(3, 1, 1))
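For context, calling .repeat(3, 1, 1) on a 2-D latent tensor of shape (batch_size, hidden_size) prepends a dimension and copies the latents three times, giving (3, batch_size, hidden_size), i.e. the same latent vector initialises every recurrent layer. A quick check with illustrative sizes (330 taken from the commented-out line above):

import torch

latent_vectors = torch.randn(64, 330)   # (batch_size, hidden_size); sizes are illustrative
h0 = latent_vectors.repeat(3, 1, 1)     # -> (3, 64, 330): one copy per recurrent layer
print(h0.shape)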
Code example #5
def train_agent(restore_prior_from='data/Prior.ckpt',
                restore_agent_from='data/Prior.ckpt',
                scoring_function='tanimoto',
                scoring_function_kwargs=None,
                save_dir=None,
                learning_rate=0.0005,
                batch_size=64,
                n_steps=3000,
                num_processes=0,
                sigma=60,
                experience_replay=0):

    voc = Vocabulary(init_from_file='data/DistributionLearningBenchmark/Voc')

    start_time = time.time()

    Prior = RNN(voc)
    Agent = RNN(voc)

    logger = VizardLog('data/logs')

    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we dont have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        Prior.rnn.load_state_dict(torch.load('data/Prior.ckpt'))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Prior.rnn.load_state_dict(
            torch.load('data/Prior.ckpt',
                       map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from,
                       map_location=lambda storage, loc: storage))

    # We dont need gradients with respect to Prior
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)

    # Scoring_function
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **(scoring_function_kwargs or {}))

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefore not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    # Log some network weights that can be dynamically plotted with the Vizard bokeh app
    logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
               "init_weight_GRU_layer_2_w_ih")
    logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
               "init_weight_GRU_layer_2_w_hh")
    logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30],
               "init_weight_GRU_embedding")
    logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(),
               "init_weight_GRU_layer_2_b_ih")
    logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(),
               "init_weight_GRU_layer_2_b_hh")

    # Information for the logger
    step_score = [[], []]

    print("Model initialized, starting training...")

    for step in range(n_steps):

        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(
                exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow(
                (Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat(
                (agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print(
            "\n       Step {}   Fraction valid SMILES: {:4.1f}  Time elapsed: {:.2f}h Time left: {:.2f}h"
            .format(step,
                    fraction_valid_smiles(smiles) * 100, time_elapsed,
                    time_left))
        print("  Agent    Prior   Target   Score             SMILES")
        for i in range(10):
            print(" {:6.2f}   {:6.2f}  {:6.2f}  {:6.2f}     {}".format(
                agent_likelihood[i], prior_likelihood[i],
                augmented_likelihood[i], score[i], smiles[i]))
        # Need this for Vizard plotting
        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # Log some weights
        logger.log(Agent.rnn.gru_2.weight_ih.cpu().data.numpy()[::100],
                   "weight_GRU_layer_2_w_ih")
        logger.log(Agent.rnn.gru_2.weight_hh.cpu().data.numpy()[::100],
                   "weight_GRU_layer_2_w_hh")
        logger.log(Agent.rnn.embedding.weight.cpu().data.numpy()[::30],
                   "weight_GRU_embedding")
        logger.log(Agent.rnn.gru_2.bias_ih.cpu().data.numpy(),
                   "weight_GRU_layer_2_b_ih")
        logger.log(Agent.rnn.gru_2.bias_hh.cpu().data.numpy(),
                   "weight_GRU_layer_2_b_hh")
        logger.log("\n".join([smiles + "\t" + str(round(score, 2)) for smiles, score in zip \
                            (smiles[:12], score[:12])]), "SMILES", dtype="text", overwrite=True)
        logger.log(np.array(step_score), "Scores")

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experience (which are the highest
    # scored sequences seen during training)
    if not save_dir:
        save_dir = 'data/results/run_' + time.strftime("%Y-%m-%d-%H_%M_%S",
                                                       time.localtime())
    os.makedirs(save_dir)
    copyfile('train_agent.py', os.path.join(save_dir, "train_agent.py"))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'Agent.ckpt'))

    seqs, agent_likelihood, entropy = Agent.sample(256)
    prior_likelihood, _ = Prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smi, sc, pri in zip(smiles, score, prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smi, sc, pri))
Code example #6
 def __getitem__(self, i):
     mol = self.smiles[i]
     tokenized = self.voc.tokenize(mol)
     encoded = self.voc.encode(tokenized)
     return Variable(encoded)
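Because the encoded sequences returned by __getitem__ vary in length, a Dataset like this is normally paired with a collate function that pads them into a single matrix before batching. A hypothetical sketch (not the project's own collate_fn; it assumes 1-D integer tensors and uses 0 as the padding index):

import torch

def collate_fn_sketch(encoded_seqs):
    # Pad a list of variable-length 1-D token-id tensors into one (batch, max_len) matrix.
    max_len = max(seq.size(0) for seq in encoded_seqs)
    padded = torch.zeros(len(encoded_seqs), max_len, dtype=torch.long)  # 0 assumed to be the pad token
    for i, seq in enumerate(encoded_seqs):
        padded[i, :seq.size(0)] = seq
    return padded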
Code example #7
    def sample(self, pattern="CC(*)CC", batch_size=128, max_length=140):
        """ 
            Only difference with classic RNN based sampling.
            Sample a batch of sequences with given scaffold.

            Args:
                pattern: Scaffold that need to be respected
                distributions: Distribution on the length of 
                batch_size : Number of sequences to sample 
                max_length:  Maximum length of the sequences

            Outputs:
            seqs: (batch_size, seq_length) The sampled sequences.
            log_probs : (batch_size) Log likelihood for each sequence.
            entropy: (batch_size) The entropies for the sequences. Not
                                    currently used.
                                    
        """

        # get the tokenized version of the pattern
        pattern = np.array(tokenize_custom(pattern))

        start_token = Variable(torch.zeros(batch_size).long())
        start_token[:] = self.voc.vocab['GO']

        h = self.rnn.init_h(batch_size)
        x = start_token

        sequences = []
        log_probs = Variable(torch.zeros(batch_size))
        finished = torch.zeros(batch_size).byte()
        entropy = Variable(torch.zeros(batch_size))

        if torch.cuda.is_available():
            finished = finished.cuda()

        # tracks if there is an opened parenthesis
        opened = np.array(np.zeros(shape=batch_size), dtype=bool)

        # tracks if there is a constrained choice
        constrained_choices = np.array(np.zeros(shape=batch_size), dtype=bool)

        # tracks number of opening and closing parentheses
        opening_parentheses = np.ones(shape=batch_size)
        closing_parentheses = np.zeros(shape=batch_size)

        # tracks number of steps in the fragment that is being sampled
        # (if the RNN never samples the matching parenthesis, we terminate sampling of this molecule)
        n_steps = np.zeros(shape=batch_size)

        # tracks opened cycles
        opened_cycles = [[
            'A',
        ] for i in range(batch_size)]
        counts = np.zeros(shape=batch_size, dtype=int)

        # tracks the position in the scaffold's pattern
        trackers = np.zeros(shape=batch_size, dtype=int)
        current_pattern_indexes = np.array(
            [pattern[index] for index in trackers])

        for step in range(max_length):

            # Getting the position in the pattern of every example in the batch
            previous_pattern_indexes = current_pattern_indexes
            current_pattern_indexes = np.array(
                [pattern[index] for index in trackers])

            # Check if a decoration is currently opened
            opened = np.logical_or(
                np.logical_and(current_pattern_indexes == '*',
                               previous_pattern_indexes == '('), opened)

            # And if we're heading to a constrained choice
            constrained_choices = np.array(
                [x[0] == '[' and ',' in x for x in current_pattern_indexes],
                dtype=bool)

            # In this case we already sampled this branch and need to move on for one step in the pattern
            trackers += 1 * np.logical_and(current_pattern_indexes == '*',
                                           previous_pattern_indexes == '(')

            # Sample according to conditional probability distribution of the RNN
            logits, h = self.rnn(x, h)
            prob = F.softmax(logits)
            log_prob = F.log_softmax(logits)
            x = torch.multinomial(prob, num_samples=1).view(-1)

            # If not opened, replace with current pattern token, else keep the sample
            # And update number of opened and closed parentheses
            # If closed, resume to opened

            # iterating over the batch:
            # there might be a smart way to parallelize all this but we didn't focus on it
            # as sampling speed is not necessarily a bottleneck in our applications
            for i in range(batch_size):

                # to keep track of opening and closing parentheses
                is_open = opened[i]
                if is_open:
                    n_steps[i] += 1
                    if n_steps[i] > 50:
                        x[i] = self.voc.vocab['EOS']
                    opening_parentheses[i] += (
                        x[i] == self.voc.vocab['(']).byte() * 1
                    closing_parentheses[i] += (
                        x[i] == self.voc.vocab[')']).byte() * 1
                    n_opened = opening_parentheses[i]
                    n_closed = closing_parentheses[i]
                    if (n_opened == n_closed):
                        opening_parentheses[i] += 1
                        opened[i] = False
                        trackers[i] += 1

                # if we have a constrained choice
                # we apply a mask on the probability vector
                elif constrained_choices[i]:

                    choices = current_pattern_indexes[i][1:-1].split(',')
                    probabilities = prob[i, :]
                    mask = torch.zeros_like(probabilities)
                    for choice in choices:
                        mask[self.voc.vocab[choice]] = 1
                    probabilities *= mask
                    probabilities /= torch.sum(probabilities, dim=-1)
                    x[i] = torch.multinomial(probabilities,
                                             num_samples=1).view(-1)
                    trackers[i] += 1 * (x[i] != self.voc.vocab['EOS']).byte()

                # In this case we need to sample
                # We make the distinction between branch (first case) and linked (second case)
                elif current_pattern_indexes[i] == '*':
                    if pattern[trackers[i]] == ')':
                        n_steps[i] += 1
                        if n_steps[i] > 50:
                            x[i] = self.voc.vocab['EOS']
                        opening_parentheses[i] += (
                            x[i] == self.voc.vocab['(']).byte() * 1
                        closing_parentheses[i] += (
                            x[i] == self.voc.vocab[')']).byte() * 1
                        n_opened = opening_parentheses[i]
                        n_closed = closing_parentheses[i]
                        if (n_opened == n_closed):
                            opening_parentheses[i] += 1
                            opened[i] = False
                            trackers[i] += 1
                    else:
                        # The following lines are to avoid that sampling finishes too early
                        probabilities = prob[i, :]
                        mask = torch.ones_like(probabilities)
                        mask[self.voc.vocab['EOS']] = 0
                        probabilities *= mask
                        probabilities /= torch.sum(probabilities, dim=-1)
                        x[i] = torch.multinomial(probabilities,
                                                 num_samples=1).view(-1)

                        opening_parentheses[i] += (
                            x[i] == self.voc.vocab['(']).byte() * 1
                        closing_parentheses[i] += (
                            x[i] == self.voc.vocab[')']).byte() * 1
                        n_opened = opening_parentheses[i]
                        n_closed = closing_parentheses[i]
                        for cycle in range(1, 10):
                            if (x[i] == self.voc.vocab[str(cycle)]
                                ).byte() and (cycle in opened_cycles[i]):
                                opened_cycles[i].remove(cycle)
                                break
                            elif (x[i] == self.voc.vocab[str(cycle)]).byte():
                                opened_cycles[i].append(cycle)
                                break

                        # Override with specified distribution for minimal fragment size
                        # You could also make this an argument of the sample function
                        # You can also keep this parameter fixed manually as it currently is
                        # The sampling of the linker will only stop when size is > to minimal_linked_size
                        # and cycles and branches are completed

                        minimal_linker_size = 5

                        if (n_opened == n_closed + 1) and len(
                                opened_cycles[i]
                        ) == 1 and counts[i] > minimal_linker_size:
                            opening_parentheses[i] += 1
                            opened[i] = False
                            trackers[i] += 1
                        else:
                            counts[i] += 1

                # If we avoided all previous cases, then we do not sample and instead read the pattern
                else:
                    x[i] = self.voc.vocab[current_pattern_indexes[i]]
                    trackers[i] += 1 * (x[i] != self.voc.vocab['EOS']).byte()
                    if (x[i] == self.voc.vocab[')']).byte():
                        opened[i] = False

            sequences.append(x.view(-1, 1))
            log_probs += NLLLoss(log_prob, x)
            entropy += -torch.sum((log_prob * prob), 1)

            x = Variable(x.data)
            EOS_sampled = (x == self.voc.vocab['EOS']).byte()
            finished = torch.ge(finished + EOS_sampled, 1)
            if torch.prod(finished) == 1: break

        sequences = torch.cat(sequences, 1)
        return sequences.data, log_probs, entropy
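The NLLLoss helper used in the loop above is not shown here. Judging from its call site (log_probs += NLLLoss(log_prob, x)), it returns the log-probability that each sequence in the batch assigned to its sampled token; a hypothetical gather-based sketch of that interface:

def NLLLoss_sketch(log_prob, targets):
    # log_prob: (batch, vocab_size) log-softmax output; targets: (batch,) sampled token ids.
    # Returns the per-example log-probability of the chosen token (assumed interface).
    return log_prob.gather(1, targets.view(-1, 1)).squeeze(1)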
Code example #8
File: main.py  Project: 1pha/deep_scratch_3
import numpy as np

from utils.Variable import *
from utils.Functions import *

f = Square()
g = Exp()

x = Variable(np.array(0.5))
fx = f(x)
gfx = g(fx)
print(gfx.data)
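For reference, the chained call above computes exp(x ** 2) for x = 0.5, so the printed value should be approximately 1.2840:

import numpy as np

# Direct check of the Square -> Exp composition: g(f(x)) = exp(x ** 2).
x = 0.5
print(np.exp(x ** 2))   # ~1.2840254166877414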
Code example #9
 def init_h(self, batch_size):
     # Initial cell state is zero
     return Variable(
         torch.zeros(self._num_gru_layers, batch_size,
                     self._gru_layer_size))
Code example #10
def train_agent(restore_agent_from='data/Prior.ckpt',
                scoring_function='activity_model',
                save_dir=None,
                learning_rate=0.0005,
                batch_size=64,
                n_steps=1000,
                sigma=100):

    voc = Vocabulary(init_from_file="data/voc")
    start_time = time.time()

    Prior = RNN(voc)
    Agent = RNN(voc)

    if torch.cuda.is_available():
        Prior.rnn.load_state_dict(torch.load('data/Prior.ckpt'))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Prior.rnn.load_state_dict(
            torch.load('data/Prior.ckpt',
                       map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from,
                       map_location=lambda storage, loc: storage))

    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)
    scoring_function = get_scoring_function(scoring_function=scoring_function)
    step_score = [[], []]
    print("Model initialized, starting training...")

    if not save_dir:
        save_dir = 'experiments/manuscript/1000steps_probtest_rewardonlynosmaller40_' + time.strftime(
            "%Y-%m-%d-%H_%M_%S", time.localtime())
    os.makedirs(save_dir)

    ## calculate the probability of psmiles with predicted TC >= 0.4
    prob = []
    mean_ = []
    std_ = []
    for step in range(n_steps):
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = []
        for seq in seqs.cpu().numpy():
            smiles.append(voc.decode(seq))
        score = scoring_function(smiles)

        ####
        count = 0
        score_filter = []
        for s in score:
            if s >= 0.4:
                score_filter.append(s)
                count += 1
            else:
                pass
        prob.append(count / batch_size)
        mean_.append(np.mean(score_filter))
        std_.append(np.std(score_filter))
        ####

        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)
        loss = loss.mean()

        regularization = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * regularization

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print out information during the training
        print("Agent    Prior    Target    Score        SMILES")
        for i in range(10):
            print("{:6.3f}  {:6.3f}  {:6.3f}  {:6.3f}    {}".format(
                agent_likelihood[i], prior_likelihood[i],
                augmented_likelihood[i], score[i], smiles[i]))

        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # if step > 98 and (step+1) % 100 == 0:
        # # if step == 0:
        #     torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'agent_baseline_{}.ckpt'.format(step+1)))

        #     seqs, agent_likelihood, entropy = Agent.sample(1000)
        #     prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        #     prior_likelihood = prior_likelihood.data.cpu().numpy()
        #     smiles = []
        #     for seq in seqs.cpu().numpy():
        #         smiles.append(voc.decode(seq))
        #     score = scoring_function(smiles)
        #     with open(os.path.join(save_dir, "sampled_{}".format(step+1)), 'w') as f:
        #         f.write("SMILES  Score  PriorLogP\n")
        #         for s, sc, pri in zip(smiles, score, prior_likelihood):
        #             f.write("{}  {:5.3f}  {:6.3f}\n".format(s, sc, pri))

    step_score_data = pd.DataFrame({
        'Step': step_score[0],
        'Score': step_score[1],
        'Prob': prob,
        'MEAN': mean_,
        'STD': std_
    })
    step_score_data.to_csv(os.path.join(save_dir, "step_score_1000step.csv"),
                           index=None)
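As a follow-up sketch, the CSV written above (columns Step, Score, Prob, MEAN, STD) can be inspected with pandas and matplotlib; the path below is illustrative and should point into save_dir:

import pandas as pd
import matplotlib.pyplot as plt

# Plot the mean score and the fraction of scores >= 0.4 over training steps.
df = pd.read_csv('step_score_1000step.csv')   # adjust the path to point into save_dir
fig, ax = plt.subplots()
ax.plot(df['Step'], df['Score'], label='mean score')
ax.plot(df['Step'], df['Prob'], label='fraction of scores >= 0.4')
ax.set_xlabel('Step')
ax.legend()
plt.show()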
Code example #11
variables = {}

default = OrderedDict()
default['mean'] = 0
default['std'] = 1
positive_bias = OrderedDict()
positive_bias['mean'] = 0.5
positive_bias['std'] = 1
negative_bias = OrderedDict()
negative_bias['mean'] = -0.5
negative_bias['std'] = 1

# Experimental parameters

# Task importance
variables['TI'] = Variable(1, 'TI', 'fixed', {'fixed': 1})

# Controlled parameters
variables['SA'] = Variable(0, 'SA', 'Normal', positive_bias)
variables['PI'] = Variable(0, 'PI', 'Normal', negative_bias)
variables['success'] = 0

# IT Process parameters
variables['skill'] = Variable(1, 'skill', 'Normal', default)
variables['effort'] = Variable(1, 'effort', 'Normal', default)
variables['external'] = Variable(0, 'external', 'Normal', default)
variables['luck'] = Variable(0, 'luck', 'Normal', default)

# Inference parameters
variables['L'] = 100
variables['f'] = {'mean': lambda x: x, '2ndMoment': lambda x: x**2}
Code example #12
def train_agent(runname='celecoxib',
                priorname='chembl',
                scoring_function='Tanimoto',
                scoring_function_kwargs=None,
                save_dir=None,
                batch_size=64,
                n_steps=3000,
                num_processes=6,
                sigma=60,
                experience_replay=5,
                lr=0.0005):
    print("\nStarting run %s with prior %s ..." % (runname, priorname))
    start_time = time.time()

    voc = Vocabulary(init_from_file="data/Voc_%s" % priorname)

    prior = RNN(voc)
    agent = RNN(voc)

    writer = SummaryWriter('logs/%s' % runname)

    # By default restore Agent to same model as Prior, but can restore from already trained Agent too.
    # Saved models are partially on the GPU, but if we dont have cuda enabled we can remap these
    # to the CPU.
    if torch.cuda.is_available():
        prior.rnn.load_state_dict(torch.load('data/prior_%s.ckpt' % priorname))
        agent.rnn.load_state_dict(torch.load('data/prior_%s.ckpt' % priorname))
    else:
        prior.rnn.load_state_dict(
            torch.load('data/prior_%s.ckpt' % priorname,
                       map_location=lambda storage, loc: storage))
        agent.rnn.load_state_dict(
            torch.load('data/prior_%s.ckpt' % priorname,
                       map_location=lambda storage, loc: storage))

    # We dont need gradients with respect to Prior
    for param in prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(agent.rnn.parameters(), lr=lr)

    # Scoring_function
    scoring_function = get_scoring_function(scoring_function=scoring_function,
                                            num_processes=num_processes,
                                            **(scoring_function_kwargs or {}))

    # For policy based RL, we normally train on-policy and correct for the fact that more likely actions
    # occur more often (which means the agent can get biased towards them). Using experience replay is
    # therefore not as theoretically sound as it is for value based RL, but it seems to work well.
    experience = Experience(voc)

    print("Model initialized, starting training...")

    for step in range(n_steps):
        # Sample from Agent
        seqs, agent_likelihood, entropy = agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_ids = unique(seqs)
        seqs = seqs[unique_ids]
        agent_likelihood = agent_likelihood[unique_ids]
        entropy = entropy[unique_ids]

        # Get prior likelihood and score
        prior_likelihood, _ = prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience Replay
        # First sample
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = agent.likelihood(
                exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow(
                (Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
            agent_likelihood = torch.cat(
                (agent_likelihood, exp_agent_likelihood), 0)

        # Then add new experience
        prior_likelihood = prior_likelihood.data.cpu().numpy()
        new_experience = zip(smiles, score, prior_likelihood)
        best_memory = experience.add_experience(new_experience)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        # Calculate gradients and make an update to the network weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to numpy arrays so that we can print them
        augmented_likelihood = augmented_likelihood.data.cpu().numpy()
        agent_likelihood = agent_likelihood.data.cpu().numpy()

        # Print some information for this step
        time_elapsed = (time.time() - start_time) / 3600
        time_left = (time_elapsed * ((n_steps - step) / (step + 1)))
        print(
            "\n       Step {}   Fraction valid SMILES: {:4.1f}  Time elapsed: {:.2f}h Time left: {:.2f}h\n"
            .format(step,
                    fraction_valid_smiles(smiles) * 100, time_elapsed,
                    time_left))
        print("  Agent    Prior   Target   Score             SMILES")
        for i in range(10):
            print(" {:6.2f}   {:6.2f}  {:6.2f}  {:6.2f}     {}".format(
                agent_likelihood[i], prior_likelihood[i],
                augmented_likelihood[i], score[i], smiles[i]))

        # Log
        writer.add_scalar('loss', loss.item(), step)
        writer.add_scalar('score', np.mean(score), step)
        writer.add_scalar('entropy', entropy.mean(), step)
        if best_memory:
            writer.add_scalar('best_memory', best_memory, step)

        # get 4 random valid smiles and scores for logging
        val_ids = np.array(
            [i for i, s in enumerate(smiles) if is_valid_mol(s)])
        val_ids = np.random.choice(val_ids, 4, replace=False)
        smiles = np.array(smiles)[val_ids]
        score = ['%.3f' % s for s in np.array(score)[val_ids]]
        writer.add_image('generated_mols', mol_to_torchimage(smiles, score),
                         step)

    # If the entire training finishes, we create a new folder where we save this python file
    # as well as some sampled sequences and the contents of the experience (which are the highest
    # scored sequences seen during training)
    if not save_dir:
        save_dir = 'results/%s' % runname + time.strftime(
            "%Y-%m-%d-%H_%M_%S", time.localtime())
    os.makedirs(save_dir)
    copyfile('agent.py', os.path.join(save_dir, "agent_%s.py" % runname))

    experience.print_memory(os.path.join(save_dir, "memory"))
    torch.save(agent.rnn.state_dict(),
               os.path.join(save_dir, 'Agent_%s.ckpt' % runname))

    seqs, agent_likelihood, entropy = agent.sample(256)
    prior_likelihood, _ = prior.likelihood(Variable(seqs))
    prior_likelihood = prior_likelihood.data.cpu().numpy()
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    with open(os.path.join(save_dir, "sampled.txt"), 'w') as f:
        f.write("SMILES Score PriorLogP\n")
        for smi, sc, pri in zip(smiles, score, prior_likelihood):
            f.write("{} {:5.2f} {:6.2f}\n".format(smi, sc, pri))

    print("\nDONE! Whole run took %s" %
          datetime.timedelta(seconds=time.time() - start_time))
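The scalars and images logged through SummaryWriter above can be viewed with TensorBoard (for example by running tensorboard --logdir logs in a shell). A minimal, standalone sketch of the same logging pattern; depending on the project, SummaryWriter may come from tensorboardX rather than torch.utils.tensorboard:

from torch.utils.tensorboard import SummaryWriter

# Write a scalar per step, mirroring writer.add_scalar('score', ...) in train_agent above.
writer = SummaryWriter('logs/example_run')
for step in range(10):
    writer.add_scalar('score', 0.1 * step, step)
writer.close()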
Code example #13
def poem_to_tensor(poem, vocab, is_target=False):
    word_indexes = [vocab.index(word) for word in poem]
    if is_target:
        word_indexes.append(vocab.index('<EOP>'))
    return Variable(torch.LongTensor(word_indexes))
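A usage sketch for poem_to_tensor (the vocabulary and poem below are invented for illustration):

vocab = ['<EOP>', 'the', 'moon', 'rises']
poem = ['the', 'moon', 'rises']
target = poem_to_tensor(poem, vocab, is_target=True)
print(target)   # indices [1, 2, 3] plus the '<EOP>' index 0 appended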