Example #1
def main():
    voc = util.Voc(init_from_file="data/voc_b.txt")
    netR_path = 'output/rf_dis.pkg'
    netG_path = 'output/net_p'
    netD_path = 'output/net_d'
    agent_path = 'output/net_gan_%d_%d_%dx%d' % (SIGMA * 10, BL * 10,
                                                 BATCH_SIZE, MC)

    netR = util.Environment(netR_path)

    agent = model.Generator(voc)
    agent.load_state_dict(T.load(netG_path + '.pkg'))

    df = pd.read_table('data/CHEMBL251.txt')
    df = df[df['PCHEMBL_VALUE'] >= 6.5]
    data = util.MolData(df, voc)
    loader = DataLoader(data,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        drop_last=True,
                        collate_fn=data.collate_fn)

    netD = model.Discriminator(VOCAB_SIZE, EMBED_DIM, FILTER_SIZE, NUM_FILTER)
    # pre-train the discriminator with BCE loss if no checkpoint exists yet
    if not os.path.exists(netD_path + '.pkg'):
        Train_dis_BCE(netD, agent, loader, epochs=100, out=netD_path)
    netD.load_state_dict(T.load(netD_path + '.pkg'))

    best_score = 0
    log = open(agent_path + '.log', 'w')
    for epoch in range(1000):
        print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
        print('\nPolicy Gradient Training Generator : ')
        Train_GAN(agent, netD, netR)

        print('\nAdversarial Training Discriminator : ')
        Train_dis_BCE(netD, agent, loader, epochs=1)

        seqs = agent.sample(1000)
        ix = util.unique(seqs)
        smiles, valids = util.check_smiles(seqs[ix], agent.voc)
        scores = netR(smiles)
        scores[valids == False] = 0
        # fraction of the 1000 sampled molecules that are unique, valid, and score at least 0.5
        unique = (scores >= 0.5).sum() / 1000
        if best_score < unique:
            T.save(agent.state_dict(), agent_path + '.pkg')
            best_score = unique
        print("Epoch+: %d average: %.4f valid: %.4f unique: %.4f" %
              (epoch, scores.mean(), valids.mean(), unique),
              file=log)
        for i, smile in enumerate(smiles):
            print('%f\t%s' % (scores[i], smile), file=log)

        # decay the learning rate by 1% each epoch
        for param_group in agent.optim.param_groups:
            param_group['lr'] *= (1 - 0.01)

    log.close()
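
Example #1 relies on several imports and module-level constants (SIGMA, BL, BATCH_SIZE, MC, VOCAB_SIZE, EMBED_DIM, FILTER_SIZE, NUM_FILTER) that are defined elsewhere in the script. The header below is a minimal sketch of what such a preamble could look like; the values are illustrative placeholders, not the original settings.

import os
import pandas as pd
import torch as T
from torch.utils.data import DataLoader

import model
import util

# Placeholder hyperparameters -- illustrative only, not the original settings.
SIGMA = 0.5                    # weight of the reward predictor vs. the discriminator
BL = 0.1                       # baseline subtracted from the reward
BATCH_SIZE = 64                # number of sequences sampled per batch
MC = 10                        # number of repeated (Monte Carlo) samplings
VOCAB_SIZE = 128               # size of the SMILES token vocabulary
EMBED_DIM = 128                # embedding dimension of the discriminator
FILTER_SIZE = [3, 5, 7]        # convolution filter sizes of the discriminator
NUM_FILTER = [100, 100, 100]   # number of filters per filter size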
Example #2
def rollout_pg(agent, environ, explore=None, *, batch_size, baseline, mc, epsilon):
    """Training generator under reinforcement learning framework.

    The reward is given for each token in the SMILES, which is generated by
    Monte Carlo Tree Search based on final reward given by the environment.

    Arguments:

        agent (model.Generator): the exploitation network for SMILES string generation
        environ (util.Activity): the environment provide the final reward for each SMILES
        explore (model.Generator): the exploration network for SMILES string generation,
            it has the same architecture with the agent.
    """

    agent.optim.zero_grad()
    seqs = agent.sample(batch_size, explore=explore, epsilon=epsilon)
    batch_size = seqs.size(0)
    seq_len = seqs.size(1)
    rewards = np.zeros((batch_size, seq_len))
    smiles, valids = util.check_smiles(seqs, agent.voc)
    # environment reward relative to the baseline; invalid SMILES get the lowest reward
    preds = environ(smiles) - baseline
    preds[valids == False] = -baseline
    scores, hiddens = agent.likelihood(seqs)

    # Monte Carlo Tree Search for step-reward generation
    for _ in trange(mc):
        for i in range(seq_len):
            # only positions where at least one sequence has not yet terminated
            if (seqs[:, i] != 0).any():
                h = hiddens[:, :, i, :]
                # complete every sequence from position i onwards with the agent
                subseqs = agent.sample(batch_size, inits=(seqs[:, i], h, i + 1, None))
                subseqs = torch.cat([seqs[:, :i+1], subseqs], dim=1)
                subsmile, subvalid = util.check_smiles(subseqs, voc=agent.voc)
                subpred = environ(subsmile) - baseline
                subpred[subvalid == False] = -baseline
            else:
                # all sequences have terminated; reuse the final rewards
                subpred = preds
            rewards[:, i] += subpred
    loss = agent.PGLoss(scores, seqs, torch.FloatTensor(rewards / mc))
    loss.backward()
    agent.optim.step()
    return 0, valids.mean(), smiles, preds
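
Assuming an agent and an environment built as in Example #1, plus an optional exploration network with the same architecture as the agent, a driver loop around rollout_pg could look like the sketch below; the keyword values are illustrative assumptions, not the original hyperparameters.

# Hypothetical driver loop around rollout_pg; values are placeholders.
for epoch in range(100):
    _, valid_frac, smiles, preds = rollout_pg(
        agent, environ, explore=explore,
        batch_size=64,   # sequences sampled per policy-gradient update
        baseline=0.1,    # reward baseline subtracted from the environment score
        mc=10,           # Monte Carlo rollouts per sequence position
        epsilon=0.01,    # exploration rate used during sampling
    )
    print('epoch %d  valid %.3f  mean reward %.3f' % (epoch, valid_frac, preds.mean()))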
Example #3
    def sample(self, n_samples, explore=None, mc=1, epsilon=0.01, include_tensors=False):
        seqs = []

        # repeat the sampling mc times and pool the results
        for _ in range(mc):
            seq = self.model.sample(n_samples, explore=explore.model if explore else None, epsilon=epsilon)
            seqs.append(seq)
        seqs = torch.cat(seqs, dim=0)
        ix = util.unique(seqs)
        seqs = seqs[ix]
        smiles, valids = util.check_smiles(seqs, self.corpus.voc)
        if include_tensors:
            return smiles, valids, seqs
        else:
            return smiles, valids
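
Assuming agent is an instance of the class that defines this sample() method, a call could look like the following; the sample counts and epsilon value are illustrative placeholders.

# Hypothetical calls; counts and epsilon are placeholder values.
smiles, valids = agent.sample(500, mc=4, epsilon=0.01)
print('valid fraction: %.3f' % (sum(valids) / len(valids)))

# With include_tensors=True, the deduplicated token tensors are returned as well.
smiles, valids, seqs = agent.sample(500, include_tensors=True)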
Example #4
def Train_GAN(netG, netD, netR, sigma=SIGMA):
    seqs = []
    for _ in range(MC):
        seq = netG.sample(BATCH_SIZE)
        seqs.append(seq)
    seqs = T.cat(seqs, dim=0)
    ix = util.unique(seqs)
    seqs = seqs[ix]
    smiles, valids = util.check_smiles(seqs, netG.voc)
    # blend the reward predictor's score with the discriminator's score, weighted by sigma
    preds = sigma * netR(smiles) + (1 - sigma) * netD(
        util.Variable(seqs)).data.cpu().numpy()[:, 0]
    preds[valids == False] = 0
    # subtract the reward baseline
    preds -= BL
    ds = TensorDataset(seqs, T.Tensor(preds.reshape(-1, 1)))
    loader = DataLoader(ds, batch_size=BATCH_SIZE)
    for seq, pred in loader:
        score, _ = netG.likelihood(seq)
        netG.optim.zero_grad()
        loss = netG.PGLoss(score, seq, pred)
        loss.backward()
        netG.optim.step()
Example #5
    def fit(self, loader_train, loader_valid=None, epochs=100, lr=1e-3, *, monitor_freq=10):
        """Train the RNN generative model, in a style similar to scikit-learn or Keras.

        At the end of training, the optimal parameter values are also persisted to disk.

        Arguments:
            loader_train (DataLoader): data loader for the training set. It wraps a
                util.MolData dataset; each iteration yields an m x n LongTensor,
                where m is the number of samples and n is the maximum sequence length.
            loader_valid (DataLoader, optional): data loader for the validation set,
                with the same structure as loader_train.
            epochs (int, optional): the maximum number of training epochs (default: 100)
            lr (float, optional): learning rate (default: 1e-3)
            monitor_freq (int, optional): evaluate and report to the monitors every
                monitor_freq batches (default: 10)
        """
        optimizer = optim.Adam(self.parameters(), lr=lr)
        best_error = np.inf
        total_epochs = epochs
        total_batches = len(loader_train)
        total_steps = total_batches * total_epochs
        current_step = 0
        for epoch in trange(epochs, desc='Epoch'):
            for i, batch in enumerate(loader_train):
                current_step += 1
                current_batch = i
                optimizer.zero_grad()
                loss_train = self.likelihood(batch.to(util.getDev()))
                loss_train = -loss_train.mean()
                loss_train.backward()
                optimizer.step()
                # Performance Evaluation
                current_loss_valid = None
                if (monitor_freq > 0 and i % monitor_freq == 0) or loader_valid is not None:
                    # sample 1000 SMILES
                    seqs = self.sample(1000)
                    # ix = util.unique(seqs)
                    # seqs = seqs[ix]
                    # check the validity of each SMILES
                    smiles, valids = util.check_smiles(seqs, self.voc)
                    error = 1 - sum(valids) / len(seqs)

                    current_loss_train = loss_train.item()
                    current_error_rate = error
                    # save the parameters of the model with the minimum loss/error value
                    is_best = False
                    if loader_valid is not None:
                        # If the validation set is given, the loss function will be
                        # calculated on the validation set.
                        loss_valid, size = 0, 0
                        for j, inner_batch in enumerate(loader_valid):
                            size += inner_batch.size(0)
                            with torch.no_grad():
                                loss_valid += -self.likelihood(inner_batch.to(util.getDev())).sum()

                        loss_valid = loss_valid / size / self.voc.max_len
                        current_loss_valid = loss_valid.item()
                        if current_loss_valid < best_error:
                            is_best = True
                            best_error = current_loss_valid
                    elif error < best_error:
                        # If the validation is not given, the loss function will be
                        # just based on the training set.
                        is_best = True
                        best_error = error

                    # feed monitoring info
                    for monitor in self.monitors:
                        monitor.model(self)
                        monitor.state(self.state_dict(), is_best)
                        monitor.performance(current_loss_train, current_loss_valid, current_error_rate, best_error)
                        for j, smile in enumerate(smiles):
                            monitor.smiles(smile, valids[j])
                        monitor.finalizeStep(epoch+1, current_batch, current_step, total_epochs, total_batches, total_steps)

        for monitor in self.monitors:
            monitor.close()
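
Assuming data loaders built as in Example #1 and a generator class that provides this fit() method, a call could look like the sketch below; the identifiers and values outside this snippet are assumptions rather than the original setup.

# Hypothetical usage; `generator` is assumed to be an instance of the class
# that defines fit(), and loader construction mirrors Example #1.
voc = util.Voc(init_from_file="data/voc_b.txt")
generator = model.Generator(voc)  # assumption: this class provides fit()

df = pd.read_table('data/CHEMBL251.txt')
data = util.MolData(df, voc)
loader_train = DataLoader(data, batch_size=128, shuffle=True,
                          drop_last=True, collate_fn=data.collate_fn)

generator.fit(loader_train, loader_valid=None, epochs=50, lr=1e-3, monitor_freq=20)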