def main():
    voc = util.Voc(init_from_file="data/voc_b.txt")
    netR_path = 'output/rf_dis.pkg'
    netG_path = 'output/net_p'
    netD_path = 'output/net_d'
    agent_path = 'output/net_gan_%d_%d_%dx%d' % (SIGMA * 10, BL * 10, BATCH_SIZE, MC)

    # Environment (predictor) that provides the reward, and the pre-trained generator as agent
    netR = util.Environment(netR_path)
    agent = model.Generator(voc)
    agent.load_state_dict(T.load(netG_path + '.pkg'))

    # Real samples: CHEMBL251 ligands with pChEMBL value >= 6.5
    df = pd.read_table('data/CHEMBL251.txt')
    df = df[df['PCHEMBL_VALUE'] >= 6.5]
    data = util.MolData(df, voc)
    loader = DataLoader(data, batch_size=BATCH_SIZE, shuffle=True,
                        drop_last=True, collate_fn=data.collate_fn)

    # Discriminator: pre-train it with BCE if no checkpoint exists yet
    netD = model.Discriminator(VOCAB_SIZE, EMBED_DIM, FILTER_SIZE, NUM_FILTER)
    if not os.path.exists(netD_path + '.pkg'):
        Train_dis_BCE(netD, agent, loader, epochs=100, out=netD_path)
    netD.load_state_dict(T.load(netD_path + '.pkg'))

    best_score = 0
    log = open(agent_path + '.log', 'w')

    for epoch in range(1000):
        print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
        print('\nPolicy Gradient Training Generator : ')
        Train_GAN(agent, netD, netR)
        print('\nAdversarial Training Discriminator : ')
        Train_dis_BCE(netD, agent, loader, epochs=1)

        # Evaluate the agent on 1000 sampled SMILES
        seqs = agent.sample(1000)
        ix = util.unique(seqs)
        smiles, valids = util.check_smiles(seqs[ix], agent.voc)
        scores = netR(smiles)
        scores[valids == 0] = 0
        unique = (scores >= 0.5).sum() / 1000
        if best_score < unique:
            T.save(agent.state_dict(), agent_path + '.pkg')
            best_score = unique
        print("Epoch: %d average: %.4f valid: %.4f unique: %.4f"
              % (epoch, scores.mean(), valids.mean(), unique), file=log)
        for i, smile in enumerate(smiles):
            print('%f\t%s' % (scores[i], smile), file=log)

        # Decay the learning rate by 1% per epoch
        for param_group in agent.optim.param_groups:
            param_group['lr'] *= (1 - 0.01)
    log.close()
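# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original script): main() and the
# training functions below rely on module-level imports and hyperparameter
# constants. The imports mirror how the names are used in this file; the
# constant values are placeholders, not the authors' settings.
# ---------------------------------------------------------------------------
import os
import numpy as np
import pandas as pd
import torch
import torch as T
from torch import optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import trange
import model
import util

SIGMA, BL = 0.5, 0.1                 # reward mixing weight and reward baseline (placeholder values)
BATCH_SIZE, MC = 512, 10             # sampling batch size and Monte Carlo repeats (placeholder values)
VOCAB_SIZE, EMBED_DIM = 128, 128     # discriminator vocabulary and embedding sizes (placeholder values)
FILTER_SIZE = [2, 3, 4, 5]           # CNN filter widths for the discriminator (placeholder values)
NUM_FILTER = [100, 100, 100, 100]    # number of filters per width (placeholder values)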
def rollout_pg(agent, environ, explore=None, *, batch_size, baseline, mc, epsilon):
    """Train the generator under the reinforcement learning framework.

    A reward is assigned to each token in the SMILES string; the per-token rewards
    are estimated by Monte Carlo search based on the final reward given by the
    environment.

    Arguments:
        agent (model.Generator): the exploitation network for SMILES generation.
        environ (util.Activity): the environment providing the final reward for
            each SMILES.
        explore (model.Generator, optional): the exploration network for SMILES
            generation; it has the same architecture as the agent.
        batch_size (int): number of SMILES sampled per update.
        baseline (float): reward baseline subtracted from the environment score.
        mc (int): number of Monte Carlo rollouts per token position.
        epsilon (float): exploration rate used when sampling with the exploration
            network.
    """
    agent.optim.zero_grad()
    seqs = agent.sample(batch_size, explore=explore, epsilon=epsilon)
    batch_size = seqs.size(0)
    seq_len = seqs.size(1)
    rewards = np.zeros((batch_size, seq_len))
    smiles, valids = util.check_smiles(seqs, agent.voc)
    preds = environ(smiles) - baseline
    preds[valids == 0] = -baseline
    scores, hiddens = agent.likelihood(seqs)

    # Monte Carlo search for per-token reward estimation
    for _ in trange(mc):
        for i in range(seq_len):
            if (seqs[:, i] != 0).any():
                # Complete each prefix seqs[:, :i+1] and score the full SMILES
                h = hiddens[:, :, i, :]
                subseqs = agent.sample(batch_size, inits=(seqs[:, i], h, i + 1, None))
                subseqs = torch.cat([seqs[:, :i + 1], subseqs], dim=1)
                subsmile, subvalid = util.check_smiles(subseqs, voc=agent.voc)
                subpred = environ(subsmile) - baseline
                subpred[subvalid == 0] = -baseline
            else:
                # All sequences have already ended; reuse the final-reward predictions
                subpred = preds
            rewards[:, i] += subpred
    loss = agent.PGLoss(scores, seqs, torch.FloatTensor(rewards / mc))
    loss.backward()
    agent.optim.step()
    return 0, valids.mean(), smiles, preds
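def example_rollout_training(agent, environ, explore=None, n_updates=100):
    """Illustrative sketch only (not original code): one way to drive rollout_pg()
    in a simple training loop. The hyperparameter values are placeholders, and
    `agent`, `environ`, `explore` are assumed to be set up as described in the
    rollout_pg docstring above."""
    for step in range(n_updates):
        _, valid_rate, smiles, preds = rollout_pg(
            agent, environ, explore=explore,
            batch_size=64, baseline=0.1, mc=5, epsilon=0.01)
        print('update %d: valid rate %.3f, mean advantage %.3f'
              % (step + 1, valid_rate, preds.mean()))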
def sample(self, n_samples, explore=None, mc=1, epsilon=0.01, include_tensors=False):
    seqs = []
    # Repeat sampling mc times and pool the results
    for _ in range(mc):
        seq = self.model.sample(n_samples, explore=explore.model if explore else None,
                                epsilon=epsilon)
        seqs.append(seq)
    seqs = torch.cat(seqs, dim=0)
    # Keep unique sequences only and check which ones are valid SMILES
    ix = util.unique(seqs)
    seqs = seqs[ix]
    smiles, valids = util.check_smiles(seqs, self.corpus.voc)
    if include_tensors:
        return smiles, valids, seqs
    return smiles, valids
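def example_valid_fraction(generator, explorer=None, n_samples=500):
    """Illustrative sketch only (not original code): estimate the fraction of valid
    SMILES among the unique sequences returned by sample(). `generator` (and the
    optional `explorer`) are assumed to be instances of the class that defines
    sample() above; the sampling settings are placeholders."""
    smiles, valids = generator.sample(n_samples, explore=explorer, mc=4, epsilon=0.05)
    return float(valids.mean())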
def Train_GAN(netG, netD, netR, sigma=SIGMA):
    seqs = []
    for _ in range(MC):
        seq = netG.sample(BATCH_SIZE)
        seqs.append(seq)
    seqs = T.cat(seqs, dim=0)
    ix = util.unique(seqs)
    seqs = seqs[ix]
    smiles, valids = util.check_smiles(seqs, netG.voc)

    # Reward: sigma-weighted mixture of environment score and discriminator output
    preds = sigma * netR(smiles) + (1 - sigma) * netD(
        util.Variable(seqs)).data.cpu().numpy()[:, 0]
    preds[valids == 0] = 0
    preds -= BL

    ds = TensorDataset(seqs, T.Tensor(preds.reshape(-1, 1)))
    loader = DataLoader(ds, batch_size=BATCH_SIZE)
    for seq, pred in loader:
        score, _ = netG.likelihood(seq)
        netG.optim.zero_grad()
        loss = netG.PGLoss(score, seq, pred)
        loss.backward()
        netG.optim.step()
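def example_pg_loss(log_probs, seqs, rewards):
    """Illustrative sketch only: a REINFORCE-style policy-gradient objective of the
    kind netG.PGLoss is expected to implement (an assumption, not the authors' code).
    Assumes `log_probs` holds per-token log-likelihoods of shape (batch, seq_len),
    `rewards` is broadcastable to that shape, and 0 is the padding token in `seqs`."""
    mask = (seqs != 0).float()                       # ignore padded positions
    per_seq = (log_probs * rewards * mask).sum(dim=1)
    return -per_seq.mean()                           # maximize reward-weighted likelihood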
def fit(self, loader_train, loader_valid=None, epochs=100, lr=1e-3, *, monitor_freq=10):
    """Train the RNN generative model, similar to the scikit-learn or Keras style.
    The best parameter values found so far are handed to the attached monitors,
    which can persist them to disk.

    Arguments:
        loader_train (DataLoader): data loader for the training set; it wraps a
            util.MolData Dataset. Each batch is an m x n LongTensor, where m is
            the number of samples and n is the maximum sequence length.
        loader_valid (DataLoader, optional): data loader for the validation set,
            with the same structure as loader_train.
        epochs (int, optional): maximum number of training epochs (default: 100).
        lr (float, optional): learning rate (default: 1e-3).
        monitor_freq (int, optional): evaluate performance every monitor_freq
            batches (default: 10).
    """
    optimizer = optim.Adam(self.parameters(), lr=lr)
    best_error = np.inf
    total_epochs = epochs
    total_batches = len(loader_train)
    total_steps = total_batches * total_epochs
    current_step = 0
    for epoch in trange(epochs, desc='Epoch'):
        for i, batch in enumerate(loader_train):
            current_step += 1
            current_batch = i
            optimizer.zero_grad()
            loss_train = self.likelihood(batch.to(util.getDev()))
            loss_train = -loss_train.mean()
            loss_train.backward()
            optimizer.step()

            # Performance evaluation
            current_loss_valid = None
            if (monitor_freq > 0 and i % monitor_freq == 0) or loader_valid is not None:
                # Sample 1000 SMILES
                seqs = self.sample(1000)
                # ix = util.unique(seqs)
                # seqs = seqs[ix]
                # Check the validity of each SMILES
                smiles, valids = util.check_smiles(seqs, self.voc)
                error = 1 - sum(valids) / len(seqs)
                current_loss_train = loss_train.item()
                current_error_rate = error

                # Save the optimal model parameters, i.e. those with the minimum loss value
                is_best = False
                if loader_valid is not None:
                    # If a validation set is given, the loss is calculated on it.
                    loss_valid, size = 0, 0
                    for j, inner_batch in enumerate(loader_valid):
                        size += inner_batch.size(0)
                        with torch.no_grad():
                            loss_valid += -self.likelihood(inner_batch.to(util.getDev())).sum()
                    loss_valid = loss_valid / size / self.voc.max_len
                    current_loss_valid = loss_valid.item()
                    if current_loss_valid < best_error:
                        is_best = True
                        best_error = current_loss_valid
                elif error < best_error:
                    # If no validation set is given, the error rate on the
                    # sampled SMILES is used instead.
                    is_best = True
                    best_error = error

                # Feed monitoring info
                for monitor in self.monitors:
                    monitor.model(self)
                    monitor.state(self.state_dict(), is_best)
                    monitor.performance(current_loss_train, current_loss_valid,
                                        current_error_rate, best_error)
                    for j, smile in enumerate(smiles):
                        monitor.smiles(smile, valids[j])
                    monitor.finalizeStep(epoch + 1, current_batch, current_step,
                                         total_epochs, total_batches, total_steps)
    for monitor in self.monitors:
        monitor.close()
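def example_fit(generator, train_set, valid_set=None):
    """Illustrative sketch only (not original code): wiring util.MolData-style
    datasets into fit(). `generator` is assumed to be an instance of the class
    defining fit(); batch size and epoch count are placeholders."""
    loader_train = DataLoader(train_set, batch_size=128, shuffle=True,
                              drop_last=True, collate_fn=train_set.collate_fn)
    loader_valid = None
    if valid_set is not None:
        loader_valid = DataLoader(valid_set, batch_size=128,
                                  collate_fn=valid_set.collate_fn)
    generator.fit(loader_train, loader_valid=loader_valid, epochs=50, lr=1e-3)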