def fit(self, train_loader, valid_loader, out, epochs=100, lr=1e-4):
    """Train the DNN model, similar to the scikit-learn or Keras style.
    In the end, the optimal parameter values are also persisted to the hard drive.

    Arguments:
        train_loader (DataLoader): data loader for the training set, yielding an
            m X n feature FloatTensor and an m X l label FloatTensor
            (m is the No. of samples, n is the No. of features,
            l is the No. of classes or tasks).
        valid_loader (DataLoader): data loader for the validation set.
            Its structure is the same as train_loader.
        out (str): file path prefix for the model file (suffixed with '.pkg')
            and the log file (suffixed with '.log').
        epochs (int, optional): maximum number of training epochs (default: 100).
        lr (float, optional): learning rate (default: 1e-4).
    """
    if 'optim' in self.__dict__:
        optimizer = self.optim
    else:
        optimizer = optim.Adam(self.parameters(), lr=lr)
    # minimum validation loss value observed so far
    best_loss = np.inf
    # epoch in which the optimal model was last saved
    last_save = 0
    log = open(out + '.log', 'w')
    for epoch in range(epochs):
        t0 = time.time()
        # learning rate decay schedule
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr * (1 - 1 / epochs) ** (epoch * 10)
        for i, (Xb, yb) in enumerate(train_loader):
            # batch of feature tensor and label tensor
            Xb, yb = Xb.to(util.getDev()), yb.to(util.getDev())
            optimizer.zero_grad()
            # predicted probability tensor
            y_ = self.forward(Xb, istrain=True)
            # ignore all NaN labels (missing values)
            ix = yb == yb
            yb, y_ = yb[ix], y_[ix]
            # loss calculated from the predicted tensor and the label tensor
            loss = self.criterion(y_, yb)
            loss.backward()
            optimizer.step()
        # loss on the validation set, based on which the optimal model is saved
        loss_valid = self.evaluate(valid_loader)
        print('[Epoch: %d/%d] %.1fs loss_train: %f loss_valid: %f' % (
            epoch, epochs, time.time() - t0, loss.item(), loss_valid), file=log)
        if loss_valid < best_loss:
            torch.save(self.state_dict(), out + '.pkg')
            print('[Performance] loss_valid is improved from %f to %f, Save model to %s' %
                  (best_loss, loss_valid, out + '.pkg'), file=log)
            best_loss = loss_valid
            last_save = epoch
        else:
            print('[Performance] loss_valid is not improved.', file=log)
            # early stopping: if the performance on the validation set has not improved
            # for 100 epochs, training stops in order to save time.
            if epoch - last_save > 100:
                break
    log.close()
    self.load_state_dict(torch.load(out + '.pkg'))
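# Usage sketch (illustrative only): training a single-task classifier with the fit/evaluate/predict
# API defined in this module. The arrays X_*/y_* and the output prefix 'qsar_model' are hypothetical
# placeholders; the model class (STFullyConnected) and the (feature, label) loader layout follow the
# definitions shown elsewhere in this file.
import torch
from torch.utils.data import DataLoader, TensorDataset

def example_train_qsar(X_train, y_train, X_valid, y_valid, out='qsar_model'):
    # wrap numpy arrays into (feature, label) FloatTensor pairs, as expected by fit/evaluate/predict
    train_set = TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
    valid_set = TensorDataset(torch.Tensor(X_valid), torch.Tensor(y_valid))
    train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
    valid_loader = DataLoader(valid_set, batch_size=128)

    model = STFullyConnected(n_dim=X_train.shape[1], n_class=1)
    # trains the model, writes out + '.pkg' and out + '.log', and reloads the best parameters
    model.fit(train_loader, valid_loader, out, epochs=100, lr=1e-4)
    # per-sample probabilities for the validation set (m X 1 ndarray)
    return model.predict(valid_loader)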
def init_h(self, batch_size):
    """Initialize the hidden states for the recurrent layers.

    Arguments:
        batch_size (int): the No. of samples in each batch iterated by the data loader.

    Returns:
        h (FloatTensor): initial hidden states for the recurrent layers.
        c (FloatTensor): initial LSTM cell states for the recurrent layers;
            this tensor is only returned for LSTM-based recurrent layers.
    """
    # the leading dimension (3) matches num_layers=3 of the recurrent layers
    h = torch.zeros(3, batch_size, self.hidden_size).to(util.getDev())
    c = torch.zeros(3, batch_size, self.hidden_size).to(util.getDev())
    return (h, c) if self.is_lstm else h
def __call__(self, environ: Environ, exploit: PolicyAwareGenerator, explore=None):
    """Train the generator under the reinforcement learning framework.
    The reward is only the final reward given by the environment (predictor).

    Arguments:
        environ (Environ): the environment providing the final reward for each SMILES.
        exploit (PolicyAwareGenerator): the exploitation network for SMILES string generation.
        explore (PolicyAwareGenerator, optional): the exploration network for SMILES string
            generation; it has the same architecture as the exploitation network.
    """
    smiles, valids, seqs = exploit.sample(
        self.batch_size,
        explore=explore,
        epsilon=self.epsilon,
        include_tensors=True,
        mc=self.mc
    )

    # obtaining the reward: zero for invalid SMILES, shifted by the baseline beta
    preds = environ.predictSMILES(smiles)
    preds[valids == False] = 0
    preds -= self.beta
    preds = torch.Tensor(preds.reshape(-1, 1)).to(util.getDev())

    ds = TensorDataset(seqs, preds)
    loader = DataLoader(ds, batch_size=self.batch_size)

    # training loop: policy update for each batch of sequences and rewards
    for seq, pred in loader:
        exploit.policyUpdate(seq, pred)
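# Interface sketch (illustrative only): the minimal surface that the training step above expects
# from `environ`. The real environment class in this project wraps a trained predictor; the dummy
# below only documents the assumed contract: predictSMILES takes a list of SMILES strings and
# returns a 1-D numpy array of reward scores, which is then masked by `valids` and shifted by beta.
import numpy as np

class DummyEnviron:
    """Stand-in reward provider used only to illustrate the expected interface."""

    def predictSMILES(self, smiles):
        # one reward score per SMILES string; a real environment would run its predictor here
        return np.zeros(len(smiles), dtype=np.float32)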
def sample(self, batch_size, epsilon=0.01, explore=None):
    """Token selection based on the output probability distribution for molecule generation.

    Arguments:
        batch_size (int): the No. of samples to be generated at a time.
        epsilon (float, optional): the exploration rate; it determines how much the
            exploration network contributes to token selection.
        explore (Generator, optional): exploration network; it has the same neural
            architecture as the exploitation network.

    Returns:
        sequences (LongTensor): m X n matrix containing the indices of tokens from the
            vocabulary for SMILES sequence construction (m is the No. of samples,
            n is the maximum sequence length).
    """
    # start tokens
    x = torch.LongTensor([self.voc.tk2ix['GO']] * batch_size).to(util.getDev())
    # hidden state initialization for the exploitation network
    h = self.init_h(batch_size)
    # hidden state initialization for the exploration network
    h1 = self.init_h(batch_size)
    # initialization of the output matrix
    sequences = torch.zeros(batch_size, self.voc.max_len).long().to(util.getDev())
    # flags recording which samples have already ended
    is_end = torch.zeros(batch_size).byte().to(util.getDev())

    for step in range(self.voc.max_len):
        with torch.no_grad():
            logit, h = self(x, h)
            if explore:
                logit1, h1 = explore(x, h1)
                # with probability epsilon, take the logits from the exploration network
                loc = (torch.rand(batch_size, 1) < epsilon).expand(logit.size()).to(util.getDev())
                logit[loc] = logit1[loc]
            proba = logit.softmax(dim=-1)
            # sampling based on the output probability distribution
            x = torch.multinomial(proba, 1).view(-1)
            x[is_end] = self.voc.tk2ix['EOS']
            sequences[:, step] = x

            # judging whether each sample has ended or not
            end_token = (x == self.voc.tk2ix['EOS'])
            is_end = torch.ge(is_end + end_token, 1)
            # if all of the samples have ended, stop the sampling process
            if (is_end == 1).all():
                break
    return sequences
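# Usage sketch (illustrative only): drawing a batch of sequences from a trained generator and
# decoding/validating them with util.check_smiles, as is done in Generator.fit below. `agent` is
# a placeholder for a trained Generator instance and the batch size is arbitrary.
def example_sample(agent, n=128):
    seqs = agent.sample(n)                                # n X max_len LongTensor of token indices
    smiles, valids = util.check_smiles(seqs, agent.voc)   # decode to SMILES and flag the valid ones
    valid_smiles = [s for s, ok in zip(smiles, valids) if ok]
    return valid_smiles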
def evaluate(self, loader):
    """Evaluate the performance of the DNN model.

    Arguments:
        loader (torch.utils.data.DataLoader): data loader for the test set, yielding an
            m X n feature FloatTensor and an m X l label FloatTensor
            (m is the No. of samples, n is the No. of features,
            l is the No. of classes or tasks).

    Returns:
        loss (float): the average loss value over the given test set.
    """
    loss = 0
    for Xb, yb in loader:
        Xb, yb = Xb.to(util.getDev()), yb.to(util.getDev())
        y_ = self.forward(Xb)
        # ignore all NaN labels (missing values)
        ix = yb == yb
        yb, y_ = yb[ix], y_[ix]
        loss += self.criterion(y_, yb).item()
    loss = loss / len(loader)
    return loss
def __init__(self, vocab_size, emb_dim, filter_sizes, num_filters, dropout=0.25):
    super(Discriminator, self).__init__()
    self.emb = nn.Embedding(vocab_size, emb_dim)
    # one 2D convolution per (number of filters, filter size) pair over the embedded sequence
    self.convs = nn.ModuleList([
        nn.Conv2d(1, n, (f, emb_dim)) for (n, f) in zip(num_filters, filter_sizes)
    ])
    self.highway = nn.Linear(sum(num_filters), sum(num_filters))
    self.dropout = nn.Dropout(p=dropout)
    self.lin = nn.Linear(sum(num_filters), 1)
    self.sigmoid = nn.Sigmoid()
    self.init_parameters()

    self.to(util.getDev())
    self.optim = torch.optim.Adam(filter(lambda p: p.requires_grad, self.parameters()))
def likelihood(self, target):
    """Calculate the log probability of generating each token in each SMILES string.

    Arguments:
        target (LongTensor): m X n LongTensor of SMILES token indices
            (m is the No. of samples, n is the maximum sequence length
            over the whole set of SMILES strings).

    Returns:
        scores (FloatTensor): m X n FloatTensor of the log probability of generating
            each token in each SMILES string (same m and n as for target).
    """
    batch_size, seq_len = target.size()
    # start tokens
    x = torch.LongTensor([self.voc.tk2ix['GO']] * batch_size).to(util.getDev())
    h = self.init_h(batch_size)
    scores = torch.zeros(batch_size, seq_len).to(util.getDev())
    for step in range(seq_len):
        logits, h = self(x, h)
        # log probability of the target token at this step
        score = logits.log_softmax(dim=-1)
        score = score.gather(1, target[:, step:step+1]).squeeze()
        scores[:, step] = score
        x = target[:, step]
    return scores
def __init__(self, voc, monitor=None, embed_size=128, hidden_size=512, is_lstm=True):
    super(Generator, self).__init__()
    self.monitors = [monitor] if monitor else []
    self.voc = voc
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.output_size = voc.size
    self.is_lstm = is_lstm

    self.embed = nn.Embedding(voc.size, embed_size)
    if is_lstm:
        self.rnn = nn.LSTM(embed_size, hidden_size, num_layers=3, batch_first=True)
    else:
        self.rnn = nn.GRU(embed_size, hidden_size, num_layers=3, batch_first=True)
    self.linear = nn.Linear(hidden_size, voc.size)
    self.optim = torch.optim.Adam(self.parameters())
    self.to(util.getDev())
def __init__(self, n_dim, n_task, is_reg=False):
    super(MTFullyConnected, self).__init__()
    self.n_task = n_task
    self.dropout = nn.Dropout(0.25)
    self.fc0 = nn.Linear(n_dim, 8000)
    self.fc1 = nn.Linear(8000, 4000)
    self.fc2 = nn.Linear(4000, 2000)
    self.output = nn.Linear(2000, n_task)
    self.is_reg = is_reg
    if is_reg:
        # loss function for regression
        self.criterion = nn.MSELoss()
    else:
        # loss function and output activation for multi-task classification
        self.criterion = nn.BCELoss()
        self.activation = nn.Sigmoid()
    self.to(util.getDev())
def predict(self, loader):
    """Predict the probability of each sample in the given dataset.

    Arguments:
        loader (torch.utils.data.DataLoader): data loader for the test set;
            only the m X n feature FloatTensor is used
            (m is the No. of samples, n is the No. of features).

    Returns:
        score (ndarray): probability of each sample in the given dataset,
            an m X l array (m is the No. of samples, l is the No. of classes or tasks).
    """
    score = []
    for Xb, yb in loader:
        Xb = Xb.to(util.getDev())
        y_ = self.forward(Xb)
        score.append(y_.detach().cpu())
    score = torch.cat(score, dim=0).numpy()
    return score
def __init__(self, n_dim, n_class, is_reg=False):
    super(STFullyConnected, self).__init__()
    self.dropout = nn.Dropout(0.25)
    self.fc0 = nn.Linear(n_dim, 8000)
    self.fc1 = nn.Linear(8000, 4000)
    self.fc2 = nn.Linear(4000, 2000)
    self.fc3 = nn.Linear(2000, n_class)
    self.is_reg = is_reg
    if is_reg:
        # loss function for regression
        self.criterion = nn.MSELoss()
    elif n_class == 1:
        # loss function and output activation for binary classification
        self.criterion = nn.BCELoss()
        self.activation = nn.Sigmoid()
    else:
        # loss function and output activation for multi-class classification
        self.criterion = nn.CrossEntropyLoss()
        self.activation = nn.Softmax(dim=1)
    self.to(util.getDev())
def fit(self, loader_train, loader_valid=None, epochs=100, lr=1e-3, *, monitor_freq=10):
    """Train the RNN generative model, similar to the scikit-learn or Keras style.
    The best parameter values found during training are reported to the registered monitors.

    Arguments:
        loader_train (DataLoader): data loader for the training set; it wraps a
            util.MolData dataset, and each iteration yields an m X n LongTensor
            (m is the No. of samples, n is the maximum sequence length).
        loader_valid (DataLoader, optional): data loader for the validation set.
            Its structure is the same as loader_train.
        epochs (int, optional): maximum number of training epochs (default: 100).
        lr (float, optional): learning rate (default: 1e-3).
        monitor_freq (int, optional): No. of batches between two performance
            evaluations (default: 10).
    """
    optimizer = optim.Adam(self.parameters(), lr=lr)
    best_error = np.inf
    total_epochs = epochs
    total_batches = len(loader_train)
    total_steps = total_batches * total_epochs
    current_step = 0
    for epoch in trange(epochs, desc='Epoch'):
        for i, batch in enumerate(loader_train):
            current_step += 1
            current_batch = i
            optimizer.zero_grad()
            # negative log-likelihood of the training batch
            loss_train = self.likelihood(batch.to(util.getDev()))
            loss_train = -loss_train.mean()
            loss_train.backward()
            optimizer.step()

            # performance evaluation
            current_loss_valid = None
            if (monitor_freq > 0 and i % monitor_freq == 0) or loader_valid is not None:
                # 1000 SMILES are sampled
                seqs = self.sample(1000)
                # ix = util.unique(seqs)
                # seqs = seqs[ix]
                # checking the validity of each SMILES
                smiles, valids = util.check_smiles(seqs, self.voc)
                error = 1 - sum(valids) / len(seqs)
                current_loss_train = loss_train.item()
                current_error_rate = error

                # saving the optimal parameters of the model with the minimum loss value
                is_best = False
                if loader_valid is not None:
                    # if a validation set is given, the loss is calculated on the validation set
                    loss_valid, size = 0, 0
                    for j, inner_batch in enumerate(loader_valid):
                        size += inner_batch.size(0)
                        with torch.no_grad():
                            loss_valid += -self.likelihood(inner_batch.to(util.getDev())).sum()
                    loss_valid = loss_valid / size / self.voc.max_len
                    current_loss_valid = loss_valid.item()
                    if current_loss_valid < best_error:
                        is_best = True
                        best_error = current_loss_valid
                elif error < best_error:
                    # if no validation set is given, the error rate of the sampled
                    # SMILES on the training run is used instead
                    is_best = True
                    best_error = error

                # feed monitoring info
                for monitor in self.monitors:
                    monitor.model(self)
                    monitor.state(self.state_dict(), is_best)
                    monitor.performance(current_loss_train, current_loss_valid,
                                        current_error_rate, best_error)
                    for j, smile in enumerate(smiles):
                        monitor.smiles(smile, valids[j])
                    monitor.finalizeStep(epoch + 1, current_batch, current_step,
                                         total_epochs, total_batches, total_steps)
    for monitor in self.monitors:
        monitor.close()
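# Usage sketch (illustrative only): pre-training the generator on a corpus of encoded SMILES.
# `voc` is assumed to be the vocabulary object used throughout this module (providing tk2ix,
# size and max_len), and `encoded_train` / `encoded_valid` are placeholder m X max_len LongTensors
# of token indices (e.g. produced via util.MolData, as mentioned in the fit docstring). Note that
# the best state is only persisted through a monitor, so a real run would pass one to Generator.
def example_pretrain(voc, encoded_train, encoded_valid):
    loader_train = DataLoader(encoded_train, batch_size=512, shuffle=True, drop_last=True)
    loader_valid = DataLoader(encoded_valid, batch_size=512)
    agent = Generator(voc)
    agent.fit(loader_train, loader_valid, epochs=50, lr=1e-3)
    return agent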