Example #1
    def fit(self, train_loader, valid_loader, out, epochs=100, lr=1e-4):
        """Training the DNN model, similar to the scikit-learn or Keras style.
        In the end, the optimal value of parameters will also be persisted on the hard drive.
        
        Arguments:
            train_loader (DataLoader): Data loader for training set,
                including m X n target FloatTensor and m X l label FloatTensor
                (m is the No. of sample, n is the No. of features, l is the No. of classes or tasks)
            valid_loader (DataLoader): Data loader for validation set.
                The data structure is as same as loader_train.
            out (str): the file path for the model file (suffix with '.pkg')
                and log file (suffix with '.log').
            epochs(int, optional): The maximum of training epochs (default: 100)
            lr (float, optional): learning rate (default: 1e-4)
        """

        if 'optim' in self.__dict__:
            optimizer = self.optim
        else:
            optimizer = optim.Adam(self.parameters(), lr=lr)
        # best (minimum) validation loss seen so far
        best_loss = np.inf
        # epoch at which the best model was last saved
        last_save = 0
        log = open(out + '.log', 'w')
        for epoch in range(epochs):
            t0 = time.time()
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr * (1 - 1 / epochs) ** (epoch * 10)
            for i, (Xb, yb) in enumerate(train_loader):
                # batch of feature tensor and label tensor
                Xb, yb = Xb.to(util.getDev()), yb.to(util.getDev())
                optimizer.zero_grad()
                # predicted probability tensor
                y_ = self.forward(Xb, istrain=True)

                # ignore all of the NaN values
                ix = yb == yb
                yb, y_ = yb[ix], y_[ix]
                # loss function calculation based on predicted tensor and label tensor
                loss = self.criterion(y_, yb)
                loss.backward()
                optimizer.step()
            # validation loss, used to decide whether to save the model as the best so far
            loss_valid = self.evaluate(valid_loader)
            print('[Epoch: %d/%d] %.1fs loss_train: %f loss_valid: %f' % (
                    epoch, epochs, time.time() - t0, loss.item(), loss_valid), file=log)
            if loss_valid < best_loss:
                torch.save(self.state_dict(), out + '.pkg')
                print('[Performance] loss_valid is improved from %f to %f, Save model to %s' %
                      (best_loss, loss_valid, out + '.pkg'), file=log)
                best_loss = loss_valid
                last_save = epoch
            else:
                print('[Performance] loss_valid is not improved.', file=log)
                # early stopping: if the validation loss has not improved for 100 epochs,
                # stop training to save time.
                if epoch - last_save > 100: break
        log.close()
        self.load_state_dict(torch.load(out + '.pkg'))
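Below is a minimal usage sketch for fit(), assuming the STFullyConnected predictor from Example #11 and synthetic data; the feature dimension, batch size, and output prefix are illustrative and not taken from the original source.

import torch
from torch.utils.data import TensorDataset, DataLoader

# synthetic m x n features and m x l labels (illustrative shapes)
X_train, y_train = torch.rand(1024, 2048), torch.randint(0, 2, (1024, 1)).float()
X_valid, y_valid = torch.rand(256, 2048), torch.randint(0, 2, (256, 1)).float()

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=128, shuffle=True)
valid_loader = DataLoader(TensorDataset(X_valid, y_valid), batch_size=128)

model = STFullyConnected(n_dim=2048, n_class=1)            # class shown in Example #11
model.fit(train_loader, valid_loader, out='output/model')  # writes output/model.pkg and output/model.log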
Example #2
    def init_h(self, batch_size):
        """Initialize the hidden states for the Recurrent layers.

        Arguments:
            batch_size (int): the No. of sample for each batch iterated by data loader.

        Results:
            h (FloatTensor): the hidden states for the recurrent layers initialization.
            c (FloatTensor): the LSTM cell states for the recurrent layers initialization,
                this tensor is only for LSTM based recurrent layers.
        """
        h = torch.zeros(3, batch_size, self.hidden_size).to(util.getDev())
        c = torch.zeros(3, batch_size, self.hidden_size).to(util.getDev())
        return (h, c) if self.is_lstm else h
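A quick sketch of the shapes init_h() returns, assuming the Generator class from Example #8 (3 recurrent layers, default hidden_size of 512) and an existing vocabulary object voc.

gen = Generator(voc, is_lstm=True)        # Example #8; 3-layer LSTM
h, c = gen.init_h(batch_size=32)          # both tensors have shape (3, 32, 512)

gru_gen = Generator(voc, is_lstm=False)   # the GRU variant returns a single tensor
h = gru_gen.init_h(batch_size=32)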
Example #3
    def __call__(self, environ: Environ, exploit: PolicyAwareGenerator, explore=None):
        """Training generator under reinforcement learning framework,
        The rewoard is only the final reward given by environment (predictor).

        agent (model.Generator): the exploitation network for SMILES string generation
        environ (util.Activity): the environment provide the final reward for each SMILES
        explore (model.Generator): the exploration network for SMILES string generation,
            it has the same architecture with the agent.
        """

        smiles, valids, seqs = exploit.sample(
            self.batch_size
            , explore=explore
            , epsilon=self.epsilon
            , include_tensors=True
            , mc=self.mc
        )

        # obtaining the reward
        preds = environ.predictSMILES(smiles)
        preds[valids == False] = 0
        preds -= self.beta
        preds = torch.Tensor(preds.reshape(-1, 1)).to(util.getDev())

        ds = TensorDataset(seqs, preds)
        loader = DataLoader(ds, batch_size=self.batch_size)

        # Training Loop
        for seq, pred in loader:
            exploit.policyUpdate(seq, pred)
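The reward shaping in the middle of __call__ is easy to trace with a small numeric sketch; the beta value of 0.1 below is an illustrative assumption.

import numpy as np
import torch

preds = np.array([0.9, 0.4, 0.7])             # environment scores for three sampled SMILES
valids = np.array([True, False, True])        # validity flags from the sampler
preds[valids == False] = 0                    # invalid molecules receive zero reward
preds = preds - 0.1                           # subtract the baseline (self.beta, assumed 0.1 here)
rewards = torch.Tensor(preds.reshape(-1, 1))  # column vector fed to policyUpdate
print(rewards)                                # approximately tensor([[0.8], [-0.1], [0.6]])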
Example #4
    def sample(self, batch_size, epsilon=0.01, explore=None):
        """Token selection based on the probability distribution for molecule generation

        Arguments:
            batch_size (int): the No. of sample to be generated for each time.
            epsilon (float, optional): the exploration rate, it determines the percentage of
                contribution the exploration network make.
            explore (Generator optional): exploration network, it has the same neural architecture with
                the exploitation network.

        Returns:
            sequences (LongTensor): m X n matrix that contains the index of tokens from vocaburary
                for SMILES sequence construction. m is the No. of samples, n is the maximum sequence
                length.
        """
        # Start tokens
        x = torch.LongTensor([self.voc.tk2ix['GO']] * batch_size).to(util.getDev())
        # Hidden states initialization for exploitation network
        h = self.init_h(batch_size)
        # Hidden states initialization for exploration network
        h1 = self.init_h(batch_size)
        # Initialization of output matrix
        sequences = torch.zeros(batch_size, self.voc.max_len).long().to(util.getDev())
        # flags recording which samples have already ended
        is_end = torch.zeros(batch_size).byte().to(util.getDev())

        for step in range(self.voc.max_len):
            with torch.no_grad():
                logit, h = self(x, h)
                if explore:
                    logit1, h1 = explore(x, h1)
                    loc = (torch.rand(batch_size, 1) < epsilon).expand(logit.size()).to(util.getDev())
                    logit[loc] = logit1[loc]
                proba = logit.softmax(dim=-1)
                # sampling based on output probability distribution
                x = torch.multinomial(proba, 1).view(-1)

                x[is_end] = self.voc.tk2ix['EOS']
                sequences[:, step] = x

                # determine which samples have reached the end token
                end_token = (x == self.voc.tk2ix['EOS'])
                is_end = torch.ge(is_end + end_token, 1)
                # if all samples have ended, stop the sampling process
                if (is_end == 1).all(): break
        return sequences
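A short usage sketch for sample(), reusing the Generator and vocabulary from the earlier sketch; util.check_smiles is the decoding helper also used in Example #12.

gen = Generator(voc)                                # Example #8
seqs = gen.sample(batch_size=128, epsilon=0.01)     # 128 x voc.max_len LongTensor of token indices
smiles, valids = util.check_smiles(seqs, gen.voc)   # decode indices to SMILES and flag validity
print('valid fraction: %.2f' % (sum(valids) / len(smiles)))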
Example #5
    def evaluate(self, loader):
        """Evaluating the performance of the DNN model.

        Arguments:
            loader (torch.utils.data.DataLoader): data loader for test set,
                including m X n target FloatTensor and l X n label FloatTensor
                (m is the No. of sample, n is the No. of features, l is the No. of classes or tasks)

        Return:
            loss (float): the average loss value based on the calculation of loss function with given test set.
        """
        loss = 0
        for Xb, yb in loader:
            Xb, yb = Xb.to(util.getDev()), yb.to(util.getDev())
            y_ = self.forward(Xb)
            # ignore entries whose labels are NaN (missing values)
            ix = yb == yb
            yb, y_ = yb[ix], y_[ix]
            loss += self.criterion(y_, yb).item()
        loss = loss / len(loader)
        return loss
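Evaluation reuses the same loader format as fit(); note that entries with NaN labels are excluded from the loss, which allows sparse multi-task label matrices. A one-line usage sketch, assuming the model and valid_loader from the fit() example above.

valid_loss = model.evaluate(valid_loader)   # mean loss over all validation batches
print('loss_valid: %f' % valid_loss)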
Example #6
    def __init__(self, vocab_size, emb_dim, filter_sizes, num_filters, dropout=0.25):
        super(Discriminator, self).__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim)
        self.convs = nn.ModuleList([
            nn.Conv2d(1, n, (f, emb_dim)) for (n, f) in zip(num_filters, filter_sizes)
        ])
        self.highway = nn.Linear(sum(num_filters), sum(num_filters))
        self.dropout = nn.Dropout(p=dropout)
        self.lin = nn.Linear(sum(num_filters), 1)
        self.sigmoid = nn.Sigmoid()
        self.init_parameters()

        self.to(util.getDev())
        self.optim = torch.optim.Adam(filter(lambda p: p.requires_grad, self.parameters()))
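An instantiation sketch for the CNN discriminator above; the vocabulary size and the filter configuration are illustrative assumptions (one convolution per (num_filters, filter_size) pair, as in the ModuleList).

D = Discriminator(vocab_size=128, emb_dim=64,
                  filter_sizes=[2, 3, 4, 5],
                  num_filters=[100, 100, 100, 100])
print(sum(p.numel() for p in D.parameters() if p.requires_grad))  # trainable parameter count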
Example #7
    def likelihood(self, target):
        """Calculating the probability for generating each token in each SMILES string

        Arguments:
            target (LongTensor): m X n LongTensor of SMILES string representation.
                m is the No. of samples, n is the maximum size of the tokens for
                the whole SMILES strings.

        Returns:
            scores (FloatTensor): m X n LongTensor of the probability for for generating
                each token in each SMILES string. m is the No. of samples, n is n is
                the maximum size of the tokens for the whole SMILES strings
        """
        batch_size, seq_len = target.size()
        x = torch.LongTensor([self.voc.tk2ix['GO']] * batch_size).to(util.getDev())
        h = self.init_h(batch_size)
        scores = torch.zeros(batch_size, seq_len).to(util.getDev())
        for step in range(seq_len):
            logits, h = self(x, h)
            score = logits.log_softmax(dim=-1)
            score = score.gather(1, target[:, step:step+1]).squeeze()
            scores[:, step] = score
            x = target[:, step]
        return scores
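A sketch of how likelihood() feeds the maximum-likelihood objective used in Example #12, reusing the gen instance from the earlier sketches; the random batch below only illustrates the expected shape.

batch = torch.randint(0, voc.size, (16, voc.max_len))   # m x n token indices (illustrative)
scores = gen.likelihood(batch.to(util.getDev()))        # m x n per-token log probabilities
loss = -scores.mean()                                   # negative log-likelihood, as in Example #12
loss.backward()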
Example #8
 def __init__(self, voc, monitor=None, embed_size=128, hidden_size=512, is_lstm=True):
     super(Generator, self).__init__()
     self.monitors = [monitor] if monitor else []
     self.voc = voc
     self.embed_size = embed_size
     self.hidden_size = hidden_size
     self.output_size = voc.size
     self.is_lstm = is_lstm
     
     self.embed = nn.Embedding(voc.size, embed_size)
     if is_lstm:
         self.rnn = nn.LSTM(embed_size, hidden_size, num_layers=3, batch_first=True)
     else:
         self.rnn = nn.GRU(embed_size, hidden_size, num_layers=3, batch_first=True)
     self.linear = nn.Linear(hidden_size, voc.size)
     self.optim = torch.optim.Adam(self.parameters())
     self.to(util.getDev())
Example #9
 def __init__(self, n_dim, n_task, is_reg=False):
     super(MTFullyConnected, self).__init__()
     self.n_task = n_task
     self.dropout = nn.Dropout(0.25)
     self.fc0 = nn.Linear(n_dim, 8000)
     self.fc1 = nn.Linear(8000, 4000)
     self.fc2 = nn.Linear(4000, 2000)
     self.output = nn.Linear(2000, n_task)
     self.is_reg = is_reg
     if is_reg:
         # loss function for regression
         self.criterion = nn.MSELoss()
     else:
         # loss function and activation function of output layer for multiple classification
         self.criterion = nn.BCELoss()
         self.activation = nn.Sigmoid()
     self.to(util.getDev())
Example #10
    def predict(self, loader):
        """Predicting the probability of each sample in the given dataset.

        Arguments:
            loader (torch.utils.data.DataLoader): data loader for test set,
                only including m X n target FloatTensor
                (m is the No. of sample, n is the No. of features)

        Return:
            score (ndarray): probability of each sample in the given dataset,
                it is a m X l FloatTensor (m is the No. of sample, l is the No. of classes or tasks.)
        """
        score = []
        for Xb, yb in loader:
            Xb = Xb.to(util.getDev())
            y_ = self.forward(Xb)
            score.append(y_.detach().cpu())
        score = torch.cat(score, dim=0).numpy()
        return score
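Prediction returns a NumPy array rather than a tensor, so it plugs directly into downstream metrics; a short sketch assuming the model and valid_loader from the fit() example above.

scores = model.predict(valid_loader)    # ndarray of shape (m, l)
print(scores.shape, scores.min(), scores.max())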
Example #11
 def __init__(self, n_dim, n_class, is_reg=False):
     super(STFullyConnected, self).__init__()
     self.dropout = nn.Dropout(0.25)
     self.fc0 = nn.Linear(n_dim, 8000)
     self.fc1 = nn.Linear(8000, 4000)
     self.fc2 = nn.Linear(4000, 2000)
     self.fc3 = nn.Linear(2000, n_class)
     self.is_reg = is_reg
     if is_reg:
         # loss function for regression
         self.criterion = nn.MSELoss()
     elif n_class == 1:
         # loss function and activation function of output layer for binary classification
         self.criterion = nn.BCELoss()
         self.activation = nn.Sigmoid()
     else:
         # loss function and activation function of output layer for multiple classification
         self.criterion = nn.CrossEntropyLoss()
         self.activation = nn.Softmax()
     self.to(util.getDev())
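The two predictor flavours above differ mainly in the width of the output layer and the loss they select; an instantiation sketch with illustrative dimensions.

mt_reg = MTFullyConnected(n_dim=2048, n_task=12, is_reg=True)   # multi-task regression, MSE loss
st_clf = STFullyConnected(n_dim=2048, n_class=1)                # binary classifier, BCE loss + sigmoid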
Example #12
    def fit(self, loader_train, loader_valid=None, epochs=100, lr=1e-3,*, monitor_freq=10):
        """Training the RNN generative model, similar to the scikit-learn or Keras style.

        In the end, the optimal value of parameters will also be persisted on the hard drive.

        Arguments:
            loader_train (DataLoader): Data loader for training set, it contains
            Dataset with util.MolData; for each iteration, the output batch is
            m X n LongTensor, m is the No. of samples, n is the maximum length
            of sequences.
            out_path (str): the file path for the model file
            loader_valid (DataLoader, optional): Data loader for validation set.
                The data structure is as same as loader_train.
                and log file (suffix with '.log').
            epochs(int, optional): The maximum of training epochs (default: 100)
            lr (float, optional): learning rate (default: 1e-3)
        """
        optimizer = optim.Adam(self.parameters(), lr=lr)
        best_error = np.inf
        total_epochs = epochs
        total_batches = len(loader_train)
        total_steps = total_batches * total_epochs
        current_step = 0
        for epoch in trange(epochs, desc='Epoch'):
            for i, batch in enumerate(loader_train):
                current_step += 1
                current_batch = i
                optimizer.zero_grad()
                loss_train = self.likelihood(batch.to(util.getDev()))
                loss_train = -loss_train.mean()
                loss_train.backward()
                optimizer.step()
                # Performance Evaluation
                current_loss_valid = None
                if (monitor_freq > 0 and i % monitor_freq == 0) or loader_valid is not None:
                    # sample 1000 SMILES sequences
                    seqs = self.sample(1000)
                    # ix = util.unique(seqs)
                    # seqs = seqs[ix]
                    # check the validity of each SMILES
                    smiles, valids = util.check_smiles(seqs, self.voc)
                    error = 1 - sum(valids) / len(seqs)

                    current_loss_train = loss_train.item()
                    current_error_rate = error
                    # determine whether this is the best model seen so far
                    is_best = False
                    if loader_valid is not None:
                        # If the validation set is given, the loss function will be
                        # calculated on the validation set.
                        loss_valid, size = 0, 0
                        for j, inner_batch in enumerate(loader_valid):
                            size += inner_batch.size(0)
                            with torch.no_grad():
                                loss_valid += -self.likelihood(inner_batch.to(util.getDev())).sum()

                        loss_valid = loss_valid / size / self.voc.max_len
                        current_loss_valid = loss_valid.item()
                        if current_loss_valid < best_error:
                            is_best = True
                            best_error = current_loss_valid
                    elif error < best_error:
                        # if no validation set is given, the rate of invalid sampled
                        # SMILES is used as the selection criterion instead.
                        is_best = True
                        best_error = error

                    # feed monitoring info
                    for monitor in self.monitors:
                        monitor.model(self)
                        monitor.state(self.state_dict(), is_best)
                        monitor.performance(current_loss_train, current_loss_valid, current_error_rate, best_error)
                        for j, smile in enumerate(smiles):
                            monitor.smiles(smile, valids[j])
                        monitor.finalizeStep(epoch+1, current_batch, current_step, total_epochs, total_batches, total_steps)

        for monitor in self.monitors:
            monitor.close()
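A hedged usage sketch for the generator pre-training; the vocabulary file, corpus path, and the exact util.Voc / util.MolData constructor signatures are assumptions for illustration only.

from torch.utils.data import DataLoader

voc = util.Voc('data/voc.txt')                           # hypothetical vocabulary file
train_set = util.MolData('data/train_corpus.txt', voc)   # hypothetical tokenized SMILES corpus
train_loader = DataLoader(train_set, batch_size=512, shuffle=True)

gen = Generator(voc, monitor=None)                       # Example #8
gen.fit(train_loader, loader_valid=None, epochs=100, lr=1e-3, monitor_freq=10)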