Example #1
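This and the following snippets are methods of a chemprop-style model-wrapper class. They assume import torch, import numpy as np, from tqdm import trange, and from typing import List, Union are in scope, together with a MoleculeDataset exposing smiles(), features(), and targets().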
    def predict_prob_dropout_split(self, data, scaler=None):
        # Keep the net in train mode so dropout stays active: each of the
        # self.n_drop forward passes below then yields a different stochastic
        # prediction (Monte Carlo dropout)
        self.net.train()

        preds = []

        num_iters, iter_step = len(data), self.args.batch_size

        for i in range(0, num_iters, iter_step):
            # Prepare batch
            mol_batch = MoleculeDataset(data[i:i + self.args.batch_size])
            smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

            # Run model
            batch = smiles_batch

            batch_preds = []
            with torch.no_grad():
                # Do not reuse the outer batch index `i` as the loop variable
                # here, or batches would be skipped
                for _ in range(self.n_drop):
                    batch_pred, _emb = self.net(batch, features_batch)
                    batch_preds.append(batch_pred.data.cpu().numpy())

            # Inverse scale if regression; batch_preds is a list of
            # (batch_size, num_tasks) arrays, one per dropout draw
            if scaler is not None:
                batch_preds = [scaler.inverse_transform(p) for p in batch_preds]

            # Collect vectors: stack the n_drop draws side by side, giving one
            # (n_drop * num_tasks)-long row per molecule
            batch_preds = np.hstack(batch_preds).tolist()
            preds.extend(batch_preds)

        return np.array(preds)
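
The method above is the usual Monte Carlo dropout pattern: dropout stays active at inference time and the model is queried self.n_drop times per batch. A minimal usage sketch of how the stacked draws could become an uncertainty estimate (model and test_data are hypothetical stand-ins; the shapes follow from the np.hstack above):

all_preds = model.predict_prob_dropout_split(test_data)   # (N, n_drop * n_tasks)
n_tasks = all_preds.shape[1] // model.n_drop
per_draw = all_preds.reshape(-1, model.n_drop, n_tasks)   # split out the draws
mean_pred = per_draw.mean(axis=1)    # averaged prediction per molecule
uncertainty = per_draw.std(axis=1)   # disagreement across dropout draws
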
Example #2
    def get_embedding(self, data):
        self.net.eval()

        embedding = []

        num_iters, iter_step = len(data), self.args.batch_size

        for i in range(0, num_iters, iter_step):
            # Prepare batch
            mol_batch = MoleculeDataset(data[i:i + self.args.batch_size])
            smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

            # Run model
            batch = smiles_batch

            with torch.no_grad():
                # Only the latent (second) output of the net is kept here;
                # the prediction head's output is discarded
                _preds, latent = self.net(batch, features_batch)

            latent = latent.data.cpu().numpy()

            # Collect vectors
            latent = latent.tolist()
            embedding.extend(latent)

        return np.array(embedding)
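
Since get_embedding returns one fixed-length latent vector per molecule, the embeddings can feed any downstream similarity or retrieval step. A hedged sketch (model, train_data, and query_data are hypothetical stand-ins):

import numpy as np

train_emb = model.get_embedding(train_data)   # (N_train, d) latent vectors
query_emb = model.get_embedding(query_data)   # (N_query, d)

# Normalise rows, then a matrix product gives pairwise cosine similarities
train_norm = train_emb / np.linalg.norm(train_emb, axis=1, keepdims=True)
query_norm = query_emb / np.linalg.norm(query_emb, axis=1, keepdims=True)
nearest = (query_norm @ train_norm.T).argmax(axis=1)  # closest training molecule per query
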
Example #3
    def predict(self, data, scaler=None):
        self.net.eval()

        preds = []

        num_iters, iter_step = len(data), self.args.batch_size

        for i in range(0, num_iters, iter_step):
            # Prepare batch
            mol_batch = MoleculeDataset(data[i:i + self.args.batch_size])
            smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

            # Run model
            batch = smiles_batch

            with torch.no_grad():
                batch_preds, _emb = self.net(batch, features_batch)

            batch_preds = batch_preds.data.cpu().numpy()

            # Inverse scale if regression
            if scaler is not None:
                batch_preds = scaler.inverse_transform(batch_preds)

            # Collect vectors
            batch_preds = batch_preds.tolist()
            preds.extend(batch_preds)

        return preds
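
The scaler argument implies regression targets were standardised before training, so raw predictions are in standardised units and must be mapped back. A minimal sketch, assuming an sklearn-style StandardScaler (model, train_targets, and test_data are hypothetical stand-ins):

import numpy as np
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(np.array(train_targets))  # (N_train, num_tasks)

# predict() applies scaler.inverse_transform internally, so preds
# come back in the original target units
preds = model.predict(test_data, scaler=scaler)
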
Example #4
    def _train(self, epoch: int, data: Union[MoleculeDataset,
                                             List[MoleculeDataset]],
               n_iter: int) -> int:
        """
        Trains a model for an epoch.
        """
        debug = self.logger.debug if self.logger is not None else print

        debug(f'Running epoch: {epoch}')

        self.net.train()

        data.shuffle()
        loss_sum, iter_count = 0, 0
        # Truncate to a whole number of batches; the final partial batch is dropped
        num_iters = len(data) // self.args.batch_size * self.args.batch_size
        iter_size = self.args.batch_size

        for i in trange(0, num_iters, iter_size):
            # Prepare batch
            if i + self.args.batch_size > len(data):
                break
            mol_batch = MoleculeDataset(data[i:i + self.args.batch_size])
            smiles_batch, features_batch, target_batch = \
                mol_batch.smiles(), mol_batch.features(), mol_batch.targets()
            batch = smiles_batch
            # Mask out missing targets: absent entries are filled with 0 and
            # then zeroed in the loss via the mask
            mask = torch.Tensor([[x is not None for x in tb]
                                 for tb in target_batch])
            targets = torch.Tensor([[0 if x is None else x for x in tb]
                                    for tb in target_batch])

            if next(self.net.parameters()).is_cuda:
                mask, targets = mask.cuda(), targets.cuda()

            # Uniform class weights (a placeholder for optional reweighting)
            class_weights = torch.ones(targets.shape)

            if self.use_cuda:
                class_weights = class_weights.cuda()

            # Run model
            self.net.zero_grad()
            preds, e = self.net(batch, features_batch)

            # loss_func is assumed to return per-element losses
            # (reduction='none') so the mask can zero out missing targets
            loss = self.loss_func(preds, targets) * class_weights * mask
            loss = loss.sum() / mask.sum()

            loss_sum += loss.item()
            iter_count += len(mol_batch)

            loss.backward()
            self.optimizer.step()

            # Step the LR schedule every `learning_rate_decay_steps` batches
            if (n_iter // self.args.batch_size) % self.args.learning_rate_decay_steps == 0:
                self.lr_schedule.step()

            n_iter += len(mol_batch)

            # Log and/or add to tensorboard
            if (n_iter // self.args.batch_size) % self.args.log_frequency == 0:
                lrs = self.lr_schedule.get_lr()
                loss_avg = loss_sum / iter_count
                loss_sum, iter_count = 0, 0

                lrs_str = ', '.join(f'lr_{i} = {lr:.4e}'
                                    for i, lr in enumerate(lrs))
                debug(f'Loss = {loss_avg:.4e}, {lrs_str}')

                if self.writer is not None:
                    self.writer.add_scalar('train_loss', loss_avg, n_iter)
                    # for i, lr in enumerate(lrs):
                    #     self.writer.add_scalar(f'learning_rate_{i}', lr, n_iter)

        return n_iter
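
_train handles a single shuffled epoch and returns the running example count, so the surrounding training driver would typically look like this minimal sketch (model, args, train_data, val_data, and evaluate are hypothetical stand-ins):

n_iter = 0  # cumulative number of training molecules seen
for epoch in range(args.epochs):
    n_iter = model._train(epoch, train_data, n_iter)
    # e.g. validate between epochs
    val_score = evaluate(model, val_data)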