def predict(model: nn.Module,
            data: MoleculeDataset,
            batch_size: int,
            scaler: StandardScaler = None,
            uncertainty: bool = False) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param batch_size: Batch size.
    :param scaler: A StandardScaler object fit on the training targets.
    :param uncertainty: Whether uncertainty values should be returned.
    :return: A list of lists of predictions (the outer list is examples, the
    inner list is tasks). If uncertainty is True, a (predictions, uncertainties) tuple.
    """
    model.eval()

    preds = []

    num_iters, iter_step = len(data), batch_size

    for i in trange(0, num_iters, iter_step):
        # Prepare batch
        mol_batch = MoleculeDataset(data[i:i + batch_size])
        smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        batch = smiles_batch

        with torch.no_grad():
            batch_preds = model(batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    if model.uncertainty:  # model outputs interleaved [mean, variance] columns, two per task
        p = []
        c = []
        for i in range(len(preds)):
            p.append([preds[i][j] for j in range(len(preds[i])) if j % 2 == 0])
            c.append([preds[i][j] for j in range(len(preds[i])) if j % 2 == 1])

        if scaler is not None:
            p = scaler.inverse_transform(p).tolist()
            c = (scaler.stds**2 * c).tolist()

        if uncertainty:
            return p, c

        return p

    if scaler is not None:
        preds = scaler.inverse_transform(preds).tolist()

    return preds
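The uncertainty branch above assumes the model emits two interleaved columns per task, [mean, variance, mean, variance, ...]. A minimal sketch with hypothetical values of the even/odd split, and of why variances are rescaled by stds**2 (Var[a*x + b] = a**2 * Var[x]) instead of being passed through inverse_transform:

import numpy as np

raw = np.array([[0.5, 0.01, 1.2, 0.04],   # example 0: two tasks, interleaved
                [0.7, 0.02, 0.9, 0.03]])  # example 1
means = raw[:, 0::2]                       # even columns -> per-task means
variances = raw[:, 1::2]                   # odd columns  -> per-task variances

stds = np.array([2.0, 3.0])                # hypothetical training-target stds
train_means = np.array([1.0, -1.0])        # hypothetical training-target means
unscaled_means = means * stds + train_means
unscaled_vars = variances * stds ** 2      # matches scaler.stds**2 * c above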
Example #2
def save_predictions(save_dir: str,
                     train_data: MolPairDataset,
                     val_data: MolPairDataset,
                     test_data: MolPairDataset,
                     train_preds: List[List[float]],
                     val_preds: List[List[float]],
                     test_preds: List[List[float]],
                     task_names: List[str],
                     scaler: StandardScaler = None) -> None:
    """
    Saves predictions to csv file for entire model.

    Any of the datasets can be absent. They will not be saved in that case.
    """
    with open(os.path.join(save_dir, 'preds.csv'), 'w') as f:
        writer = csv.writer(f)
        header = ['SMILE1', 'SMILE2', 'SPLIT'] + task_names + ['PRED_' + task for task in task_names]
        writer.writerow(header)

        splits = ['train', 'val', 'test']
        dataSplits = [train_data, val_data, test_data]
        predSplits = [train_preds, val_preds, test_preds]
        for k, split in enumerate(splits):
            if dataSplits[k] is None:
                continue
            smiles = dataSplits[k].smiles()
            targets = dataSplits[k].targets()
            # Inverse scale targets (regression): only the training targets were standardized
            if k == 0 and scaler is not None:
                targets = scaler.inverse_transform(targets)

            preds = predSplits[k]
            for i in range(len(smiles)):
                row = [smiles[i][0], smiles[i][1], split] + targets[i] + preds[i]
                writer.writerow(row)
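For reference, with a single hypothetical task named logP, the resulting preds.csv looks roughly like this (values are made up):

SMILE1,SMILE2,SPLIT,logP,PRED_logP
CCO,CCN,train,0.42,0.39
c1ccccc1,CCO,test,1.56,1.61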
Example #3
def predict_MCdepth(model: nn.Module, data_loader: MoleculeDataLoader,
                    args: TrainArgs, scaler: StandardScaler,
                    d: int) -> List[List[float]]:
    """
    Makes a single stochastic prediction at a given depth index, d.
    """

    # set model to eval mode
    model.eval()

    preds = []

    for batch in data_loader:
        batch: MoleculeDataset
        mol_batch, features_batch = batch.batch_graph(), batch.features()

        with torch.no_grad():
            _, batch_preds_list, _, _ = model(mol_batch,
                                              features_batch,
                                              sample=True)

        batch_preds = batch_preds_list[d]
        batch_preds = batch_preds.data.cpu().numpy()

        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    return preds
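A hedged usage sketch of the function above, assuming model, data_loader, args and scaler already exist and that the model's per-depth prediction list has at least num_samples entries: repeated stochastic calls are reduced to a mean prediction and a variance, as in a Monte Carlo depth ensemble.

import numpy as np

num_samples = 10  # hypothetical MC sample count
stacked = np.array([predict_MCdepth(model, data_loader, args, scaler, d)
                    for d in range(num_samples)])  # (samples, examples, tasks)
mc_mean = stacked.mean(axis=0)  # final prediction per example and task
mc_var = stacked.var(axis=0)    # epistemic uncertainty estimate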
Example #4
def predict(model: nn.Module,
            data: MoleculeDataset,
            batch_size: int,
            scaler: StandardScaler = None) -> Tuple[List[np.ndarray], List[str]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param batch_size: Batch size.
    :param scaler: A StandardScaler object fit on the training targets.
    :return: A tuple of (1) a list of per-output prediction arrays, one entry
    per model output head, concatenated across batches, and (2) the SMILES in
    prediction order.
    """
    model.eval()

    preds = []

    num_iters, iter_step = len(data), batch_size
    smiles_batch_all = []

    for i in trange(0, num_iters, iter_step):
        # Prepare batch
        mol_batch = MoleculeDataset(data[i:i + batch_size])
        smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        batch = smiles_batch

        with torch.no_grad():
            batch_preds = model(batch, features_batch)

        batch_preds = [x.data.cpu().numpy() for x in batch_preds]

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        preds.append(batch_preds)
        smiles_batch_all.extend(smiles_batch)
    preds = [np.concatenate(x) for x in zip(*preds)]

    return preds, smiles_batch_all
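The zip/concatenate line above regroups per-batch outputs into per-output arrays: each batch contributes a list of arrays (one per model output head), zip(*preds) transposes batches and outputs, and concatenate stitches each output back to dataset length. A minimal sketch with hypothetical shapes:

import numpy as np

preds = [  # two batches, each yielding two hypothetical output heads
    [np.zeros((3, 1)), np.zeros((3, 4))],  # batch of 3 examples
    [np.ones((2, 1)), np.ones((2, 4))],    # final batch of 2 examples
]
merged = [np.concatenate(x) for x in zip(*preds)]
assert merged[0].shape == (5, 1) and merged[1].shape == (5, 4)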
Example #5
def predict(model: nn.Module,
            data: MoleculeDataset,
            batch_size: int,
            disable_progress_bar: bool = False,
            scaler: StandardScaler = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param batch_size: Batch size.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A StandardScaler object fit on the training targets.
    :return: A list of lists of predictions. The outer list is examples
    while the inner list is tasks.
    """
    model.eval()

    preds = []

    num_iters, iter_step = len(data), batch_size

    for i in trange(0, num_iters, iter_step, disable=disable_progress_bar):
        # Prepare batch
        mol_batch = MoleculeDataset(data[i:i + batch_size])
        smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        batch = smiles_batch

        with torch.no_grad():
            batch_preds = model(batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    return preds
Example #6
def predict(model: MoleculeModel,
            data_loader: MoleculeDataLoader,
            disable_progress_bar: bool = False,
            scaler: StandardScaler = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A :class:`~chemprop.models.model.MoleculeModel`.
    :param data_loader: A :class:`~chemprop.data.data.MoleculeDataLoader`.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A :class:`~chemprop.features.scaler.StandardScaler` object fit on the training targets.
    :return: A list of lists of predictions. The outer list is molecules while the inner list is tasks.
    """
    model.eval()

    preds = []

    for batch in tqdm(data_loader, disable=disable_progress_bar, leave=False):
        # Prepare batch
        batch: MoleculeDataset
        mol_batch, features_batch, target_batch, atom_descriptors_batch, atom_features_batch, bond_features_batch, smiles_batch = \
            batch.batch_graph(), batch.features(), batch.targets(), batch.atom_descriptors(), \
            batch.atom_features(), batch.bond_features(), batch.smiles_one_hot_encoding()
        # Make predictions
        with torch.no_grad():
            batch_preds = model(mol_batch, features_batch,
                                atom_descriptors_batch, atom_features_batch,
                                bond_features_batch, smiles_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    return preds
Example #7
def predict(model: nn.Module,
            data_loader: MoleculeDataLoader,
            disable_progress_bar: bool = False,
            scaler: StandardScaler = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data_loader: A MoleculeDataLoader.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A StandardScaler object fit on the training targets.
    :return: A list of lists of predictions. The outer list is examples
    while the inner list is tasks.
    """
    model.eval()

    preds = []

    for batch in tqdm(data_loader, disable=disable_progress_bar):
        # Prepare batch
        batch: MoleculeDataset
        mol_batch, features_batch = batch.batch_graph(), batch.features()

        # Make predictions
        with torch.no_grad():
            batch_preds = model(mol_batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    return preds
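For context, a hedged sketch of how this data-loader variant is typically driven, assuming chemprop-style constructors (exact signatures differ across the forks collected here; smiles_list, model and scaler are assumed to exist):

test_data = MoleculeDataset([MoleculeDatapoint(smiles=[s]) for s in smiles_list])
test_loader = MoleculeDataLoader(dataset=test_data, batch_size=50)
test_preds = predict(model=model, data_loader=test_loader, scaler=scaler)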
Example #8
def predict(model: nn.Module,
            data: MoleculeDataset,
            batch_size: int,
            sampling_size: int,
            scaler: StandardScaler = None):
    # -> Tuple[Optional[List[List[float]]], Optional[List[List[float]]], Optional[List[List[float]]], List]
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param batch_size: Batch size.
    :param sampling_size: Sampling size for MC-Dropout.
    :param scaler: A StandardScaler object fit on the training targets.
    :return: A 4-length tuple of predictions, aleatoric uncertainties, epistemic uncertainties and features.
    The first three elements are lists of lists; the outer list is examples while the inner list is tasks.
    The second and/or the third element of the tuple can be None if not computed.
    """
    model.eval()

    preds = []
    ale_unc = []
    epi_unc = []
    features = []

    num_iters, iter_step = len(data), batch_size

    # if aleatoric uncertainty is enabled
    aleatoric = model.aleatoric

    # if MC-Dropout
    mc_dropout = model.mc_dropout

    for i in trange(0, num_iters, iter_step):
        # Prepare batch
        mol_batch = MoleculeDataset(data[i:i + batch_size])
        smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        batch = smiles_batch

        if not aleatoric and not mc_dropout:
            with torch.no_grad():
                batch_preds = model(batch, features_batch)
            batch_preds = batch_preds.data.cpu().numpy()

            # Inverse scale if regression
            if scaler is not None:
                batch_preds = scaler.inverse_transform(batch_preds)

            # Collect vectors
            batch_preds = batch_preds.tolist()
            preds.extend(batch_preds)

        elif aleatoric and not mc_dropout:
            with torch.no_grad():

                #############################
                # batch feature
                batch_preds, batch_logvar, batch_feature = model(
                    batch, features_batch)
                #############################

                batch_var = torch.exp(batch_logvar)
            batch_preds = batch_preds.data.cpu().numpy()
            batch_ale_unc = batch_var.data.cpu().numpy()

            ############################
            batch_feature = batch_feature.data.cpu().numpy()
            features.extend(batch_feature)
            ############################

            # Inverse scale if regression
            if scaler is not None:
                batch_preds = scaler.inverse_transform(batch_preds)
                batch_ale_unc = scaler.inverse_transform_variance(
                    batch_ale_unc)

            # Collect vectors
            batch_preds = batch_preds.tolist()
            batch_ale_unc = batch_ale_unc.tolist()
            preds.extend(batch_preds)
            ale_unc.extend(batch_ale_unc)

        elif not aleatoric and mc_dropout:
            with torch.no_grad():
                P_mean = []

                for ss in range(sampling_size):
                    batch_preds = model(batch, features_batch)
                    P_mean.append(batch_preds)

                batch_preds = torch.mean(torch.stack(P_mean), 0)
                batch_epi_unc = torch.var(torch.stack(P_mean), 0)

            batch_preds = batch_preds.data.cpu().numpy()
            batch_epi_unc = batch_epi_unc.data.cpu().numpy()

            # Inverse scale if regression
            if scaler is not None:
                batch_preds = scaler.inverse_transform(batch_preds)
                batch_epi_unc = scaler.inverse_transform_variance(
                    batch_epi_unc)

            # Collect vectors
            batch_preds = batch_preds.tolist()
            batch_epi_unc = batch_epi_unc.tolist()

            preds.extend(batch_preds)
            epi_unc.extend(batch_epi_unc)

        elif aleatoric and mc_dropout:
            with torch.no_grad():
                P_mean = []
                P_logvar = []

                for ss in range(sampling_size):
                    batch_preds, batch_logvar = model(batch, features_batch)
                    P_mean.append(batch_preds)
                    P_logvar.append(torch.exp(batch_logvar))  # despite the name, stores variances

                batch_preds = torch.mean(torch.stack(P_mean), 0)
                batch_ale_unc = torch.mean(torch.stack(P_logvar), 0)
                batch_epi_unc = torch.var(torch.stack(P_mean), 0)

            batch_preds = batch_preds.data.cpu().numpy()
            batch_ale_unc = batch_ale_unc.data.cpu().numpy()
            batch_epi_unc = batch_epi_unc.data.cpu().numpy()

            # Inverse scale if regression
            if scaler is not None:
                batch_preds = scaler.inverse_transform(batch_preds)
                batch_ale_unc = scaler.inverse_transform_variance(
                    batch_ale_unc)
                batch_epi_unc = scaler.inverse_transform_variance(
                    batch_epi_unc)

            # Collect vectors
            batch_preds = batch_preds.tolist()
            batch_ale_unc = batch_ale_unc.tolist()
            batch_epi_unc = batch_epi_unc.tolist()

            preds.extend(batch_preds)
            ale_unc.extend(batch_ale_unc)
            epi_unc.extend(batch_epi_unc)

    if not aleatoric and not mc_dropout:
        return preds, None, None, features
    elif aleatoric and not mc_dropout:
        return preds, ale_unc, None, features
    elif not aleatoric and mc_dropout:
        return preds, None, epi_unc, features
    elif aleatoric and mc_dropout:
        return preds, ale_unc, epi_unc, features
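When both uncertainty sources are computed, they can be combined by the law of total variance: total predictive variance = aleatoric + epistemic. A hedged follow-up sketch using the 4-tuple returned above (batch_size and sampling_size values are hypothetical):

import numpy as np

preds, ale_unc, epi_unc, _ = predict(model, data, batch_size=50,
                                     sampling_size=20, scaler=scaler)
if ale_unc is not None and epi_unc is not None:
    total_var = np.array(ale_unc) + np.array(epi_unc)
    total_std = np.sqrt(total_var)  # per-example, per-task predictive std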
Example #9
def predict(model: nn.Module,
            data: MoleculeDataset,
            args: Namespace,
            scaler: StandardScaler = None,
            bert_save_memory: bool = False,
            logger: logging.Logger = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param args: Arguments.
    :param scaler: A StandardScaler object fit on the training targets.
    :param bert_save_memory: Store unused predictions as None to avoid unnecessary memory use.
    :param logger: Logger.
    :return: A list of lists of predictions. The outer list is examples
    while the inner list is tasks.
    """
    model.eval()

    preds = []
    if args.dataset_type == 'bert_pretraining':
        features_preds = []

    if args.maml:
        num_iters, iter_step = data.num_tasks() * args.maml_batches_per_epoch, 1
        full_targets = []
    else:
        num_iters, iter_step = len(data), args.batch_size
    
    if args.parallel_featurization:
        batch_queue = Queue(args.batch_queue_max_size)
        exit_queue = Queue(1)
        batch_process = Process(target=async_mol2graph, args=(batch_queue, data, args, num_iters, iter_step, exit_queue, True))
        batch_process.start()
        currently_loaded_batches = []

    for i in trange(0, num_iters, iter_step):
        if args.maml:
            task_train_data, task_test_data, task_idx = data.sample_maml_task(args, seed=0)
            mol_batch = task_test_data
            smiles_batch, features_batch, targets_batch = task_train_data.smiles(), task_train_data.features(), task_train_data.targets(task_idx)
            targets = torch.Tensor(targets_batch).unsqueeze(1)
            if args.cuda:
                targets = targets.cuda()
        else:
            # Prepare batch
            if args.parallel_featurization:
                if len(currently_loaded_batches) == 0:
                    currently_loaded_batches = batch_queue.get()
                mol_batch, featurized_mol_batch = currently_loaded_batches.pop(0)
            else:
                mol_batch = MoleculeDataset(data[i:i + args.batch_size])
            smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        if args.dataset_type == 'bert_pretraining':
            batch = mol2graph(smiles_batch, args)
            batch.bert_mask(mol_batch.mask())
        else:
            batch = smiles_batch
        
        if args.maml:  # TODO refactor with train loop
            model.zero_grad()
            intermediate_preds = model(batch, features_batch)
            loss = get_loss_func(args)(intermediate_preds, targets)
            loss = loss.sum() / len(batch)
            grad = torch.autograd.grad(loss, [p for p in model.parameters() if p.requires_grad])
            theta = [p for p in model.named_parameters() if p[1].requires_grad]  # comes in same order as grad
            theta_prime = {p[0]: p[1] - args.maml_lr * grad[i] for i, p in enumerate(theta)}
            for name, nongrad_param in [p for p in model.named_parameters() if not p[1].requires_grad]:
                theta_prime[name] = nongrad_param + torch.zeros(nongrad_param.size()).to(nongrad_param)
            model_prime = build_model(args=args, params=theta_prime)
            smiles_batch, features_batch, targets_batch = task_test_data.smiles(), task_test_data.features(), task_test_data.targets(task_idx)
            # no mask since we only picked data points that have the desired target
            with torch.no_grad():
                batch_preds = model_prime(smiles_batch, features_batch)
            full_targets.extend([[t] for t in targets_batch])
        else:
            with torch.no_grad():
                if args.parallel_featurization:
                    previous_graph_input_mode = model.encoder.graph_input
                    model.encoder.graph_input = True  # force model to accept already processed input
                    batch_preds = model(featurized_mol_batch, features_batch)
                    model.encoder.graph_input = previous_graph_input_mode
                else:
                    batch_preds = model(batch, features_batch)

                if args.dataset_type == 'bert_pretraining':
                    if batch_preds['features'] is not None:
                        features_preds.extend(batch_preds['features'].data.cpu().numpy())
                    batch_preds = batch_preds['vocab']
                
                if args.dataset_type == 'kernel':
                    batch_preds = batch_preds.view(int(batch_preds.size(0)/2), 2, batch_preds.size(1))
                    batch_preds = model.kernel_output_layer(batch_preds)

        batch_preds = batch_preds.data.cpu().numpy()

        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)
        
        if args.dataset_type == 'regression_with_binning':
            batch_preds = batch_preds.reshape((batch_preds.shape[0], args.num_tasks, args.num_bins))
            indices = np.argmax(batch_preds, axis=2)
            preds.extend(indices.tolist())
        else:
            batch_preds = batch_preds.tolist()
            if args.dataset_type == 'bert_pretraining' and bert_save_memory:
                for atom_idx, mask_val in enumerate(mol_batch.mask()):
                    if mask_val != 0:
                        batch_preds[atom_idx] = None  # not going to predict, so save some memory when passing around
            preds.extend(batch_preds)
    
    if args.dataset_type == 'regression_with_binning':
        preds = args.bin_predictions[np.array(preds)].tolist()

    if args.dataset_type == 'bert_pretraining':
        preds = {
            'features': features_preds if len(features_preds) > 0 else None,
            'vocab': preds
        }

    if args.parallel_featurization:
        exit_queue.put(0)  # dummy var to get the subprocess to know that we're done
        batch_process.join()

    if args.maml:
        # return the task targets here to guarantee alignment;
        # there's probably no reasonable scenario where we'd use MAML directly to predict something that's actually unknown
        return preds, full_targets

    return preds
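The MAML branch above performs a single inner-loop adaptation step, theta' = theta - lr * grad(L_support(theta)), and then predicts on the task's query set with the adapted weights. A minimal generic sketch of that step (a simplification of the loop above, not a drop-in replacement):

import torch

def one_step_adapt(loss: torch.Tensor, model: torch.nn.Module, lr: float) -> dict:
    # Return adapted parameters without modifying the model in place.
    params = [p for p in model.parameters() if p.requires_grad]
    grads = torch.autograd.grad(loss, params)
    named = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
    return {name: p - lr * g for (name, p), g in zip(named, grads)}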
Example #10
def predict(model: nn.Module,
            data_loader: MoleculeDataLoader,
            disable_progress_bar: bool = False,
            scaler: StandardScaler = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data_loader: A MoleculeDataLoader.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A StandardScaler object fit on the training targets.
    :return: A list of lists of predictions. The outer list is examples
    while the inner list is tasks.
    """

    UQ = model.uncertainty
    two_vals = UQ in ('Dropout_VI', 'Ensemble')
    mve = model.mve
    training = model.training
    if UQ != 'Dropout_VI':
        model.eval()  # Dropout_VI stays in train mode so dropout remains stochastic

    total_batch_preds = []
    total_var_preds = []
    for batch in tqdm(data_loader, disable=disable_progress_bar):
        # Prepare batch
        batch: MoleculeDataset
        mol_batch, features_batch = batch.batch_graph(), batch.features()

        # Make predictions
        if two_vals:
            with torch.no_grad():
                batch_preds, logvar_preds = model(mol_batch, features_batch)
            var_preds = torch.exp(logvar_preds)
            var_preds = var_preds.data.cpu().numpy().tolist()
            total_var_preds.extend(var_preds)

        else:
            with torch.no_grad():
                batch_preds = model(mol_batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Collect vectors
        batch_preds = batch_preds.tolist()
        total_batch_preds.extend(batch_preds)

    if mve:
        p = []
        c = []
        for i in range(len(total_batch_preds)):
            p.append([
                total_batch_preds[i][j]
                for j in range(len(total_batch_preds[i])) if j % 2 == 0
            ])
            c.append([
                total_batch_preds[i][j]
                for j in range(len(total_batch_preds[i])) if j % 2 == 1
            ])

        if scaler is not None:
            p = scaler.inverse_transform(p).tolist()
            c = (scaler.stds**2 * c).tolist()

        if not training:
            return p, c
        else:
            return p

    # Inverse scale if regression
    if scaler is not None:
        total_batch_preds = scaler.inverse_transform(
            total_batch_preds).tolist()

    if not UQ or training or not two_vals:
        return total_batch_preds
    else:
        return total_batch_preds, total_var_preds
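The reason eval() is skipped for Dropout_VI above is that nn.Dropout only samples masks in training mode; keeping dropout stochastic at inference is what makes the variational estimate work. A common standalone pattern for this (a sketch, not this repo's code):

import torch.nn as nn

def enable_dropout_only(model: nn.Module) -> None:
    model.eval()  # freeze batch norm statistics and other eval-mode behavior
    for m in model.modules():
        if isinstance(m, nn.Dropout):
            m.train()  # re-enable stochastic dropout masks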
Example #11
def predict(
    model: MoleculeModel,
    data_loader: MoleculeDataLoader,
    disable_progress_bar: bool = False,
    scaler: StandardScaler = None,
    return_unc_parameters: bool = False,
    dropout_prob: float = 0.0,
) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A :class:`~chemprop.models.model.MoleculeModel`.
    :param data_loader: A :class:`~chemprop.data.data.MoleculeDataLoader`.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A :class:`~chemprop.features.scaler.StandardScaler` object fit on the training targets.
    :param return_unc_parameters: A bool indicating whether additional uncertainty parameters would be returned alongside the mean predictions.
    :param dropout_prob: For use during uncertainty prediction only. The dropout probability used in generating a dropout ensemble.
    :return: A list of lists of predictions. The outer list is molecules while the inner list is tasks. If returning uncertainty parameters as well,
        it is a tuple of lists of lists, of a length depending on how many uncertainty parameters are appropriate for the loss function.
    """
    model.eval()

    # Activate dropout layers to work during inference for uncertainty estimation
    if dropout_prob > 0.0:

        def activate_dropout_(model):
            return activate_dropout(model, dropout_prob)

        model.apply(activate_dropout_)

    preds = []

    # only used if returning uncertainty parameters
    var, lambdas, alphas, betas = [], [], [], []

    for batch in tqdm(data_loader, disable=disable_progress_bar, leave=False):
        # Prepare batch
        batch: MoleculeDataset
        mol_batch = batch.batch_graph()
        features_batch = batch.features()
        atom_descriptors_batch = batch.atom_descriptors()
        atom_features_batch = batch.atom_features()
        bond_features_batch = batch.bond_features()

        # Make predictions
        with torch.no_grad():
            batch_preds = model(
                mol_batch,
                features_batch,
                atom_descriptors_batch,
                atom_features_batch,
                bond_features_batch,
            )

        batch_preds = batch_preds.data.cpu().numpy()

        if model.loss_function == "mve":
            batch_preds, batch_var = np.split(batch_preds, 2, axis=1)
        elif model.loss_function == "dirichlet":
            if model.classification:
                batch_alphas = np.reshape(
                    batch_preds,
                    [batch_preds.shape[0], batch_preds.shape[1] // 2, 2])  # shape(data, tasks, 2)
                batch_preds = batch_alphas[:, :, 1] / np.sum(
                    batch_alphas, axis=2)  # shape(data, tasks)
            elif model.multiclass:
                batch_alphas = batch_preds
                batch_preds = batch_preds / np.sum(
                    batch_alphas, axis=2,
                    keepdims=True)  # shape(data, tasks, num_classes)
        elif model.loss_function == 'evidential':  # regression
            batch_preds, batch_lambdas, batch_alphas, batch_betas = np.split(
                batch_preds, 4, axis=1)

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)
            if model.loss_function == "mve":
                batch_var = batch_var * scaler.stds**2
            elif model.loss_function == "evidential":
                batch_betas = batch_betas * scaler.stds**2

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)
        if model.loss_function == "mve":
            var.extend(batch_var.tolist())
        elif model.loss_function == "dirichlet" and model.classification:
            alphas.extend(batch_alphas.tolist())
        elif model.loss_function == "evidential":  # regression
            lambdas.extend(batch_lambdas.tolist())
            alphas.extend(batch_alphas.tolist())
            betas.extend(batch_betas.tolist())

    if return_unc_parameters:
        if model.loss_function == "mve":
            return preds, var
        elif model.loss_function == "dirichlet":
            return preds, alphas
        elif model.loss_function == "evidential":
            return preds, lambdas, alphas, betas

    return preds
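A hedged sketch of turning the evidential parameters returned above into uncertainties, following the usual Normal-Inverse-Gamma reading (Amini et al., 2020): expected aleatoric variance beta / (alpha - 1) and epistemic variance beta / (lambda * (alpha - 1)), assuming alphas > 1:

import numpy as np

preds, lambdas, alphas, betas = predict(model, data_loader, scaler=scaler,
                                        return_unc_parameters=True)
lambdas, alphas, betas = map(np.asarray, (lambdas, alphas, betas))
aleatoric = betas / (alphas - 1)
epistemic = betas / (lambdas * (alphas - 1))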
Example #12
def predict(model: nn.Module,
            data: MoleculeDataset,
            batch_size: int,
            scaler: StandardScaler = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param batch_size: Batch size.
    :param scaler: A StandardScaler object fit on the training targets.
    :return: A list of lists of predictions. The outer list is examples
    while the inner list is tasks.
    """
    model.eval()

    preds = []
    check_fp = []  # wei, for batch problem
    check_fp_d0 = []  # wei, for batch problem
    check_fp_d1 = []  # wei, for batch problem
    check_fp_d2 = []  # wei, for batch problem
    check_fp_final = []  # wei, for batch problem
    check_fp_mol = []  # wei, for batch problem

    num_iters, iter_step = len(data), batch_size

    for i in trange(0, num_iters, iter_step):
        # Prepare batch
        mol_batch = MoleculeDataset(data[i:i + batch_size])
        smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        batch = smiles_batch

        with torch.no_grad():
            batch_preds = model(batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

        # wei, for batch problem
        each_fp = model.output_fp.tolist()
        #print('each_fp:', each_fp)
        remain = num_iters % batch_size
        if len(each_fp) // 5 == remain:  # wei, final smaller batch; /5 for d0, d1, d2, final, mol
            check_fp_d0.extend(each_fp[:remain])
            check_fp_d1.extend(each_fp[remain:remain * 2])
            check_fp_d2.extend(each_fp[remain * 2:remain * 3])
            check_fp_final.extend(each_fp[remain * 3:remain * 4])
            check_fp_mol.extend(each_fp[remain * 4:remain * 5])
        else:
            check_fp_d0.extend(each_fp[:batch_size])
            check_fp_d1.extend(each_fp[batch_size:batch_size * 2])
            check_fp_d2.extend(each_fp[batch_size * 2:batch_size * 3])
            check_fp_final.extend(each_fp[batch_size * 3:batch_size * 4])
            check_fp_mol.extend(each_fp[batch_size * 4:batch_size * 5])
    check_fp.append(check_fp_d0)
    check_fp.append(check_fp_d1)
    check_fp.append(check_fp_d2)
    check_fp.append(check_fp_final)
    check_fp.append(check_fp_mol)

    return preds, check_fp
Example #13
def predict(model: nn.Module,
            data_loader: MoleculeDataLoader,
            args: TrainArgs,
            disable_progress_bar: bool = False,
            scaler: StandardScaler = None,
            test_data: bool = False,
            gp_sample: bool = False,
            bbp_sample: bool = False) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data_loader: A MoleculeDataLoader.
    :param args: Arguments.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A StandardScaler object fit on the training targets.
    :param test_data: Flag indicating whether data is test data.
    :param gp_sample: If True, sample from the GP posterior rather than using its mean.
    :param bbp_sample: If True, sample Bayesian-layer weights rather than using their means.
    :return: A list of lists of predictions. The outer list is examples
    while the inner list is tasks.
    """

    ### seed to ensure single network sampled across batches
    if args.thompson:
        network_seed = np.random.randint(1e15)

    # Detect whether the model uses a GP layer, Bayesian linear layers (BBP),
    # or a depth-uncertainty network (DUN)
    gp = hasattr(model, 'gp_layer')
    bbp = any(isinstance(layer, BayesLinear) for layer in model.children())
    dun = hasattr(model, 'log_cat')

    # set model to eval mode
    model.eval()

    # enable dropout layers with test data, if args.test_dropout == True
    if args.test_dropout and test_data:
        model.apply(enable_dropout)

    preds = []

    for batch in tqdm(data_loader, disable=disable_progress_bar):
        # Prepare batch
        batch: MoleculeDataset
        mol_batch, features_batch = batch.batch_graph(), batch.features()

        # Make predictions
        with torch.no_grad():
            if gp:
                if gp_sample:
                    batch_preds = model(mol_batch, features_batch).sample()
                else:
                    batch_preds = model(mol_batch, features_batch).mean
            elif bbp:
                if dun:
                    batch_preds, _, _, _ = model(mol_batch,
                                                 features_batch,
                                                 sample=bbp_sample)
                else:
                    if args.thompson:
                        torch.manual_seed(network_seed)
                    batch_preds, _ = model(mol_batch,
                                           features_batch,
                                           sample=bbp_sample)
            else:
                if args.thompson:
                    torch.manual_seed(network_seed)
                batch_preds = model(mol_batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    return preds
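Although each snippet's docstring mentions an ensemble, every predict() here scores a single model; the ensembling happens in the caller. A hedged sketch of that outer loop (models is an assumed list of trained models):

import numpy as np

ensemble_preds = [predict(m, data_loader, args, scaler=scaler) for m in models]
avg_preds = np.mean(np.array(ensemble_preds), axis=0).tolist()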