def predict(model: nn.Module,
            data: MoleculeDataset,
            batch_size: int,
            scaler: StandardScaler = None,
            uncertainty: bool = False) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param batch_size: Batch size.
    :param scaler: A StandardScaler object fit on the training targets.
    :param uncertainty: Whether uncertainty values should be returned.
    :return: A list of lists of predictions. The outer list is examples while
             the inner list is tasks. If the model predicts uncertainty and
             `uncertainty` is True, a (predictions, uncertainties) tuple is
             returned instead.
    """
    model.eval()

    preds = []

    num_iters, iter_step = len(data), batch_size

    for i in trange(0, num_iters, iter_step):
        # Prepare batch
        mol_batch = MoleculeDataset(data[i:i + batch_size])
        smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        batch = smiles_batch

        with torch.no_grad():
            batch_preds = model(batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    if model.uncertainty:
        # Outputs are interleaved per task: even indices are predictions,
        # odd indices are the corresponding uncertainties.
        p = []
        c = []
        for i in range(len(preds)):
            p.append([preds[i][j] for j in range(len(preds[i])) if j % 2 == 0])
            c.append([preds[i][j] for j in range(len(preds[i])) if j % 2 == 1])

        if scaler is not None:
            p = scaler.inverse_transform(p).tolist()
            c = (scaler.stds**2 * c).tolist()

        if uncertainty:
            return p, c

        return p

    if scaler is not None:
        preds = scaler.inverse_transform(preds).tolist()

    return preds
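# A minimal, self-contained sketch of the mean/uncertainty de-interleaving and
# inverse scaling performed above, assuming a model that emits
# [mean_0, unc_0, mean_1, unc_1, ...] per example. All names and values here
# are illustrative, not part of this module.
import numpy as np

raw = np.array([[0.1, 0.01, 0.5, 0.04],    # example 0, two tasks
                [0.2, 0.02, 0.6, 0.05]])   # example 1, two tasks

p = raw[:, 0::2]  # even columns: per-task predictions
c = raw[:, 1::2]  # odd columns: per-task uncertainties (variances)

# Undoing target standardization: means get the full inverse transform,
# variances are rescaled by the squared training stds only.
stds = np.array([2.0, 3.0])    # hypothetical per-task training stds
means = np.array([1.0, -1.0])  # hypothetical per-task training means
p_unscaled = p * stds + means
c_unscaled = c * stds**2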
def save_predictions(save_dir: str,
                     train_data: MolPairDataset,
                     val_data: MolPairDataset,
                     test_data: MolPairDataset,
                     train_preds: List[List[float]],
                     val_preds: List[List[float]],
                     test_preds: List[List[float]],
                     task_names: List[str],
                     scaler: StandardScaler = None) -> None:
    """
    Saves predictions to a csv file for the entire model.

    Any of the datasets can be absent; they will not be saved in that case.
    """
    with open(os.path.join(save_dir, 'preds.csv'), 'w') as f:
        writer = csv.writer(f)

        header = ['SMILE1', 'SMILE2', 'SPLIT'] + task_names + ['PRED_' + task for task in task_names]
        writer.writerow(header)

        splits = ['train', 'val', 'test']
        data_splits = [train_data, val_data, test_data]
        pred_splits = [train_preds, val_preds, test_preds]

        for k, split in enumerate(splits):
            if data_splits[k] is None:
                continue
            smiles = data_splits[k].smiles()
            targets = data_splits[k].targets()
            # Inverse scale if regression, and only for training data
            # (only the training targets are stored in scaled form)
            if k == 0 and scaler is not None:
                targets = scaler.inverse_transform(targets)
            preds = pred_splits[k]
            for i in range(len(smiles)):
                row = [smiles[i][0], smiles[i][1], split] + targets[i] + preds[i]
                writer.writerow(row)
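# A toy, runnable sketch of the csv layout save_predictions produces, assuming
# two tasks; the task names, SMILES strings, and values are illustrative.
import csv
import io

task_names = ['logP', 'solubility']
buf = io.StringIO()
writer = csv.writer(buf)
writer.writerow(['SMILE1', 'SMILE2', 'SPLIT'] + task_names + ['PRED_' + t for t in task_names])
writer.writerow(['CCO', 'c1ccccc1', 'test', 0.12, -1.3, 0.1, -1.1])
print(buf.getvalue())
# SMILE1,SMILE2,SPLIT,logP,solubility,PRED_logP,PRED_solubility
# CCO,c1ccccc1,test,0.12,-1.3,0.1,-1.1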
def predict_MCdepth(model: nn.Module,
                    data_loader: MoleculeDataLoader,
                    args: TrainArgs,
                    scaler: StandardScaler,
                    d: int) -> List[List[float]]:
    """
    Makes a stochastic (sampled) prediction at a given depth, d.
    """
    # Set model to eval mode
    model.eval()

    preds = []

    for batch in data_loader:
        # Prepare batch
        batch: MoleculeDataset
        mol_batch, features_batch = batch.batch_graph(), batch.features()

        # Make predictions and select the output at depth d
        with torch.no_grad():
            _, batch_preds_list, _, _ = model(mol_batch, features_batch, sample=True)
        batch_preds = batch_preds_list[d]

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    return preds
def predict(model: nn.Module, data: MoleculeDataset, batch_size: int, scaler: StandardScaler = None) -> List[List[float]]: """ Makes predictions on a dataset using an ensemble of models. :param model: A model. :param data: A MoleculeDataset. :param batch_size: Batch size. :param scaler: A StandardScaler object fit on the training targets. :return: A list of lists of predictions. The outer list is examples while the inner list is tasks. """ model.eval() preds = [] num_iters, iter_step = len(data), batch_size smiles_batch_all = [] for i in trange(0, num_iters, iter_step): # Prepare batch mol_batch = MoleculeDataset(data[i:i + batch_size]) smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features() # Run model batch = smiles_batch with torch.no_grad(): batch_preds = model(batch, features_batch) batch_preds = [x.data.cpu().numpy() for x in batch_preds] # Inverse scale if regression if scaler is not None: batch_preds = scaler.inverse_transform(batch_preds) # Collect vectors preds.append(batch_preds) smiles_batch_all.extend(smiles_batch) preds = [np.concatenate(x) for x in zip(*preds)] return preds, smiles_batch_all
def predict(model: nn.Module,
            data: MoleculeDataset,
            batch_size: int,
            disable_progress_bar: bool = False,
            scaler: StandardScaler = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param batch_size: Batch size.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A StandardScaler object fit on the training targets.
    :return: A list of lists of predictions. The outer list is examples while
             the inner list is tasks.
    """
    model.eval()

    preds = []

    num_iters, iter_step = len(data), batch_size

    for i in trange(0, num_iters, iter_step, disable=disable_progress_bar):
        # Prepare batch
        mol_batch = MoleculeDataset(data[i:i + batch_size])
        smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        batch = smiles_batch

        with torch.no_grad():
            batch_preds = model(batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    return preds
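# The slice-based batching loop above, reduced to a generic runnable form;
# trange is tqdm's range with a progress bar, and the data here is illustrative.
from tqdm import trange

data = list(range(10))
batch_size = 4
batches = []
for i in trange(0, len(data), batch_size):
    batches.append(data[i:i + batch_size])  # the final batch may be smaller
assert batches == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]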
def predict(model: MoleculeModel,
            data_loader: MoleculeDataLoader,
            disable_progress_bar: bool = False,
            scaler: StandardScaler = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A :class:`~chemprop.models.model.MoleculeModel`.
    :param data_loader: A :class:`~chemprop.data.data.MoleculeDataLoader`.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A :class:`~chemprop.features.scaler.StandardScaler` object fit on the training targets.
    :return: A list of lists of predictions. The outer list is molecules while the inner list is tasks.
    """
    model.eval()

    preds = []

    for batch in tqdm(data_loader, disable=disable_progress_bar, leave=False):
        # Prepare batch
        batch: MoleculeDataset
        mol_batch, features_batch, target_batch, atom_descriptors_batch, atom_features_batch, \
            bond_features_batch, smiles_batch = \
            batch.batch_graph(), batch.features(), batch.targets(), batch.atom_descriptors(), \
            batch.atom_features(), batch.bond_features(), batch.smiles_one_hot_encoding()

        # Make predictions
        with torch.no_grad():
            batch_preds = model(mol_batch, features_batch, atom_descriptors_batch,
                                atom_features_batch, bond_features_batch, smiles_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    return preds
def predict(model: nn.Module,
            data_loader: MoleculeDataLoader,
            disable_progress_bar: bool = False,
            scaler: StandardScaler = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data_loader: A MoleculeDataLoader.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A StandardScaler object fit on the training targets.
    :return: A list of lists of predictions. The outer list is examples while
             the inner list is tasks.
    """
    model.eval()

    preds = []

    for batch in tqdm(data_loader, disable=disable_progress_bar):
        # Prepare batch
        batch: MoleculeDataset
        mol_batch, features_batch = batch.batch_graph(), batch.features()

        # Make predictions
        with torch.no_grad():
            batch_preds = model(mol_batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    return preds
def predict(model: nn.Module,
            data: MoleculeDataset,
            batch_size: int,
            sampling_size: int,
            scaler: StandardScaler = None):
    # -> Tuple[Union[List[List[float]], None], ...]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param batch_size: Batch size.
    :param sampling_size: Sampling size for MC-Dropout.
    :param scaler: A StandardScaler object fit on the training targets.
    :return: A 4-length tuple of predictions, aleatoric uncertainties,
             epistemic uncertainties and features. Each element is a list of
             lists; the outer list is examples while the inner list is tasks.
             The second and/or the third element of the tuple can be None if
             not computed, and features are only collected in the aleatoric,
             non-MC-Dropout branch.
    """
    model.eval()

    preds = []
    ale_unc = []
    epi_unc = []
    features = []

    num_iters, iter_step = len(data), batch_size

    # Whether aleatoric uncertainty is enabled
    aleatoric = model.aleatoric
    # Whether MC-Dropout is enabled
    mc_dropout = model.mc_dropout

    for i in trange(0, num_iters, iter_step):
        # Prepare batch
        mol_batch = MoleculeDataset(data[i:i + batch_size])
        smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        batch = smiles_batch

        if not aleatoric and not mc_dropout:
            with torch.no_grad():
                batch_preds = model(batch, features_batch)

            batch_preds = batch_preds.data.cpu().numpy()

            # Inverse scale if regression
            if scaler is not None:
                batch_preds = scaler.inverse_transform(batch_preds)

            # Collect vectors
            preds.extend(batch_preds.tolist())

        elif aleatoric and not mc_dropout:
            with torch.no_grad():
                # The model also returns the learned batch features here
                batch_preds, batch_logvar, batch_feature = model(batch, features_batch)
                batch_var = torch.exp(batch_logvar)

            batch_preds = batch_preds.data.cpu().numpy()
            batch_ale_unc = batch_var.data.cpu().numpy()

            batch_feature = batch_feature.data.cpu().numpy()
            features.extend(batch_feature)

            # Inverse scale if regression
            if scaler is not None:
                batch_preds = scaler.inverse_transform(batch_preds)
                batch_ale_unc = scaler.inverse_transform_variance(batch_ale_unc)

            # Collect vectors
            preds.extend(batch_preds.tolist())
            ale_unc.extend(batch_ale_unc.tolist())

        elif not aleatoric and mc_dropout:
            with torch.no_grad():
                P_mean = []
                for ss in range(sampling_size):
                    batch_preds = model(batch, features_batch)
                    P_mean.append(batch_preds)
                # Mean over samples is the prediction; variance over samples
                # is the epistemic uncertainty
                batch_preds = torch.mean(torch.stack(P_mean), 0)
                batch_epi_unc = torch.var(torch.stack(P_mean), 0)

            batch_preds = batch_preds.data.cpu().numpy()
            batch_epi_unc = batch_epi_unc.data.cpu().numpy()

            # Inverse scale if regression
            if scaler is not None:
                batch_preds = scaler.inverse_transform(batch_preds)
                batch_epi_unc = scaler.inverse_transform_variance(batch_epi_unc)

            # Collect vectors
            preds.extend(batch_preds.tolist())
            epi_unc.extend(batch_epi_unc.tolist())

        elif aleatoric and mc_dropout:
            with torch.no_grad():
                P_mean = []
                P_logvar = []  # stores variances, i.e. exp(logvar)
                for ss in range(sampling_size):
                    batch_preds, batch_logvar = model(batch, features_batch)
                    P_mean.append(batch_preds)
                    P_logvar.append(torch.exp(batch_logvar))
                batch_preds = torch.mean(torch.stack(P_mean), 0)
                batch_ale_unc = torch.mean(torch.stack(P_logvar), 0)
                batch_epi_unc = torch.var(torch.stack(P_mean), 0)

            batch_preds = batch_preds.data.cpu().numpy()
            batch_ale_unc = batch_ale_unc.data.cpu().numpy()
            batch_epi_unc = batch_epi_unc.data.cpu().numpy()

            # Inverse scale if regression
            if scaler is not None:
                batch_preds = scaler.inverse_transform(batch_preds)
                batch_ale_unc = scaler.inverse_transform_variance(batch_ale_unc)
                batch_epi_unc = scaler.inverse_transform_variance(batch_epi_unc)

            # Collect vectors
            preds.extend(batch_preds.tolist())
            ale_unc.extend(batch_ale_unc.tolist())
            epi_unc.extend(batch_epi_unc.tolist())

    if not aleatoric and not mc_dropout:
        return preds, None, None, features
    elif aleatoric and not mc_dropout:
        return preds, ale_unc, None, features
    elif not aleatoric and mc_dropout:
        return preds, None, epi_unc, features
    else:  # aleatoric and mc_dropout
        return preds, ale_unc, epi_unc, features
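# A self-contained sketch of the MC-Dropout decomposition used above: the
# predictive mean is the mean over stochastic forward passes, the epistemic
# uncertainty is the variance of those passes, and the aleatoric uncertainty
# is the mean of the predicted variances. The "model" below is a stand-in
# that just injects noise.
import torch

def noisy_model(x):
    # stand-in for a dropout-enabled model returning (preds, logvar)
    return x + 0.1 * torch.randn_like(x), torch.full_like(x, -2.0)

x = torch.zeros(8, 3)  # 8 examples, 3 tasks
sampling_size = 50
P_mean, P_var = [], []
for _ in range(sampling_size):
    mean, logvar = noisy_model(x)
    P_mean.append(mean)
    P_var.append(torch.exp(logvar))

preds = torch.stack(P_mean).mean(0)    # predictive mean
epi_unc = torch.stack(P_mean).var(0)   # epistemic: spread across samples
ale_unc = torch.stack(P_var).mean(0)   # aleatoric: average predicted variance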
def predict(model: nn.Module,
            data: MoleculeDataset,
            args: Namespace,
            scaler: StandardScaler = None,
            bert_save_memory: bool = False,
            logger: logging.Logger = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param args: Arguments.
    :param scaler: A StandardScaler object fit on the training targets.
    :param bert_save_memory: Store unused predictions as None to avoid unnecessary memory use.
    :param logger: Logger.
    :return: A list of lists of predictions. The outer list is examples while
             the inner list is tasks.
    """
    model.eval()

    preds = []
    if args.dataset_type == 'bert_pretraining':
        features_preds = []

    if args.maml:
        num_iters, iter_step = data.num_tasks() * args.maml_batches_per_epoch, 1
        full_targets = []
    else:
        num_iters, iter_step = len(data), args.batch_size

    if args.parallel_featurization:
        batch_queue = Queue(args.batch_queue_max_size)
        exit_queue = Queue(1)
        batch_process = Process(target=async_mol2graph,
                                args=(batch_queue, data, args, num_iters, iter_step, exit_queue, True))
        batch_process.start()
        currently_loaded_batches = []

    for i in trange(0, num_iters, iter_step):
        if args.maml:
            task_train_data, task_test_data, task_idx = data.sample_maml_task(args, seed=0)
            mol_batch = task_test_data
            smiles_batch, features_batch, targets_batch = \
                task_train_data.smiles(), task_train_data.features(), task_train_data.targets(task_idx)
            targets = torch.Tensor(targets_batch).unsqueeze(1)
            if args.cuda:
                targets = targets.cuda()
        else:
            # Prepare batch
            if args.parallel_featurization:
                if len(currently_loaded_batches) == 0:
                    currently_loaded_batches = batch_queue.get()
                mol_batch, featurized_mol_batch = currently_loaded_batches.pop(0)
            else:
                mol_batch = MoleculeDataset(data[i:i + args.batch_size])
            smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        if args.dataset_type == 'bert_pretraining':
            batch = mol2graph(smiles_batch, args)
            batch.bert_mask(mol_batch.mask())
        else:
            batch = smiles_batch

        if args.maml:  # TODO refactor with train loop
            model.zero_grad()
            intermediate_preds = model(batch, features_batch)
            loss = get_loss_func(args)(intermediate_preds, targets)
            loss = loss.sum() / len(batch)
            grad = torch.autograd.grad(loss, [p for p in model.parameters() if p.requires_grad])
            theta = [p for p in model.named_parameters() if p[1].requires_grad]  # comes in same order as grad
            theta_prime = {p[0]: p[1] - args.maml_lr * grad[i] for i, p in enumerate(theta)}
            for name, nongrad_param in [p for p in model.named_parameters() if not p[1].requires_grad]:
                theta_prime[name] = nongrad_param + torch.zeros(nongrad_param.size()).to(nongrad_param)
            model_prime = build_model(args=args, params=theta_prime)
            smiles_batch, features_batch, targets_batch = \
                task_test_data.smiles(), task_test_data.features(), task_test_data.targets(task_idx)
            # No mask needed since we only picked data points that have the desired target
            with torch.no_grad():
                batch_preds = model_prime(smiles_batch, features_batch)
            full_targets.extend([[t] for t in targets_batch])
        else:
            with torch.no_grad():
                if args.parallel_featurization:
                    previous_graph_input_mode = model.encoder.graph_input
                    model.encoder.graph_input = True  # force model to accept already processed input
                    batch_preds = model(featurized_mol_batch, features_batch)
                    model.encoder.graph_input = previous_graph_input_mode
                else:
                    batch_preds = model(batch, features_batch)

                if args.dataset_type == 'bert_pretraining':
                    if batch_preds['features'] is not None:
                        features_preds.extend(batch_preds['features'].data.cpu().numpy())
                    batch_preds = batch_preds['vocab']

                if args.dataset_type == 'kernel':
                    batch_preds = batch_preds.view(int(batch_preds.size(0) / 2), 2, batch_preds.size(1))
                    batch_preds = model.kernel_output_layer(batch_preds)

        batch_preds = batch_preds.data.cpu().numpy()

        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        if args.dataset_type == 'regression_with_binning':
            batch_preds = batch_preds.reshape((batch_preds.shape[0], args.num_tasks, args.num_bins))
            indices = np.argmax(batch_preds, axis=2)
            preds.extend(indices.tolist())
        else:
            batch_preds = batch_preds.tolist()
            if args.dataset_type == 'bert_pretraining' and bert_save_memory:
                for atom_idx, mask_val in enumerate(mol_batch.mask()):
                    if mask_val != 0:
                        batch_preds[atom_idx] = None  # not going to predict, so save some memory when passing around
            preds.extend(batch_preds)

    if args.dataset_type == 'regression_with_binning':
        preds = args.bin_predictions[np.array(preds)].tolist()

    if args.dataset_type == 'bert_pretraining':
        preds = {
            'features': features_preds if len(features_preds) > 0 else None,
            'vocab': preds
        }

    if args.parallel_featurization:
        exit_queue.put(0)  # dummy var to let the subprocess know that we're done
        batch_process.join()

    if args.maml:
        # Return the task targets here to guarantee alignment; there's probably
        # no reasonable scenario where we'd use MAML directly to predict
        # something that's actually unknown.
        return preds, full_targets

    return preds
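# A stripped-down, runnable sketch of the MAML-style inner update performed
# above: compute the adaptation loss on the support set, take one gradient
# step in parameter space, and evaluate the adapted parameters without
# mutating the original model. The tiny linear model and data are illustrative.
import torch

model = torch.nn.Linear(3, 1)
maml_lr = 0.1
x_support, y_support = torch.randn(5, 3), torch.randn(5, 1)

loss = torch.nn.functional.mse_loss(model(x_support), y_support)
grads = torch.autograd.grad(loss, [p for p in model.parameters() if p.requires_grad])

# theta_prime: one SGD step away from theta, kept as a name -> tensor map
theta = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
theta_prime = {n: p - maml_lr * g for (n, p), g in zip(theta, grads)}

# Evaluate the adapted parameters on a query point
x_query = torch.randn(1, 3)
with torch.no_grad():
    y_query = x_query @ theta_prime['weight'].t() + theta_prime['bias']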
def predict(model: nn.Module,
            data_loader: MoleculeDataLoader,
            disable_progress_bar: bool = False,
            scaler: StandardScaler = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data_loader: A MoleculeDataLoader.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A StandardScaler object fit on the training targets.
    :return: A list of lists of predictions. The outer list is examples while
             the inner list is tasks. Depending on the uncertainty method and
             whether the model is training, a (predictions, variances) tuple
             may be returned instead.
    """
    UQ = model.uncertainty
    two_vals = UQ in ('Dropout_VI', 'Ensemble')
    mve = model.mve
    training = model.training

    # Keep dropout active for Dropout_VI so predictions stay stochastic
    if UQ != 'Dropout_VI':
        model.eval()

    total_batch_preds = []
    total_var_preds = []

    for batch in tqdm(data_loader, disable=disable_progress_bar):
        # Prepare batch
        batch: MoleculeDataset
        mol_batch, features_batch = batch.batch_graph(), batch.features()

        # Make predictions
        if two_vals:
            with torch.no_grad():
                batch_preds, logvar_preds = model(mol_batch, features_batch)
                var_preds = torch.exp(logvar_preds)
            var_preds = var_preds.data.cpu().numpy().tolist()
            total_var_preds.extend(var_preds)
        else:
            with torch.no_grad():
                batch_preds = model(mol_batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Collect vectors
        batch_preds = batch_preds.tolist()
        total_batch_preds.extend(batch_preds)

    if mve:
        # Outputs are interleaved per task: even indices are means,
        # odd indices are variances.
        p = []
        c = []
        for i in range(len(total_batch_preds)):
            p.append([total_batch_preds[i][j]
                      for j in range(len(total_batch_preds[i])) if j % 2 == 0])
            c.append([total_batch_preds[i][j]
                      for j in range(len(total_batch_preds[i])) if j % 2 == 1])

        if scaler is not None:
            p = scaler.inverse_transform(p).tolist()
            c = (scaler.stds**2 * c).tolist()

        if not training:
            return p, c
        return p

    # Inverse scale if regression
    if scaler is not None:
        total_batch_preds = scaler.inverse_transform(total_batch_preds).tolist()

    if not UQ or training or not two_vals:
        return total_batch_preds
    return total_batch_preds, total_var_preds
def predict(
    model: MoleculeModel,
    data_loader: MoleculeDataLoader,
    disable_progress_bar: bool = False,
    scaler: StandardScaler = None,
    return_unc_parameters: bool = False,
    dropout_prob: float = 0.0,
) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A :class:`~chemprop.models.model.MoleculeModel`.
    :param data_loader: A :class:`~chemprop.data.data.MoleculeDataLoader`.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A :class:`~chemprop.features.scaler.StandardScaler` object fit on the training targets.
    :param return_unc_parameters: A bool indicating whether additional uncertainty parameters
                                  should be returned alongside the mean predictions.
    :param dropout_prob: For use during uncertainty prediction only. The dropout probability
                         used in generating a dropout ensemble.
    :return: A list of lists of predictions. The outer list is molecules while the inner list
             is tasks. If returning uncertainty parameters as well, it is a tuple of lists of
             lists, of a length depending on how many uncertainty parameters are appropriate
             for the loss function.
    """
    model.eval()

    # Activate dropout layers during inference for uncertainty estimation
    if dropout_prob > 0.0:
        def activate_dropout_(model):
            return activate_dropout(model, dropout_prob)
        model.apply(activate_dropout_)

    preds = []

    # Only used if returning uncertainty parameters
    var, lambdas, alphas, betas = [], [], [], []

    for batch in tqdm(data_loader, disable=disable_progress_bar, leave=False):
        # Prepare batch
        batch: MoleculeDataset
        mol_batch = batch.batch_graph()
        features_batch = batch.features()
        atom_descriptors_batch = batch.atom_descriptors()
        atom_features_batch = batch.atom_features()
        bond_features_batch = batch.bond_features()

        # Make predictions
        with torch.no_grad():
            batch_preds = model(
                mol_batch,
                features_batch,
                atom_descriptors_batch,
                atom_features_batch,
                bond_features_batch,
            )

        batch_preds = batch_preds.data.cpu().numpy()

        if model.loss_function == "mve":
            batch_preds, batch_var = np.split(batch_preds, 2, axis=1)
        elif model.loss_function == "dirichlet":
            if model.classification:
                batch_alphas = np.reshape(
                    batch_preds,
                    [batch_preds.shape[0], batch_preds.shape[1] // 2, 2])  # shape(data, tasks, 2)
                batch_preds = batch_alphas[:, :, 1] / np.sum(batch_alphas, axis=2)
            elif model.multiclass:
                batch_alphas = batch_preds  # shape(data, tasks, num_classes)
                batch_preds = batch_preds / np.sum(batch_alphas, axis=2, keepdims=True)
        elif model.loss_function == "evidential":  # regression
            batch_preds, batch_lambdas, batch_alphas, batch_betas = np.split(
                batch_preds, 4, axis=1)

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)
            if model.loss_function == "mve":
                batch_var = batch_var * scaler.stds**2
            elif model.loss_function == "evidential":
                batch_betas = batch_betas * scaler.stds**2

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)
        if model.loss_function == "mve":
            var.extend(batch_var.tolist())
        elif model.loss_function == "dirichlet" and model.classification:
            alphas.extend(batch_alphas.tolist())
        elif model.loss_function == "evidential":  # regression
            lambdas.extend(batch_lambdas.tolist())
            alphas.extend(batch_alphas.tolist())
            betas.extend(batch_betas.tolist())

    if return_unc_parameters:
        if model.loss_function == "mve":
            return preds, var
        elif model.loss_function == "dirichlet":
            return preds, alphas
        elif model.loss_function == "evidential":
            return preds, lambdas, alphas, betas

    return preds
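# A minimal sketch of the column-splitting conventions handled above, assuming
# the model concatenates its distributional parameters along the task axis.
# Shapes and values are illustrative.
import numpy as np

n, tasks = 4, 2

# mve: [means | variances], split in half
mve_out = np.random.rand(n, 2 * tasks)
means, variances = np.split(mve_out, 2, axis=1)

# evidential regression: [gamma | lambda | alpha | beta], split in four
ev_out = np.random.rand(n, 4 * tasks)
gammas, lambdas_, alphas_, betas_ = np.split(ev_out, 4, axis=1)

# dirichlet classification: paired (alpha_0, alpha_1) per task; the positive
# class probability is alpha_1 / (alpha_0 + alpha_1)
dir_out = np.random.rand(n, 2 * tasks) + 1.0
dir_alphas = dir_out.reshape(n, tasks, 2)
p_pos = dir_alphas[:, :, 1] / dir_alphas.sum(axis=2)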
def predict(model: nn.Module,
            data: MoleculeDataset,
            batch_size: int,
            scaler: StandardScaler = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param batch_size: Batch size.
    :param scaler: A StandardScaler object fit on the training targets.
    :return: A list of lists of predictions. The outer list is examples while
             the inner list is tasks.
    """
    model.eval()

    preds = []
    check_fp = []        # wei, for batch problem
    check_fp_d0 = []     # wei, for batch problem
    check_fp_d1 = []     # wei, for batch problem
    check_fp_d2 = []     # wei, for batch problem
    check_fp_final = []  # wei, for batch problem
    check_fp_mol = []    # wei, for batch problem

    num_iters, iter_step = len(data), batch_size

    for i in trange(0, num_iters, iter_step):
        # Prepare batch
        mol_batch = MoleculeDataset(data[i:i + batch_size])
        smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        batch = smiles_batch

        with torch.no_grad():
            batch_preds = model(batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

        # wei, for batch problem: the model stacks five fingerprints
        # (d0, d1, d2, final, mol) along the batch axis, so slice them apart
        each_fp = model.output_fp.tolist()
        #print('each_fp:', each_fp)
        remain = num_iters % batch_size
        if len(each_fp) / 5 == remain:  # last, smaller batch
            check_fp_d0.extend(each_fp[:remain])
            check_fp_d1.extend(each_fp[remain:remain * 2])
            check_fp_d2.extend(each_fp[remain * 2:remain * 3])
            check_fp_final.extend(each_fp[remain * 3:remain * 4])
            check_fp_mol.extend(each_fp[remain * 4:remain * 5])
        else:
            check_fp_d0.extend(each_fp[:batch_size])
            check_fp_d1.extend(each_fp[batch_size:batch_size * 2])
            check_fp_d2.extend(each_fp[batch_size * 2:batch_size * 3])
            check_fp_final.extend(each_fp[batch_size * 3:batch_size * 4])
            check_fp_mol.extend(each_fp[batch_size * 4:batch_size * 5])

    check_fp.append(check_fp_d0)
    check_fp.append(check_fp_d1)
    check_fp.append(check_fp_d2)
    check_fp.append(check_fp_final)
    check_fp.append(check_fp_mol)

    return preds, check_fp
def predict(model: nn.Module,
            data_loader: MoleculeDataLoader,
            args: TrainArgs,
            disable_progress_bar: bool = False,
            scaler: StandardScaler = None,
            test_data: bool = False,
            gp_sample: bool = False,
            bbp_sample: bool = False) -> List[List[float]]:
    """
    Makes predictions on a dataset using an ensemble of models.

    :param model: A model.
    :param data_loader: A MoleculeDataLoader.
    :param args: Arguments.
    :param disable_progress_bar: Whether to disable the progress bar.
    :param scaler: A StandardScaler object fit on the training targets.
    :param test_data: Flag indicating whether data is test data.
    :param gp_sample: Whether to sample from the GP predictive distribution
                      instead of using its mean.
    :param bbp_sample: Whether to sample network weights for BBP/DUN models.
    :return: A list of lists of predictions. The outer list is examples while
             the inner list is tasks.
    """
    # Seed to ensure a single network is sampled across batches
    if args.thompson:
        network_seed = np.random.randint(1e15)

    # Detect whether the model has a GP layer, Bayesian linear layers (BBP),
    # or a depth distribution (DUN)
    gp = hasattr(model, 'gp_layer')
    bbp = any(isinstance(layer, BayesLinear) for layer in model.children())
    dun = hasattr(model, 'log_cat')

    # Set model to eval mode
    model.eval()

    # Enable dropout layers on test data, if args.test_dropout == True
    if args.test_dropout and test_data:
        model.apply(enable_dropout)

    preds = []

    #for batch in tqdm(data_loader, disable=disable_progress_bar):
    for batch in data_loader:
        # Prepare batch
        batch: MoleculeDataset
        mol_batch, features_batch = batch.batch_graph(), batch.features()

        # Make predictions
        with torch.no_grad():
            if gp:
                if gp_sample:
                    batch_preds = model(mol_batch, features_batch).sample()
                else:
                    batch_preds = model(mol_batch, features_batch).mean
            elif bbp:
                if dun:
                    batch_preds, _, _, _ = model(mol_batch, features_batch, sample=bbp_sample)
                else:
                    if args.thompson:
                        torch.manual_seed(network_seed)
                    batch_preds, _ = model(mol_batch, features_batch, sample=bbp_sample)
            else:
                if args.thompson:
                    torch.manual_seed(network_seed)
                batch_preds = model(mol_batch, features_batch)

        batch_preds = batch_preds.data.cpu().numpy()

        # Inverse scale if regression
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        # Collect vectors
        batch_preds = batch_preds.tolist()
        preds.extend(batch_preds)

    return preds
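# A plausible, self-contained version of the enable_dropout hook applied above:
# after model.eval(), selectively switch dropout modules back to train mode so
# test-time forward passes stay stochastic. The exact helper in this codebase
# may differ; this is a sketch of the standard pattern.
import torch.nn as nn

def enable_dropout(module: nn.Module) -> None:
    # called on every submodule via model.apply(enable_dropout)
    if isinstance(module, nn.Dropout):
        module.train()

model = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.2), nn.Linear(4, 1))
model.eval()                 # disables dropout (and freezes batchnorm, etc.)
model.apply(enable_dropout)  # re-enables only the dropout layers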