def evaluate(model: MoleculeModel, data_loader: MoleculeDataLoader, num_tasks: int, metric_func: Callable, dataset_type: str, scaler: StandardScaler = None, logger: logging.Logger = None) -> List[float]:
    """Evaluate a model on a dataset: predict, then score the predictions.

    :param model: A :class:`~chemprop.models.model.MoleculeModel`.
    :param data_loader: A :class:`~chemprop.data.data.MoleculeDataLoader`.
    :param num_tasks: Number of tasks.
    :param metric_func: Metric function which takes in a list of targets and a list of predictions.
    :param dataset_type: Dataset type.
    :param scaler: A :class:`~chemprop.features.scaler.StandardScaler` object fit on the training targets.
    :param logger: A logger to record output.
    :return: A list with the score for each task based on :code:`metric_func`.
    """
    model_preds = predict(model=model, data_loader=data_loader, scaler=scaler)
    # Score predictions directly against the loader's targets.
    return evaluate_predictions(
        preds=model_preds,
        targets=data_loader.targets(),
        num_tasks=num_tasks,
        metric_func=metric_func,
        dataset_type=dataset_type,
        logger=logger,
    )
def evaluate(model: nn.Module, data_loader: MoleculeDataLoader, num_tasks: int, metric_func: Callable, dataset_type: str, scaler: StandardScaler = None, logger: logging.Logger = None) -> List[float]:
    """Evaluate a model on a dataset: predict, then score the predictions.

    :param model: A model.
    :param data_loader: A MoleculeDataLoader.
    :param num_tasks: Number of tasks.
    :param metric_func: Metric function which takes in a list of targets and a list of predictions.
    :param dataset_type: Dataset type.
    :param scaler: A StandardScaler object fit on the training targets.
    :param logger: Logger.
    :return: A list with the score for each task based on `metric_func`.
    """
    model_preds = predict(model=model, data_loader=data_loader, scaler=scaler)
    # Score predictions directly against the loader's targets.
    return evaluate_predictions(
        preds=model_preds,
        targets=data_loader.targets(),
        num_tasks=num_tasks,
        metric_func=metric_func,
        dataset_type=dataset_type,
        logger=logger,
    )
def evaluate(model: nn.Module, data_loader: MoleculeDataLoader, num_tasks: int, loss_func: Callable, metric_func: Callable, args: TrainArgs, dataset_type: str, scaler: StandardScaler = None, logger: logging.Logger = None) -> 'Tuple[List[float], float]':
    """Evaluate a model on a dataset and also compute its average loss.

    :param model: A model. Called as ``model(mol_batch, features_batch)`` and
        expected to return a ``(preds, features)`` pair.
    :param data_loader: A MoleculeDataLoader.
    :param num_tasks: Number of tasks.
    :param loss_func: Loss function which takes in predictions and targets.
    :param metric_func: Metric function which takes in a list of targets and a list of predictions.
    :param args: TrainArgs; only ``args.dataset_type`` is read here.
    :param dataset_type: Dataset type.
    :param scaler: A StandardScaler object fit on the training targets.
    :param logger: Logger.
    :return: A tuple of (per-task scores based on `metric_func`,
        average loss per molecule across the dataset).
    """
    preds, feature = predict(model=model, data_loader=data_loader, scaler=scaler)

    targets = data_loader.targets()

    results = evaluate_predictions(preds=preds, targets=targets, num_tasks=num_tasks,
                                   metric_func=metric_func, dataset_type=dataset_type, logger=logger)

    loss_sum, iter_count = 0, 0
    # Evaluation only: disable autograd so the forward passes below don't
    # build computation graphs (the original omitted this).
    with torch.no_grad():
        for batch in tqdm(data_loader, total=len(data_loader)):
            # Prepare batch. Use batch-local names so we don't clobber the
            # `preds`/`targets` already used for the metric scores above.
            mol_batch, features_batch, target_batch = batch.batch_graph(), batch.features(), batch.targets()
            # Mask out positions whose target is missing (None).
            mask = torch.Tensor([[x is not None for x in tb] for tb in target_batch])
            batch_targets = torch.Tensor([[0 if x is None else x for x in tb] for tb in target_batch])

            # Run model
            batch_preds, _ = model(mol_batch, features_batch)

            # Move tensors to the model's output device.
            mask = mask.to(batch_preds.device)
            batch_targets = batch_targets.to(batch_preds.device)
            class_weights = torch.ones(batch_targets.shape, device=batch_preds.device)

            if args.dataset_type == 'multiclass':
                # Cross-entropy-style losses need integer class targets,
                # applied per task over the class dimension.
                batch_targets = batch_targets.long()
                loss = torch.cat([
                    loss_func(batch_preds[:, target_index, :], batch_targets[:, target_index]).unsqueeze(1)
                    for target_index in range(batch_preds.size(1))
                ], dim=1) * class_weights * mask
            else:
                loss = loss_func(batch_preds, batch_targets) * class_weights * mask

            # Per-batch mean over the non-missing entries.
            loss = loss.sum() / mask.sum()

            loss_sum += loss.item()
            iter_count += len(batch)

    # Average loss per molecule; guard against an empty data loader
    # (the original raised ZeroDivisionError in that case).
    avg_loss = loss_sum / iter_count if iter_count != 0 else 0.0

    return results, avg_loss