Example #1
def evaluate(model: nn.Module, data: DataLoader, metric_func: Callable,
             args: Namespace) -> List[float]:
    """
    Evaluates a model on a dataset.
    :param model: A model.
    :param data: A DataLoader over a GlassDataset.
    :param metric_func: Metric function which takes in a list of targets and a list of predictions.
    :param args: Arguments.
    :return: A list with the score for each task based on `metric_func`.
    """
    targets = []

    with torch.no_grad():
        model.eval()

        preds = []
        for batch in tqdm(data, total=len(data)):

            targets.extend(batch.y.float().unsqueeze(1))

            # Prepare batch
            batch = GlassBatchMolGraph(batch)

            # Run model
            batch_preds = model(batch)
            batch_preds = batch_preds.data.cpu().numpy()

            preds.extend(batch_preds.tolist())

    results = evaluate_predictions(preds=preds,
                                   targets=targets,
                                   metric_func=metric_func,
                                   dataset_type=args.dataset_type)

    return results
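The loop above follows the standard PyTorch inference pattern: put the model in eval mode and disable gradient tracking before collecting predictions. A minimal self-contained sketch of that pattern, using a toy nn.Linear model and random inputs rather than anything from the repository:

import torch
from torch import nn

# Toy model and inputs, purely illustrative; any trained nn.Module works the same way.
model = nn.Linear(4, 1)
inputs = torch.randn(8, 4)

with torch.no_grad():              # no gradient tracking during evaluation
    model.eval()                   # disable dropout, use running batch-norm statistics
    preds = model(inputs).cpu().numpy().tolist()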
Example #2
def single_task_sklearn(model: Union[RandomForestRegressor, RandomForestClassifier, SVR, SVC],
                        train_data: MoleculeDataset,
                        test_data: MoleculeDataset,
                        metrics: List[str],
                        args: SklearnTrainArgs,
                        logger: Logger = None) -> Dict[str, List[float]]:
    """
    Trains a single-task scikit-learn model, meaning a separate model is trained for each task.

    This is necessary if some tasks have None (unknown) values.

    :param model: The scikit-learn model to train.
    :param train_data: The training data.
    :param test_data: The test data.
    :param metrics: A list of names of metric functions.
    :param args: A :class:`~chemprop.args.SklearnTrainArgs` object containing arguments for
                 training the scikit-learn model.
    :param logger: A logger to record output.
    :return: A dictionary mapping each metric in :code:`metrics` to a list of values for each task.
    """
    scores = {}
    num_tasks = train_data.num_tasks()
    for task_num in trange(num_tasks):
        # Only get features and targets for molecules where target is not None
        train_features, train_targets = zip(*[(features, targets[task_num])
                                              for features, targets in zip(train_data.features(), train_data.targets())
                                              if targets[task_num] is not None])
        test_features, test_targets = zip(*[(features, targets[task_num])
                                            for features, targets in zip(test_data.features(), test_data.targets())
                                            if targets[task_num] is not None])

        model.fit(train_features, train_targets)

        test_preds = predict(
            model=model,
            model_type=args.model_type,
            dataset_type=args.dataset_type,
            features=test_features
        )
        test_targets = [[target] for target in test_targets]

        score = evaluate_predictions(
            preds=test_preds,
            targets=test_targets,
            num_tasks=1,
            metrics=metrics,
            dataset_type=args.dataset_type,
            logger=logger
        )
        for metric in metrics:
            if metric not in scores:
                scores[metric] = []
            scores[metric].append(score[metric][0])

    return scores
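The zip(*[...]) comprehension is what drops molecules whose target for the current task is None before fitting and scoring. A small standalone sketch of the same idiom, with made-up feature vectors and a two-task target list:

# Hypothetical per-molecule features and two-task targets (None = unknown).
features = [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]
targets = [[1.0, None], [None, 0.0], [0.5, 1.0]]
task_num = 1

task_features, task_targets = zip(*[(f, t[task_num])
                                    for f, t in zip(features, targets)
                                    if t[task_num] is not None])
# task_features == ([0.3, 0.4], [0.5, 0.6]); task_targets == (0.0, 1.0)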
Example #3
def multi_task_sklearn(model: Union[RandomForestRegressor, RandomForestClassifier, SVR, SVC],
                       train_data: MoleculeDataset,
                       test_data: MoleculeDataset,
                       metrics: List[str],
                       args: SklearnTrainArgs,
                       logger: Logger = None) -> Dict[str, List[float]]:
    """
    Trains a multi-task scikit-learn model, meaning one model is trained simultaneously on all tasks.

    This is only possible if none of the tasks have None (unknown) values.

    :param model: The scikit-learn model to train.
    :param train_data: The training data.
    :param test_data: The test data.
    :param metrics: A list of names of metric functions.
    :param args: A :class:`~chemprop.args.SklearnTrainArgs` object containing arguments for
                 training the scikit-learn model.
    :param logger: A logger to record output.
    :return: A dictionary mapping each metric in :code:`metrics` to a list of values for each task.
    """
    num_tasks = train_data.num_tasks()

    train_targets = train_data.targets()
    if train_data.num_tasks() == 1:
        train_targets = [targets[0] for targets in train_targets]

    # Train
    model.fit(train_data.features(), train_targets)

    # Save model
    with open(os.path.join(args.save_dir, 'model.pkl'), 'wb') as f:
        pickle.dump(model, f)

    test_preds = predict(
        model=model,
        model_type=args.model_type,
        dataset_type=args.dataset_type,
        features=test_data.features()
    )

    scores = evaluate_predictions(
        preds=test_preds,
        targets=test_data.targets(),
        num_tasks=num_tasks,
        metrics=metrics,
        dataset_type=args.dataset_type,
        logger=logger
    )

    return scores
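Persisting the fitted estimator with pickle, as done above, lets it be reloaded later for prediction. A round-trip sketch under the assumption of a local save_dir directory and toy training data:

import os
import pickle

from sklearn.ensemble import RandomForestRegressor

save_dir = 'save_dir'                      # assumed output directory
os.makedirs(save_dir, exist_ok=True)

model = RandomForestRegressor(n_estimators=10)
model.fit([[0.0], [1.0], [2.0]], [0.0, 1.0, 2.0])   # toy training data

with open(os.path.join(save_dir, 'model.pkl'), 'wb') as f:
    pickle.dump(model, f)

with open(os.path.join(save_dir, 'model.pkl'), 'rb') as f:
    loaded = pickle.load(f)                # same fitted model, ready for predict()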
Example #4
    def evaluate(
        self,
        targets: List[List[float]],
        preds: List[List[float]],
        uncertainties: List[List[float]],
        mask: List[List[bool]],
    ):
        return evaluate_predictions(
            preds=uncertainties,
            targets=targets,
            num_tasks=np.array(targets).shape[1],
            metrics=[self.evaluation_method],
            dataset_type=self.dataset_type,
        )[self.evaluation_method]
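Here num_tasks is inferred as np.array(targets).shape[1], which assumes the targets form a rectangular list with one slot per task for every molecule. A quick illustration with invented targets:

import numpy as np

targets = [[0.1, 1.0], [0.2, 0.0], [0.3, 1.0]]   # 3 molecules x 2 tasks, illustrative
num_tasks = np.array(targets).shape[1]            # 2; requires every row to have the same length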
Example #5
    def test_predict_spectra(self,
                             name: str,
                             model_type: str,
                             expected_score: float,
                             expected_nans: int,
                             train_flags: List[str] = None,
                             predict_flags: List[str] = None):
        with TemporaryDirectory() as save_dir:
            # Train
            dataset_type = 'spectra'
            self.train(dataset_type=dataset_type,
                       metric='sid',
                       save_dir=save_dir,
                       model_type=model_type,
                       flags=train_flags)

            # Predict
            preds_path = os.path.join(save_dir, 'preds.csv')
            self.predict(dataset_type=dataset_type,
                         preds_path=preds_path,
                         save_dir=save_dir,
                         model_type=model_type,
                         flags=predict_flags)

            # Check results
            pred = pd.read_csv(preds_path)
            true = pd.read_csv(os.path.join(TEST_DATA_DIR, 'spectra.csv'))
            self.assertEqual(list(pred.keys()), list(true.keys()))
            self.assertEqual(list(pred['smiles']), list(true['smiles']))

            pred, true = pred.drop(columns=['smiles']), true.drop(
                columns=['smiles'])
            pred, true = pred.to_numpy(), true.to_numpy()
            phase_features = load_features(predict_flags[1])
            if '--spectra_phase_mask_path' in train_flags:
                mask = load_phase_mask(train_flags[5])
            else:
                mask = None
            true = normalize_spectra(true, phase_features, mask)
            sid = evaluate_predictions(preds=pred,
                                       targets=true,
                                       num_tasks=len(true[0]),
                                       metrics=['sid'],
                                       dataset_type='spectra')['sid'][0]
            self.assertAlmostEqual(sid,
                                   expected_score,
                                   delta=DELTA * expected_score)
            self.assertEqual(np.sum(np.isnan(pred)), expected_nans)
Example #6
def compute_values(dataset: str, preds: List[List[List[float]]],
                   targets: List[List[List[float]]]) -> List[float]:
    num_tasks = len(preds[0][0])

    values = [
        evaluate_predictions(preds=pred,
                             targets=target,
                             num_tasks=num_tasks,
                             metric_func=DATASETS[dataset]['metric'],
                             dataset_type=DATASETS[dataset]['type'],
                             logger=FAKE_LOGGER)
        for pred, target in tqdm(zip(preds, targets), total=len(preds))
    ]

    values = [np.nanmean(value) for value in values]

    return values
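np.nanmean is used so that tasks whose score comes back as NaN (for example, a task with no valid targets in the split) do not distort the average. A tiny example with invented per-task scores:

import numpy as np

task_scores = [0.81, float('nan'), 0.75]   # hypothetical per-task scores
print(np.nanmean(task_scores))             # 0.78 -- the mean over non-NaN tasks only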
Example #7
def run_training(args, save_dir):
    tgt_data, val_data, test_data, src_data = prepare_data(args)
    inv_model = prepare_model(args)

    print('invariant', inv_model)

    optimizer = build_optimizer(inv_model, args)
    scheduler = build_lr_scheduler(optimizer, args)
    inv_opt = (optimizer, scheduler)

    loss_func = get_loss_func(args)
    metric_func = get_metric_func(metric=args.metric)

    best_score = float('inf') if args.minimize_score else -float('inf')
    best_epoch = 0
    for epoch in range(args.epochs):
        print(f'Epoch {epoch}')
        train(inv_model, src_data, tgt_data, loss_func, inv_opt, args)

        val_scores = evaluate(inv_model, val_data, args.num_tasks, metric_func,
                              args.batch_size, args.dataset_type)
        avg_val_score = np.nanmean(val_scores)
        print(f'Validation {args.metric} = {avg_val_score:.4f}')
        if (args.minimize_score and avg_val_score < best_score) or \
                (not args.minimize_score and avg_val_score > best_score):
            best_score, best_epoch = avg_val_score, epoch
            save_checkpoint(os.path.join(save_dir, 'model.pt'),
                            inv_model,
                            args=args)

    print(f'Loading model checkpoint from epoch {best_epoch}')
    model = load_checkpoint(os.path.join(save_dir, 'model.pt'), cuda=args.cuda)
    test_smiles, test_targets = test_data.smiles(), test_data.targets()
    test_preds = predict(model, test_data, args.batch_size)
    test_scores = evaluate_predictions(test_preds, test_targets,
                                       args.num_tasks, metric_func,
                                       args.dataset_type)

    avg_test_score = np.nanmean(test_scores)
    print(f'Test {args.metric} = {avg_test_score:.4f}')
    return avg_test_score
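The checkpointing logic keeps the model from the epoch with the best validation score, where "best" means lowest for metrics such as RMSE and highest for metrics such as AUC. A standalone sketch of that bookkeeping with invented validation scores (is_improvement is a hypothetical helper, not part of the code above):

def is_improvement(score: float, best_score: float, minimize_score: bool) -> bool:
    # Lower is better when minimizing, higher is better otherwise.
    return score < best_score if minimize_score else score > best_score

best_score, best_epoch = float('inf'), 0               # minimizing a metric such as RMSE
for epoch, val_score in enumerate([1.2, 0.9, 1.0]):    # hypothetical validation scores
    if is_improvement(val_score, best_score, minimize_score=True):
        best_score, best_epoch = val_score, epoch
# best_score == 0.9, best_epoch == 1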