def evaluate(model: nn.Module, data: DataLoader, metric_func: Callable, args: Namespace) -> List[float]:
    """
    Evaluates a model on a dataset.

    :param model: A model.
    :param data: A DataLoader serving the evaluation data.
    :param metric_func: Metric function which takes in a list of targets and a list of predictions.
    :param args: Arguments.
    :return: A list with the score for each task based on `metric_func`.
    """
    targets = []

    with torch.no_grad():
        model.eval()

        preds = []
        for batch in tqdm(data, total=len(data)):
            targets.extend(batch.y.float().unsqueeze(1))

            # Prepare batch
            batch = GlassBatchMolGraph(batch)

            # Run model
            batch_preds = model(batch)
            batch_preds = batch_preds.data.cpu().numpy()
            preds.extend(batch_preds.tolist())

    results = evaluate_predictions(
        preds=preds,
        targets=targets,
        metric_func=metric_func,
        dataset_type=args.dataset_type
    )

    return results
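# `evaluate` is agnostic to the scoring function: it only needs a callable that
# maps a list of targets and a list of predictions to a score. A minimal RMSE
# metric with that shape, as an illustrative sketch (the project presumably
# supplies its own metrics via its metric-lookup utilities):

from typing import List

import numpy as np

def rmse(targets: List[float], preds: List[float]) -> float:
    """Root-mean-squared error over paired targets and predictions."""
    return float(np.sqrt(np.mean((np.array(targets) - np.array(preds)) ** 2)))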
def single_task_sklearn(model: Union[RandomForestRegressor, RandomForestClassifier, SVR, SVC],
                        train_data: MoleculeDataset,
                        test_data: MoleculeDataset,
                        metrics: List[str],
                        args: SklearnTrainArgs,
                        logger: Logger = None) -> Dict[str, List[float]]:
    """
    Trains a single-task scikit-learn model, meaning a separate model is trained for each task.

    This is necessary if some tasks have None (unknown) values.

    :param model: The scikit-learn model to train.
    :param train_data: The training data.
    :param test_data: The test data.
    :param metrics: A list of names of metric functions.
    :param args: A :class:`~chemprop.args.SklearnTrainArgs` object containing arguments for
                 training the scikit-learn model.
    :param logger: A logger to record output.
    :return: A dictionary mapping each metric in :code:`metrics` to a list of values for each task.
    """
    scores = {}
    num_tasks = train_data.num_tasks()
    for task_num in trange(num_tasks):
        # Only get features and targets for molecules where target is not None
        train_features, train_targets = zip(*[(features, targets[task_num])
                                              for features, targets in zip(train_data.features(), train_data.targets())
                                              if targets[task_num] is not None])
        test_features, test_targets = zip(*[(features, targets[task_num])
                                            for features, targets in zip(test_data.features(), test_data.targets())
                                            if targets[task_num] is not None])

        model.fit(train_features, train_targets)

        test_preds = predict(
            model=model,
            model_type=args.model_type,
            dataset_type=args.dataset_type,
            features=test_features
        )
        test_targets = [[target] for target in test_targets]

        score = evaluate_predictions(
            preds=test_preds,
            targets=test_targets,
            num_tasks=1,
            metrics=metrics,
            dataset_type=args.dataset_type,
            logger=logger
        )

        for metric in metrics:
            if metric not in scores:
                scores[metric] = []
            scores[metric].append(score[metric][0])

    return scores
def multi_task_sklearn(model: Union[RandomForestRegressor, RandomForestClassifier, SVR, SVC],
                       train_data: MoleculeDataset,
                       test_data: MoleculeDataset,
                       metrics: List[str],
                       args: SklearnTrainArgs,
                       logger: Logger = None) -> Dict[str, List[float]]:
    """
    Trains a multi-task scikit-learn model, meaning one model is trained simultaneously on all tasks.

    This is only possible if none of the tasks have None (unknown) values.

    :param model: The scikit-learn model to train.
    :param train_data: The training data.
    :param test_data: The test data.
    :param metrics: A list of names of metric functions.
    :param args: A :class:`~chemprop.args.SklearnTrainArgs` object containing arguments for
                 training the scikit-learn model.
    :param logger: A logger to record output.
    :return: A dictionary mapping each metric in :code:`metrics` to a list of values for each task.
    """
    num_tasks = train_data.num_tasks()

    train_targets = train_data.targets()
    if train_data.num_tasks() == 1:
        train_targets = [targets[0] for targets in train_targets]

    # Train
    model.fit(train_data.features(), train_targets)

    # Save model
    with open(os.path.join(args.save_dir, 'model.pkl'), 'wb') as f:
        pickle.dump(model, f)

    test_preds = predict(
        model=model,
        model_type=args.model_type,
        dataset_type=args.dataset_type,
        features=test_data.features()
    )

    scores = evaluate_predictions(
        preds=test_preds,
        targets=test_data.targets(),
        num_tasks=num_tasks,
        metrics=metrics,
        dataset_type=args.dataset_type,
        logger=logger
    )

    return scores
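# The split between the two trainers hinges on missing labels: the multi-task
# path needs a fully observed target matrix, while the single-task path
# tolerates Nones by filtering per task. A small dispatch sketch (the helper
# name `has_missing_targets` is an assumption for illustration):

from typing import List, Optional

def has_missing_targets(targets: List[List[Optional[float]]]) -> bool:
    """Return True if any task value is None for any molecule."""
    return any(value is None for row in targets for value in row)

# trainer = single_task_sklearn if has_missing_targets(train_data.targets()) \
#     else multi_task_sklearn
# scores = trainer(model, train_data, test_data, metrics, args, logger)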
def evaluate(
    self,
    targets: List[List[float]],
    preds: List[List[float]],
    uncertainties: List[List[float]],
    mask: List[List[bool]],
) -> List[float]:
    # Score the uncertainty estimates directly: the uncertainties are passed to
    # evaluate_predictions in place of predictions, so the configured
    # evaluation metric is computed on them rather than on `preds`.
    return evaluate_predictions(
        preds=uncertainties,
        targets=targets,
        num_tasks=np.array(targets).shape[1],
        metrics=[self.evaluation_method],
        dataset_type=self.dataset_type,
    )[self.evaluation_method]
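# Shape convention assumed above: the outer list runs over molecules and the
# inner list over tasks, so the task count is the second array dimension. A
# quick check with dummy data (values are illustrative only):

import numpy as np

targets = [[0.5, 1.0], [0.7, 0.9], [0.2, 0.4]]  # 3 molecules x 2 tasks
assert np.array(targets).shape[1] == 2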
def test_predict_spectra(self,
                         name: str,
                         model_type: str,
                         expected_score: float,
                         expected_nans: int,
                         train_flags: List[str] = None,
                         predict_flags: List[str] = None):
    with TemporaryDirectory() as save_dir:
        # Train
        dataset_type = 'spectra'
        self.train(dataset_type=dataset_type, metric='sid', save_dir=save_dir,
                   model_type=model_type, flags=train_flags)

        # Predict
        preds_path = os.path.join(save_dir, 'preds.csv')
        self.predict(dataset_type=dataset_type, preds_path=preds_path, save_dir=save_dir,
                     model_type=model_type, flags=predict_flags)

        # Check results
        pred = pd.read_csv(preds_path)
        true = pd.read_csv(os.path.join(TEST_DATA_DIR, 'spectra.csv'))
        self.assertEqual(list(pred.keys()), list(true.keys()))
        self.assertEqual(list(pred['smiles']), list(true['smiles']))
        pred, true = pred.drop(columns=['smiles']), true.drop(columns=['smiles'])
        pred, true = pred.to_numpy(), true.to_numpy()

        # predict_flags[1] holds the value of the phase features path flag;
        # train_flags[5] holds the value of the spectra phase mask path flag.
        phase_features = load_features(predict_flags[1])
        if '--spectra_phase_mask_path' in train_flags:
            mask = load_phase_mask(train_flags[5])
        else:
            mask = None
        true = normalize_spectra(true, phase_features, mask)

        sid = evaluate_predictions(preds=pred, targets=true, num_tasks=len(true[0]),
                                   metrics=['sid'], dataset_type='spectra')['sid'][0]
        self.assertAlmostEqual(sid, expected_score, delta=DELTA * expected_score)
        self.assertEqual(np.sum(np.isnan(pred)), expected_nans)
def compute_values(dataset: str,
                   preds: List[List[List[float]]],
                   targets: List[List[List[float]]]) -> List[float]:
    num_tasks = len(preds[0][0])
    values = [
        evaluate_predictions(preds=pred,
                             targets=target,
                             num_tasks=num_tasks,
                             metric_func=DATASETS[dataset]['metric'],
                             dataset_type=DATASETS[dataset]['type'],
                             logger=FAKE_LOGGER)
        for pred, target in tqdm(zip(preds, targets), total=len(preds))
    ]
    values = [np.nanmean(value) for value in values]

    return values
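# `compute_values` treats preds/targets as [replicate][molecule][task] and
# collapses each replicate's per-task scores with np.nanmean, so a task whose
# metric is undefined (NaN) on a given split is skipped rather than poisoning
# the replicate's mean. Illustrative values:

import numpy as np

per_task_scores = [0.81, np.nan, 0.76]  # one replicate, three tasks
print(np.nanmean(per_task_scores))      # 0.785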
def run_training(args, save_dir):
    tgt_data, val_data, test_data, src_data = prepare_data(args)

    inv_model = prepare_model(args)
    print('invariant', inv_model)

    optimizer = build_optimizer(inv_model, args)
    scheduler = build_lr_scheduler(optimizer, args)
    inv_opt = (optimizer, scheduler)

    loss_func = get_loss_func(args)
    metric_func = get_metric_func(metric=args.metric)

    best_score = float('inf') if args.minimize_score else -float('inf')
    best_epoch = 0
    for epoch in range(args.epochs):
        print(f'Epoch {epoch}')
        train(inv_model, src_data, tgt_data, loss_func, inv_opt, args)

        val_scores = evaluate(inv_model, val_data, args.num_tasks, metric_func,
                              args.batch_size, args.dataset_type)
        avg_val_score = np.nanmean(val_scores)
        print(f'Validation {args.metric} = {avg_val_score:.4f}')

        # Save a checkpoint whenever the validation score improves
        if (args.minimize_score and avg_val_score < best_score) or \
                (not args.minimize_score and avg_val_score > best_score):
            best_score, best_epoch = avg_val_score, epoch
            save_checkpoint(os.path.join(save_dir, 'model.pt'), inv_model, args=args)

    # Evaluate the best checkpoint on the test set
    print(f'Loading model checkpoint from epoch {best_epoch}')
    model = load_checkpoint(os.path.join(save_dir, 'model.pt'), cuda=args.cuda)
    test_smiles, test_targets = test_data.smiles(), test_data.targets()
    test_preds = predict(model, test_data, args.batch_size)
    test_scores = evaluate_predictions(test_preds, test_targets, args.num_tasks,
                                       metric_func, args.dataset_type)
    avg_test_score = np.nanmean(test_scores)
    print(f'Test {args.metric} = {avg_test_score:.4f}')

    return avg_test_score
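# The checkpointing rule above generalizes to any metric once its direction is
# known. A minimal, self-contained restatement of that comparison (names are
# illustrative, not part of the training script):

def improved(score: float, best_score: float, minimize: bool) -> bool:
    """Return True if `score` beats `best_score` in the desired direction."""
    return score < best_score if minimize else score > best_score

# Example: RMSE is minimized, so a drop from 0.52 to 0.48 counts as progress.
assert improved(0.48, 0.52, minimize=True)
# AUC is maximized, so 0.91 beats 0.89.
assert improved(0.91, 0.89, minimize=False)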