import numpy as np
import pandas as pd
from unittest.mock import Mock

# The project classes exercised here (BenchmarkDataset, BenchmarkModel, DatasetBuilder,
# ModelTrainer, MultipleModelRunner, PipelineRunner, TrainingResults) are imported from
# the package under test; the `tiny_files_structure` and `sample_ids` fixtures come from
# the suite's conftest.


def test_benchmark_model_predict(tiny_files_structure):
    bm_model = BenchmarkModel()
    x_train = np.ones((10, 5))
    y_train = (
        pd.read_csv(tiny_files_structure.raw.y_train, index_col=0).fillna(0).values
    )
    bm_model.fit(x_train, y_train)
    y_pred = bm_model.predict(x_train)
    assert y_pred.shape == y_train.shape
    assert np.isclose(y_pred.mean(axis=0), bm_model.params["mean_values"]).all()
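

# The predict test above and the fit test at the end of this module imply a simple
# mean-per-target baseline. The class below is a minimal, hypothetical sketch of that
# behaviour for reference only: it is NOT the project's BenchmarkModel, is not used by
# any test, and its names (`ReferenceMeanBaseline`, `params`, `version`) merely mirror
# the assertions.
class ReferenceMeanBaseline:
    version = "0.0-sketch"

    def __init__(self):
        self.params = {}

    def fit(self, x_train, y_train):
        # Ignore the features; store the per-column mean of the targets.
        y = np.asarray(y_train, dtype=float)
        self.params["mean_values"] = y.mean(axis=0)
        return self

    def predict(self, x):
        # Repeat the stored column means for every input row.
        n_rows = np.asarray(x).shape[0]
        return np.tile(self.params["mean_values"], (n_rows, 1))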


def test_multiple_model_runner_run_pipelines(sample_ids, tiny_files_structure):
    training_list = [(BenchmarkDataset(), BenchmarkModel())] * 2
    runner = PipelineRunner(tiny_files_structure)
    runner.splitter.split = lambda *args: (sample_ids, sample_ids)
    multi_runner = MultipleModelRunner(training_list)
    results = multi_runner.run_multiple_pipelines(sample_ids, runner, 0.5)
    assert results
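

# `run_multiple_pipelines` is only checked through its return value above. A hypothetical
# sketch of such a loop, assuming run_pipeline accepts a `test_size` keyword (its default
# of 0.2 is pinned down in test_pipeline_runner_run_calls below), could look like this;
# it is an illustration, not the project's implementation.
def run_multiple_pipelines_sketch(training_list, sample_ids, runner, test_size):
    # Run one full pipeline per (dataset, model) pair and collect the results.
    return [
        runner.run_pipeline(sample_ids, dataset=dataset, model=model, test_size=test_size)
        for dataset, model in training_list
    ]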


def test_get_model_path():
    runner = PipelineRunner()
    model = BenchmarkModel()
    dataset = BenchmarkDataset()
    path = runner.get_model_path(model=model, dataset=dataset)
    assert path
    assert model.version in path.stem
    assert dataset.version in path.stem


def test_model_trainer(tiny_files_structure):
    trainer = ModelTrainer()
    x_train = pd.read_csv(tiny_files_structure.raw.correlations, index_col=0)
    y_train = (
        pd.read_csv(tiny_files_structure.raw.y_train, index_col=0).fillna(0).values
    )
    results = trainer.train_model(BenchmarkModel(), x_train, y_train)
    assert results.model.params["mean_values"].all()
    assert results.train_mae
    assert results.train_weighted_mae


def test_add_info_to_results(sample_ids):
    runner = PipelineRunner()
    ds, model = BenchmarkDataset(), BenchmarkModel()
    results = TrainingResults()
    runner.add_information_to_results(results, ds, model, sample_ids, sample_ids)
    assert results.model_path
    assert results.train_ids.any()
    assert results.val_ids.any()
    assert results.dataset_version


def test_evaluate_validation_set():
    runner = PipelineRunner()
    x_val = np.random.random((10, 20))
    y_val = np.random.random((10, 5))
    results = TrainingResults(model=BenchmarkModel())
    results.model.predict = lambda *args: np.random.random((10, 5))
    runner.evaluate_validation_set(results, x_val, y_val)
    assert results.validation_weighted_mae
    assert results.validation_mae
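

# test_evaluate_validation_set only checks that both error fields get populated. A
# minimal sketch of that evaluation step, assuming a plain MAE and leaving the weighting
# scheme as an open assumption (uniform column weights used here as a placeholder); this
# is not the project's implementation.
def evaluate_validation_set_sketch(results, x_val, y_val, weights=None):
    y_pred = results.model.predict(x_val)
    abs_err = np.abs(np.asarray(y_val) - np.asarray(y_pred))
    results.validation_mae = abs_err.mean()
    if weights is None:
        # Placeholder assumption: equal weight per target column.
        weights = np.ones(abs_err.shape[1]) / abs_err.shape[1]
    results.validation_weighted_mae = float(abs_err.mean(axis=0) @ weights)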


def test_pipeline_runner_run_calls(tiny_files_structure, sample_ids):
    runner = PipelineRunner(file_structure=tiny_files_structure)
    # Stub out all collaborators so only the orchestration calls are checked.
    runner.ds_builder = Mock(spec=DatasetBuilder)
    runner.model_trainer = Mock(spec=ModelTrainer)
    runner.get_model_path = Mock(spec=runner.get_model_path)
    runner.build_datasets = Mock(spec=runner.build_datasets, return_value=(0, 0, 0, 0))
    runner.splitter.split = Mock(spec=runner.splitter.split, return_value=(0, 0))
    runner.evaluate_validation_set = Mock(spec=runner.evaluate_validation_set)

    runner.run_pipeline(sample_ids, dataset=BenchmarkDataset(), model=BenchmarkModel())

    runner.splitter.split.assert_called_once_with(sample_ids, 0.2)
    runner.build_datasets.assert_called_once()
    runner.get_model_path.assert_called_once()
    runner.model_trainer.train_model.assert_called_once()
    runner.evaluate_validation_set.assert_called_once()
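

# The mock-based test above pins down run_pipeline's orchestration: split the sample ids
# (default validation fraction 0.2), build the train/validation datasets, resolve the
# model path, train, then evaluate on the validation split. A hypothetical sketch
# consistent with those assertions (the arguments passed to build_datasets are
# assumptions; this is not the project's code):
def run_pipeline_sketch(runner, sample_ids, dataset, model, test_size=0.2):
    train_ids, val_ids = runner.splitter.split(sample_ids, test_size)
    x_train, y_train, x_val, y_val = runner.build_datasets(dataset, train_ids, val_ids)
    results = runner.model_trainer.train_model(model, x_train, y_train)
    results.model_path = runner.get_model_path(model=model, dataset=dataset)
    runner.evaluate_validation_set(results, x_val, y_val)
    return results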


def test_benchmark_model_fit(tiny_files_structure):
    bm_model = BenchmarkModel()
    y_train = pd.read_csv(tiny_files_structure.raw.y_train, index_col=0)
    bm_model.fit(pd.DataFrame([0]).values, y_train)
    assert bm_model.params["mean_values"].all()
    assert bm_model.params["mean_values"].shape == (5,)