Example #1
0
def test_benchmark_model_predict(tiny_files_structure):
    """Predictions should match the target shape and equal the column means."""
    model = BenchmarkModel()
    features = np.ones((10, 5))
    targets = pd.read_csv(
        tiny_files_structure.raw.y_train, index_col=0
    ).fillna(0).values
    model.fit(features, targets)

    predictions = model.predict(features)

    assert predictions.shape == targets.shape
    assert np.isclose(
        predictions.mean(axis=0), model.params["mean_values"]
    ).all()
def test_multiple_model_runner_run_pipelines(sample_ids, tiny_files_structure):
    """Running two (dataset, model) pairs through the multi-runner yields results."""
    # Deliberately reuse the same pair twice via list replication.
    pipelines = [(BenchmarkDataset(), BenchmarkModel())] * 2
    pipeline_runner = PipelineRunner(tiny_files_structure)
    # Stub the splitter so both splits are the full id set.
    pipeline_runner.splitter.split = lambda *args: (sample_ids, sample_ids)

    outcome = MultipleModelRunner(pipelines).run_multiple_pipelines(
        sample_ids, pipeline_runner, 0.5)

    assert outcome
def test_get_model_path():
    """The model path stem should embed both model and dataset versions."""
    pipeline = PipelineRunner()
    bench_model = BenchmarkModel()
    bench_dataset = BenchmarkDataset()

    model_path = pipeline.get_model_path(model=bench_model,
                                         dataset=bench_dataset)

    assert model_path
    assert bench_model.version in model_path.stem
    assert bench_dataset.version in model_path.stem
def test_model_trainer(tiny_files_structure):
    """Training the benchmark model should populate params and both MAE metrics."""
    features = pd.read_csv(tiny_files_structure.raw.correlations, index_col=0)
    targets = pd.read_csv(
        tiny_files_structure.raw.y_train, index_col=0
    ).fillna(0).values

    outcome = ModelTrainer().train_model(BenchmarkModel(), features, targets)

    assert outcome.model.params["mean_values"].all()
    assert outcome.train_mae
    assert outcome.train_weighted_mae
def test_add_info_to_results(sample_ids):
    """add_information_to_results should fill the path, id and version fields."""
    pipeline = PipelineRunner()
    dataset, model = BenchmarkDataset(), BenchmarkModel()
    training_results = TrainingResults()

    pipeline.add_information_to_results(
        training_results, dataset, model, sample_ids, sample_ids)

    assert training_results.model_path
    assert training_results.train_ids.any()
    assert training_results.val_ids.any()
    assert training_results.dataset_version
def test_evaluate_validation_set():
    """Evaluating a validation set should record both validation MAE metrics."""
    pipeline = PipelineRunner()
    features_val = np.random.random((10, 20))
    targets_val = np.random.random((10, 5))

    training_results = TrainingResults(model=BenchmarkModel())
    # Stub out prediction so evaluation never depends on a fitted model.
    training_results.model.predict = lambda *args: np.random.random((10, 5))
    pipeline.evaluate_validation_set(training_results, features_val, targets_val)

    assert training_results.validation_weighted_mae
    assert training_results.validation_mae
def test_pipeline_runner_run_calls(tiny_files_structure, sample_ids):
    """run_pipeline should invoke each collaborator exactly once."""
    pipeline = PipelineRunner(file_structure=tiny_files_structure)
    pipeline.ds_builder = Mock(spec=DatasetBuilder)
    pipeline.model_trainer = Mock(spec=ModelTrainer)

    pipeline.get_model_path = Mock(spec=pipeline.get_model_path)
    pipeline.build_datasets = Mock(
        spec=pipeline.build_datasets, return_value=(0, 0, 0, 0))
    pipeline.splitter.split = Mock(
        spec=pipeline.splitter.split, return_value=(0, 0))
    pipeline.evaluate_validation_set = Mock(
        spec=pipeline.evaluate_validation_set)

    pipeline.run_pipeline(sample_ids,
                          dataset=BenchmarkDataset(),
                          model=BenchmarkModel())

    # The default validation fraction of 0.2 must be forwarded to the splitter.
    pipeline.splitter.split.assert_called_once_with(sample_ids, 0.2)
    pipeline.build_datasets.assert_called_once()
    pipeline.get_model_path.assert_called_once()
    pipeline.model_trainer.train_model.assert_called_once()
    pipeline.evaluate_validation_set.assert_called_once()
Example #8
0
def test_benchmark_model_fit(tiny_files_structure):
    """Fitting should compute one mean value per target column (5 columns)."""
    model = BenchmarkModel()
    targets = pd.read_csv(tiny_files_structure.raw.y_train, index_col=0)
    model.fit(pd.DataFrame([0]).values, targets)

    assert model.params["mean_values"].all()
    assert model.params["mean_values"].shape == (5,)