コード例 #1
0
class Matrices(TestCase):
    """Tests for an experiment built from a partial config.

    The config holds only the temporal, feature-aggregation, cohort and
    label sections plus the config version.
    """

    # Each section is read from its own sample_config() call.
    config = {
        section: sample_config()[section]
        for section in (
            'temporal_config',
            'feature_aggregations',
            'cohort_config',
            'label_config',
            'config_version',
        )
    }

    def test_run(self):
        """Every matrix build task should leave a .csv and a .yaml file."""
        with prepare_experiment(self.config) as experiment:
            experiment.run()
            matrices_dir = experiment.matrices_directory
            # Only plain files count; sub-directories are ignored.
            files_on_disk = [
                entry
                for entry in os.listdir(matrices_dir)
                if isfile(join(matrices_dir, entry))
            ]
            build_tasks = experiment.matrix_build_tasks
            assert len(build_tasks) > 0
            for task_key in build_tasks:
                assert '{}.csv'.format(task_key) in files_on_disk
                assert '{}.yaml'.format(task_key) in files_on_disk

    def test_validate_nonstrict(self):
        """validate(strict=False) should complete without raising."""
        with prepare_experiment(self.config) as exp:
            exp.validate(strict=False)

    def test_validate_strict(self):
        """validate() with default arguments should raise ValueError."""
        with prepare_experiment(self.config) as exp:
            self.assertRaises(ValueError, exp.validate)
コード例 #2
0
class PostimputationFeatures(TestCase):
    """Tests for an experiment built from a partial config.

    The config holds only the temporal, feature-aggregation and cohort
    sections plus the config version.
    """

    # Each section is read from its own sample_config() call.
    config = {
        section: sample_config()[section]
        for section in (
            "temporal_config",
            "feature_aggregations",
            "cohort_config",
            "config_version",
        )
    }

    def test_run(self):
        """One populated '_aggregation_imputed' table per feature aggregation."""
        with prepare_experiment(self.config) as experiment:
            experiment.run()
            feature_tables = schema_tables(
                experiment.features_schema_name, experiment.db_engine
            )
            imputed_tables = [
                name
                for name in feature_tables.keys()
                if "_aggregation_imputed" in name
            ]

            expected_count = len(sample_config()["feature_aggregations"])
            assert len(imputed_tables) == expected_count
            for name in imputed_tables:
                table_should_have_data(name, experiment.db_engine)

    def test_validate_nonstrict(self):
        """validate(strict=False) should complete without raising."""
        with prepare_experiment(self.config) as exp:
            exp.validate(strict=False)

    def test_validate_strict(self):
        """validate() with default arguments should raise ValueError."""
        with prepare_experiment(self.config) as exp:
            self.assertRaises(ValueError, exp.validate)
コード例 #3
0
def test_fill_model_grid_presets():
    """Check fill_model_grid_presets for each grid/preset combination."""

    # case 1: has grid, no preset -> the configured grid is returned as is
    grid_only = sample_config()
    assert fill_model_grid_presets(grid_only) == grid_only['grid_config']

    # case 2: has preset, no grid -> a three-entry grid is returned
    preset_only = sample_config()
    del preset_only['grid_config']
    preset_only['model_grid_preset'] = 'quickstart'
    assert len(fill_model_grid_presets(preset_only)) == 3

    # case 3: neither -> None
    neither = sample_config()
    del neither['grid_config']
    assert fill_model_grid_presets(neither) is None

    # case 4: both -> KeyError
    both = sample_config()
    both['model_grid_preset'] = 'quickstart'
    with pytest.raises(KeyError):
        fill_model_grid_presets(both)
コード例 #4
0
def test_restart_experiment(experiment_class):
    """Re-run an experiment with replace=False and check that
    make_entity_date_table is not called on the second run."""
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        populate_source_data(db_engine)
        with TemporaryDirectory() as temp_dir:
            # Arguments shared by both experiment instances.
            common_kwargs = {
                "db_engine": db_engine,
                "project_path": os.path.join(temp_dir, "inspections"),
                "cleanup": True,
            }

            experiment = experiment_class(config=sample_config(), **common_kwargs)
            experiment.run()

            # The first run must have produced linked evaluations.
            assert num_linked_evaluations(db_engine) > 0

            experiment = experiment_class(
                config=sample_config(), replace=False, **common_kwargs
            )
            # Swap in a mock so any call to it during the second run is visible.
            experiment.make_entity_date_table = mock.Mock()
            experiment.run()
            assert not experiment.make_entity_date_table.called
コード例 #5
0
class PreimputationFeatures(TestCase):
    """Tests for an experiment built from a partial config.

    The config holds only the temporal and feature-aggregation sections
    plus the config version.
    """

    # Each section is read from its own sample_config() call.
    config = {
        section: sample_config()[section]
        for section in (
            'temporal_config',
            'feature_aggregations',
            'config_version',
        )
    }

    def test_run(self):
        """One populated '_aggregation' table per feature aggregation."""
        with prepare_experiment(self.config) as experiment:
            experiment.run()
            feature_tables = schema_tables(
                experiment.features_schema_name, experiment.db_engine
            )
            aggregation_tables = [
                name for name in feature_tables.keys() if '_aggregation' in name
            ]

            expected_count = len(sample_config()['feature_aggregations'])
            assert len(aggregation_tables) == expected_count
            for name in aggregation_tables:
                table_should_have_data(name, experiment.db_engine)

    def test_validate_nonstrict(self):
        """validate(strict=False) should complete without raising."""
        with prepare_experiment(self.config) as exp:
            exp.validate(strict=False)

    def test_validate_strict(self):
        """validate() with default arguments should raise ValueError."""
        with prepare_experiment(self.config) as exp:
            self.assertRaises(ValueError, exp.validate)
コード例 #6
0
def test_filepaths_and_queries_give_same_hashes(experiment_class):
    """Experiments configured via inline queries and via file paths should
    share the same experiment hash, cohort table name and labels table name."""
    patched_open = mock.patch("triage.util.conf.open", side_effect=open_side_effect)
    with testing.postgresql.Postgresql() as postgresql, \
            TemporaryDirectory() as temp_dir, \
            patched_open:
        db_engine = create_engine(postgresql.url())
        populate_source_data(db_engine)
        query_config = sample_config(query_source="query")
        file_config = sample_config(query_source="filepath")
        project_path = os.path.join(temp_dir, "inspections")

        from_queries = experiment_class(
            config=query_config,
            db_engine=db_engine,
            project_path=project_path,
            cleanup=True,
        )
        from_filepaths = experiment_class(
            config=file_config,
            db_engine=db_engine,
            project_path=project_path,
            cleanup=True,
        )

        assert from_queries.experiment_hash == from_filepaths.experiment_hash
        assert from_queries.cohort_table_name == from_filepaths.cohort_table_name
        assert from_queries.labels_table_name == from_filepaths.labels_table_name
コード例 #7
0
ファイル: test_defaults.py プロジェクト: djynnius/triage
def test_fill_model_grid_presets():
    """Check fill_model_grid_presets for each grid/preset combination."""

    # case 1: has grid, no preset -> the configured grid is returned as is
    grid_only = sample_config()
    assert fill_model_grid_presets(grid_only) == grid_only['grid_config']

    # case 2: has preset, no grid -> a three-entry grid is returned
    preset_only = sample_config()
    del preset_only['grid_config']
    preset_only['model_grid_preset'] = 'quickstart'
    assert len(fill_model_grid_presets(preset_only)) == 3

    # case 3: neither -> None
    neither = sample_config()
    del neither['grid_config']
    assert fill_model_grid_presets(neither) is None

    # case 4: both -> a three-entry grid; the decision-tree entry must carry
    # three max_depth values and one criterion value
    both = sample_config()
    both['model_grid_preset'] = 'quickstart'
    combined_grid = fill_model_grid_presets(both)
    assert len(combined_grid) == 3
    tree_key = 'sklearn.tree.DecisionTreeClassifier'
    assert len(combined_grid.get(tree_key, {}).get('max_depth', [])) == 3
    assert len(combined_grid.get(tree_key, {}).get('criterion', [])) == 1
コード例 #8
0
class Matrices(TestCase):
    """Tests for an experiment built from a partial config.

    The config holds only the temporal, feature-aggregation, cohort and
    label sections plus the config version.
    """

    # Each section is read from its own sample_config() call.
    config = {
        section: sample_config()[section]
        for section in (
            "temporal_config",
            "feature_aggregations",
            "cohort_config",
            "label_config",
            "config_version",
        )
    }

    def test_run(self):
        """Every matrix build task should leave a .csv and a .yaml file
        in the project's "matrices" directory."""
        with prepare_experiment(self.config) as experiment:
            experiment.run()
            matrices_dir = join(experiment.project_path, "matrices")
            # Only plain files count; sub-directories are ignored.
            files_on_disk = [
                entry
                for entry in os.listdir(matrices_dir)
                if isfile(join(matrices_dir, entry))
            ]
            build_tasks = experiment.matrix_build_tasks
            assert len(build_tasks) > 0
            for task_key in build_tasks:
                assert "{}.csv".format(task_key) in files_on_disk
                assert "{}.yaml".format(task_key) in files_on_disk

    def test_validate_nonstrict(self):
        """validate(strict=False) should complete without raising."""
        with prepare_experiment(self.config) as exp:
            exp.validate(strict=False)

    def test_validate_strict(self):
        """validate() with default arguments should raise ValueError."""
        with prepare_experiment(self.config) as exp:
            self.assertRaises(ValueError, exp.validate)
コード例 #9
0
def test_experiment_validator():
    """ExperimentValidator.run should accept the sample config in both its
    "query" and "filepath" forms without raising."""
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        populate_source_data(db_engine)
        with mock.patch("triage.util.conf.open", side_effect=open_side_effect):
            # A fresh validator is built for each config flavour.
            for query_source in ("query", "filepath"):
                ExperimentValidator(db_engine).run(sample_config(query_source))
コード例 #10
0
def test_experiment_tracker(test_engine, project_path):
    """Check the ExperimentRun row right after construction and again after
    a full run of a MultiCoreExperiment."""
    experiment = MultiCoreExperiment(
        config=sample_config(),
        db_engine=test_engine,
        project_path=project_path,
        n_processes=4,
    )

    def load_run():
        # Use a new session each time so the row is re-read from the database.
        return Session(bind=test_engine).query(ExperimentRun).get(experiment.run_id)

    # --- state immediately after construction ---
    run_record = load_run()
    assert run_record.current_status == ExperimentRunStatus.started
    assert run_record.experiment_hash == experiment.experiment_hash
    assert run_record.experiment_class_path == 'triage.experiments.multicore.MultiCoreExperiment'
    assert run_record.platform
    assert run_record.os_user
    assert run_record.installed_libraries
    for counter in (
        'matrices_skipped',
        'matrices_errored',
        'matrices_made',
        'models_skipped',
        'models_errored',
        'models_made',
    ):
        assert getattr(run_record, counter) == 0

    # --- state after a complete run ---
    experiment.run()
    run_record = load_run()
    assert run_record.start_method == "run"
    assert run_record.matrices_made == len(experiment.matrix_build_tasks)
    for counter in (
        'matrices_skipped',
        'matrices_errored',
        'models_skipped',
        'models_errored',
    ):
        assert getattr(run_record, counter) == 0
    trained_model_hashes = [
        task['train_kwargs']['model_hash']
        for batch in experiment._all_train_test_batches()
        for task in batch.tasks
    ]
    assert run_record.models_made == len(trained_model_hashes)
    for timestamp_field in (
        'matrix_building_started',
        'model_building_started',
        'last_updated_time',
    ):
        assert isinstance(getattr(run_record, timestamp_field), datetime.datetime)
    assert not run_record.stacktrace
    assert run_record.current_status == ExperimentRunStatus.completed
コード例 #11
0
ファイル: test_experiments.py プロジェクト: djynnius/triage
def test_build_error_cleanup_timeout(_clean_up_mock, experiment_class):
    """A failure in generate_matrices followed by a cleanup timeout should
    surface as TimeoutError with the original error kept in __context__."""
    build_failure = RuntimeError("boom!")

    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())

        with TemporaryDirectory() as temp_dir:
            experiment = experiment_class(
                config=sample_config(),
                db_engine=db_engine,
                project_path=os.path.join(temp_dir, "inspections"),
                cleanup=True,
                cleanup_timeout=0.02,  # Set short timeout
                # avoid catching the missing data at validation stage
                skip_validation=True,
            )

            with mock.patch.object(
                experiment, "generate_matrices", side_effect=build_failure
            ):
                with pytest.raises(TimeoutError) as exc_info:
                    experiment()

    # Last exception is TimeoutError, but earlier error is preserved in
    # __context__, and will be noted as well in any standard traceback:
    assert exc_info.value.__context__ is build_failure
コード例 #12
0
def test_build_error_cleanup_timeout(_clean_up_mock, experiment_class):
    """A failure in generate_matrices followed by a cleanup timeout should
    surface as TimeoutError with the original error kept in __context__."""
    build_failure = RuntimeError('boom!')

    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)

        with TemporaryDirectory() as temp_dir:
            experiment = experiment_class(
                config=sample_config(),
                db_engine=db_engine,
                model_storage_class=FSModelStorageEngine,
                project_path=os.path.join(temp_dir, 'inspections'),
                cleanup=True,
                cleanup_timeout=0.02,  # Set short timeout
            )

            with mock.patch.object(
                experiment, 'generate_matrices', side_effect=build_failure
            ):
                with pytest.raises(TimeoutError) as exc_info:
                    experiment()

    # Last exception is TimeoutError, but earlier error is preserved in
    # __context__, and will be noted as well in any standard traceback:
    assert exc_info.value.__context__ is build_failure
コード例 #13
0
ファイル: test_defaults.py プロジェクト: djynnius/triage
def test_fill_timechop_config_missing():
    """Check that fill_timechop_config_missing rejects a redundant
    label_timespans key and fills in the removed temporal keys."""
    keys_to_remove = (
        'model_update_frequency',
        'training_as_of_date_frequencies',
        'test_as_of_date_frequencies',
        'max_training_histories',
        'test_durations',
        'feature_start_time',
        'feature_end_time',
        'label_start_time',
        'label_end_time',
        'training_label_timespans',
        'test_label_timespans',
    )

    # ensure redundant keys properly raise errors
    redundant_config = sample_config()
    redundant_config['temporal_config']['label_timespans'] = '1y'
    with pytest.raises(KeyError):
        fill_timechop_config_missing(redundant_config, None)

    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        populate_source_data(db_engine)

        config = sample_config()
        temporal = config['temporal_config']
        for key in keys_to_remove:
            temporal.pop(key)
        temporal['label_timespans'] = '1y'

        filled = fill_timechop_config_missing(config, db_engine)

        # Frequencies, histories, durations and label timespans.
        expected_before = (
            ('model_update_frequency', '100y'),
            ('training_as_of_date_frequencies', '100y'),
            ('test_as_of_date_frequencies', '100y'),
            ('max_training_histories', '0d'),
            ('test_durations', '0d'),
            ('training_label_timespans', '1y'),
            ('test_label_timespans', '1y'),
        )
        for key, value in expected_before:
            assert filled[key] == value

        # The shorthand key must not be present in the result.
        assert 'label_timespans' not in filled

        # Feature and label time bounds.
        expected_after = (
            ('feature_start_time', '2010-10-01'),
            ('feature_end_time', '2013-10-01'),
            ('label_start_time', '2010-10-01'),
            ('label_end_time', '2013-10-01'),
        )
        for key, value in expected_after:
            assert filled[key] == value
コード例 #14
0
def test_profiling(db_engine):
    """Running with profile=True should leave exactly one entry in the
    project's "profiling_stats" directory."""
    populate_source_data(db_engine)
    with TemporaryDirectory() as temp_dir:
        project_path = os.path.join(temp_dir, "inspections")
        experiment = SingleThreadedExperiment(
            config=sample_config(),
            db_engine=db_engine,
            project_path=project_path,
            profile=True,
        )
        experiment.run()
        stats_dir = os.path.join(project_path, "profiling_stats")
        assert len(os.listdir(stats_dir)) == 1
コード例 #15
0
 def test_noload_if_wrong_version(self):
     """Constructing an experiment with config_version "v0" raises ValueError."""
     bad_version_config = sample_config()
     bad_version_config["config_version"] = "v0"
     with TemporaryDirectory() as temp_dir:
         # Callable form of assertRaises: the constructor runs inside it.
         self.assertRaises(
             ValueError,
             SingleThreadedExperiment,
             config=bad_version_config,
             db_engine=None,
             project_path=os.path.join(temp_dir, "inspections"),
         )
コード例 #16
0
class GetSplits(TestCase):
    """Tests for an experiment whose config holds only the temporal section
    plus the config version."""

    # Each section is read from its own sample_config() call.
    config = {
        section: sample_config()[section]
        for section in ("temporal_config", "config_version")
    }

    def test_run(self):
        """After a run the experiment should expose non-empty split_definitions."""
        with prepare_experiment(self.config) as exp:
            exp.run()
            assert exp.split_definitions

    def test_validate_nonstrict(self):
        """validate(strict=False) should complete without raising."""
        with prepare_experiment(self.config) as exp:
            exp.validate(strict=False)

    def test_validate_strict(self):
        """validate() with default arguments should raise ValueError."""
        with prepare_experiment(self.config) as exp:
            self.assertRaises(ValueError, exp.validate)
コード例 #17
0
class Cohort(TestCase):
    """Tests for an experiment whose config holds only the temporal and
    cohort sections plus the config version."""

    # Each section is read from its own sample_config() call.
    config = {
        section: sample_config()[section]
        for section in ('temporal_config', 'cohort_config', 'config_version')
    }

    def test_run(self):
        """After a run the sparse states table should contain data."""
        with prepare_experiment(self.config) as exp:
            exp.run()
            states_table = exp.sparse_states_table_name
            table_should_have_data(states_table, exp.db_engine)

    def test_validate_nonstrict(self):
        """validate(strict=False) should complete without raising."""
        with prepare_experiment(self.config) as exp:
            exp.validate(strict=False)

    def test_validate_strict(self):
        """validate() with default arguments should raise ValueError."""
        with prepare_experiment(self.config) as exp:
            self.assertRaises(ValueError, exp.validate)
コード例 #18
0
class Labels(TestCase):
    """Tests for an experiment whose config holds only the temporal and
    label sections plus the config version."""

    # Each section is read from its own sample_config() call.
    config = {
        section: sample_config()[section]
        for section in ("temporal_config", "label_config", "config_version")
    }

    def test_run(self):
        """After a run the labels table should contain data."""
        with prepare_experiment(self.config) as exp:
            exp.run()
            labels_table = exp.labels_table_name
            table_should_have_data(labels_table, exp.db_engine)

    def test_validate_nonstrict(self):
        """validate(strict=False) should complete without raising."""
        with prepare_experiment(self.config) as exp:
            exp.validate(strict=False)

    def test_validate_strict(self):
        """validate() with default arguments should raise ValueError."""
        with prepare_experiment(self.config) as exp:
            self.assertRaises(ValueError, exp.validate)
コード例 #19
0
ファイル: test_experiments.py プロジェクト: afcarl/triage
 def test_noload_if_wrong_version(self):
     """Constructing an experiment with config_version 'v0' raises ValueError."""
     bad_version_config = sample_config()
     bad_version_config['config_version'] = 'v0'
     with TemporaryDirectory() as temp_dir:
         # Callable form of assertRaises: the constructor runs inside it.
         self.assertRaises(
             ValueError,
             SingleThreadedExperiment,
             config=bad_version_config,
             db_engine=None,
             model_storage_class=FSModelStorageEngine,
             project_path=os.path.join(temp_dir, 'inspections'),
         )
コード例 #20
0
def test_experiment_tracker_in_parts(test_engine, project_path):
    """When the stages are invoked one by one, the recorded start_method
    should be the first stage called, "generate_matrices"."""
    exp = SingleThreadedExperiment(
        config=sample_config(),
        db_engine=test_engine,
        project_path=project_path,
    )
    exp.generate_matrices()
    exp.train_and_test_models()
    with scoped_session(test_engine) as session:
        run_record = session.query(ExperimentRun).get(exp.run_id)
        assert run_record.start_method == "generate_matrices"
コード例 #21
0
def test_serializable_engine_check_sqlalchemy_fail():
    """If we pass a vanilla sqlalchemy engine to the experiment we should blow up"""
    with testing.postgresql.Postgresql() as postgresql:
        plain_engine = sqlalchemy.create_engine(postgresql.url())
        with TemporaryDirectory() as temp_dir, pytest.raises(TypeError):
            MultiCoreExperiment(
                config=sample_config(),
                db_engine=plain_engine,
                project_path=os.path.join(temp_dir, "inspections"),
            )
コード例 #22
0
 def test_noload_if_wrong_version(self):
     """Constructing an experiment with config_version "v0" raises ValueError."""
     bad_version_config = sample_config()
     bad_version_config["config_version"] = "v0"
     patched_open = mock.patch(
         "triage.util.conf.open", side_effect=open_side_effect
     )
     with TemporaryDirectory() as temp_dir, patched_open:
         # Callable form of assertRaises: the constructor runs inside it.
         self.assertRaises(
             ValueError,
             SingleThreadedExperiment,
             config=bad_version_config,
             db_engine=None,
             project_path=os.path.join(temp_dir, "inspections"),
         )
コード例 #23
0
    def test_run(self):
        """One populated '_aggregation_imputed' table per feature aggregation."""
        with prepare_experiment(self.config) as experiment:
            experiment.run()
            feature_tables = schema_tables(
                experiment.features_schema_name, experiment.db_engine
            )
            imputed_tables = [
                name
                for name in feature_tables.keys()
                if '_aggregation_imputed' in name
            ]

            expected_count = len(sample_config()['feature_aggregations'])
            assert len(imputed_tables) == expected_count
            for name in imputed_tables:
                table_should_have_data(name, experiment.db_engine)
コード例 #24
0
ファイル: test_defaults.py プロジェクト: djynnius/triage
def test_fill_cohort_config_missing():
    """With no cohort_config present, fill_cohort_config_missing should
    return the 'all_entities' cohort with the expected query."""
    config = sample_config()
    del config['cohort_config']

    expected_query = (
        "select distinct entity_id from "
        "((select entity_id, as_of_date as knowledge_date from "
        "(select * from cat_complaints) as t)\n union \n(select entity_id, "
        "as_of_date as knowledge_date from (select * from entity_zip_codes "
        "join zip_code_events using (zip_code)) as t)) as e "
        "where knowledge_date < '{as_of_date}'"
    )

    filled = fill_cohort_config_missing(config)
    assert filled == {'query': expected_query, 'name': 'all_entities'}
コード例 #25
0
def test_custom_label_name(experiment_class):
    """A label name set in label_config should show up on both the label
    generator and the planner."""
    custom_name = "custom_label_name"
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        config = sample_config()
        config["label_config"]["name"] = custom_name
        with TemporaryDirectory() as temp_dir:
            exp = experiment_class(
                config=config,
                db_engine=db_engine,
                project_path=os.path.join(temp_dir, "inspections"),
            )
            assert exp.label_generator.label_name == custom_name
            assert exp.planner.label_names == [custom_name]
コード例 #26
0
    def test_load_if_right_version(self):
        """With config_version set to CONFIG_VERSION the experiment
        constructs and is a SingleThreadedExperiment instance."""
        matching_config = sample_config()
        matching_config["config_version"] = CONFIG_VERSION
        with testing.postgresql.Postgresql() as postgresql:
            db_engine = create_engine(postgresql.url())
            with TemporaryDirectory() as temp_dir:
                project_path = os.path.join(temp_dir, "inspections")
                experiment = SingleThreadedExperiment(
                    config=matching_config,
                    db_engine=db_engine,
                    project_path=project_path,
                )

        # Checked after the database and temporary directory are closed.
        assert isinstance(experiment, SingleThreadedExperiment)
コード例 #27
0
def test_experiment_tracker_in_parts(test_engine, project_path):
    """When the stages are invoked one by one, the recorded start_method
    should be the first stage called, "generate_matrices"."""
    # Only construction happens under the patched open(); the stages below
    # run after the patch has been removed.
    with mock.patch("triage.util.conf.open", side_effect=open_side_effect):
        exp = SingleThreadedExperiment(
            config=sample_config(),
            db_engine=test_engine,
            project_path=project_path,
        )
    exp.generate_matrices()
    exp.train_and_test_models()
    with scoped_session(test_engine) as session:
        run_record = session.query(TriageRun).get(exp.run_id)
        assert run_record.start_method == "generate_matrices"
コード例 #28
0
def test_cleanup_timeout(_clean_up_mock, experiment_class):
    """Calling an experiment with a very short cleanup_timeout should raise
    TimeoutError."""
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        populate_source_data(db_engine)
        with TemporaryDirectory() as temp_dir:
            project_path = os.path.join(temp_dir, "inspections")
            exp = experiment_class(
                config=sample_config(),
                db_engine=db_engine,
                project_path=project_path,
                cleanup=True,
                cleanup_timeout=0.02,  # Set short timeout
            )
            with pytest.raises(TimeoutError):
                exp()
コード例 #29
0
def test_custom_label_name(experiment_class):
    """A label name set in label_config should show up on both the label
    generator and the planner."""
    custom_name = 'custom_label_name'
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        config = sample_config()
        config['label_config']['name'] = custom_name
        with TemporaryDirectory() as temp_dir:
            exp = experiment_class(
                config=config,
                db_engine=db_engine,
                model_storage_class=FSModelStorageEngine,
                project_path=os.path.join(temp_dir, 'inspections'),
            )
            assert exp.label_generator.label_name == custom_name
            assert exp.planner.label_names == [custom_name]
コード例 #30
0
ファイル: conftest.py プロジェクト: zshi1/triage
def finished_experiment(shared_db_engine, shared_project_storage):
    """Build and run a SingleThreadedExperiment on the shared engine and storage.

    Source data is populated into the shared database first; the experiment
    is then constructed from the sample config and run before being returned.

    Returns: (triage.experiments.SingleThreadedExperiment)
    """
    populate_source_data(shared_db_engine)
    finished = SingleThreadedExperiment(
        sample_config(),
        db_engine=shared_db_engine,
        project_path=shared_project_storage.project_path,
    )
    finished.run()
    return finished