Ejemplo n.º 1
0
def run_exp(config_file, plot_timechops=True, run_exp=True, n_jobs=1):
    """Set up an experiment from ``config_file`` and optionally run it.

    Args:
        config_file: Experiment configuration, handed to
            ``visualize_timechop`` and ``setup_experiment``.
        plot_timechops: When truthy, call ``visualize_timechop`` first.
        run_exp: When truthy, build the experiment, run it and print the
            elapsed wall-clock time. (Shadows the function name locally.)
        n_jobs: Values above 1 select ``MultiCoreExperiment`` with this many
            worker and database processes; otherwise a
            ``SingleThreadedExperiment`` is built.
    """
    if plot_timechops:
        visualize_timechop(config_file)

    # Setup always happens, even if the experiment itself is not run.
    config, sql_engine, proj_folder = setup_experiment(config_file)

    if not run_exp:
        return

    shared_kwargs = dict(
        config=config,
        db_engine=sql_engine,
        project_path=proj_folder,
        cleanup=True,
    )
    if n_jobs > 1:
        experiment = MultiCoreExperiment(
            n_processes=n_jobs,
            n_db_processes=n_jobs,
            replace=False,
            **shared_kwargs
        )
    else:
        experiment = SingleThreadedExperiment(**shared_kwargs)

    started = time.time()
    experiment.run()
    finished = time.time()

    print('Took {} seconds to run the experiement'.format(finished - started))
Ejemplo n.º 2
0
 def experiment(self):
     """Build the experiment object described by ``self.args``.

     Returns:
         A ``MultiCoreExperiment`` when more than one database process or
         more than one worker process is requested, otherwise a
         ``SingleThreadedExperiment``.
     """
     self.root.setup()  # Loading configuration (if exists)
     db_url = self.root.db_url
     experiment_config = self._load_config()
     engine = create_engine(db_url)
     args = self.args

     # Keyword arguments accepted by both experiment flavours.
     shared = dict(
         db_engine=engine,
         project_path=args.project_path,
         config=experiment_config,
         replace=args.replace,
         materialize_subquery_fromobjs=args.materialize_fromobjs,
         features_ignore_cohort=args.features_ignore_cohort,
         matrix_storage_class=self.matrix_storage_map[args.matrix_format],
         profile=args.profile,
         save_predictions=args.save_predictions,
         skip_validation=not args.validate,
     )

     wants_parallel = args.n_db_processes > 1 or args.n_processes > 1
     if not wants_parallel:
         return SingleThreadedExperiment(**shared)

     return MultiCoreExperiment(
         n_db_processes=args.n_db_processes,
         n_processes=args.n_processes,
         **shared,
     )
Ejemplo n.º 3
0
 def experiment(self):
     """Build the experiment object described by ``self.args``.

     The YAML configuration is read from ``self.args.config`` and a database
     engine is created from ``self.root.db_url``.

     Returns:
         A ``MultiCoreExperiment`` when more than one database process or
         more than one worker process is requested, otherwise a
         ``SingleThreadedExperiment``.
     """
     self.root.setup()  # Loading configuration (if exists)
     db_url = self.root.db_url
     # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
     # and raises TypeError on PyYAML >= 6. safe_load parses plain YAML and
     # refuses to construct arbitrary Python objects.
     # NOTE(review): a config relying on python-specific YAML tags would no
     # longer load -- confirm none do.
     config = yaml.safe_load(self.args.config)
     db_engine = create_engine(db_url)
     # Keyword arguments accepted by both experiment flavours.
     common_kwargs = {
         "db_engine": db_engine,
         "project_path": self.args.project_path,
         "config": config,
         "replace": self.args.replace,
         "matrix_storage_class": self.matrix_storage_map[self.args.matrix_format],
     }
     if self.args.n_db_processes > 1 or self.args.n_processes > 1:
         experiment = MultiCoreExperiment(
             n_db_processes=self.args.n_db_processes,
             n_processes=self.args.n_processes,
             **common_kwargs,
         )
     else:
         experiment = SingleThreadedExperiment(**common_kwargs)
     return experiment
Ejemplo n.º 4
0
def triage(ctx, config_file, triage_db, replace, debug):
    """Load an experiment configuration and attach the experiment to ``ctx``.

    Args:
        ctx: Context object; the created experiment is stored on ``ctx.obj``.
        config_file: File name resolved under ``/triage/experiment_config``.
        triage_db: Database URL passed to ``sqlalchemy.create_engine``.
        replace: Forwarded to the experiment's ``replace`` argument.
        debug: When truthy, configure logging at DEBUG level.
    """
    config_file = os.path.join(os.sep, "triage", "experiment_config",
                               config_file)

    click.echo(f"Using the config file {config_file}")

    with open(config_file) as f:
        # yaml.load() without an explicit Loader is deprecated since PyYAML
        # 5.1 and raises TypeError on PyYAML >= 6. safe_load parses plain
        # YAML and refuses to construct arbitrary Python objects.
        # NOTE(review): a config relying on python-specific YAML tags would
        # no longer load -- confirm none do.
        experiment_config = yaml.safe_load(f)

    click.echo(
        "The output (matrices and models) of this experiment will be stored in triage/output"
    )
    click.echo(f"Using data stored in {triage_db}")
    click.echo(
        f"The experiment will utilize any preexisting matrix or model: {not replace}"
    )
    click.echo("Creating experiment object")

    experiment = SingleThreadedExperiment(
        config=experiment_config,
        db_engine=sqlalchemy.create_engine(triage_db),
        model_storage_class=FSModelStorageEngine,
        project_path='/triage/output',
        replace=replace)

    ctx.obj = experiment

    # Logging is configured only after the experiment object exists, so
    # anything logged during construction is not affected by this flag.
    if debug:
        logging.basicConfig(level=logging.DEBUG)
        click.echo("Debug enabled (Expect A LOT of output at the screen!!!)")

    click.echo("Experiment loaded")
Ejemplo n.º 5
0
def triage(ctx, config_file, triage_db, replace):
    """Load an experiment configuration and attach the experiment to ``ctx``.

    Args:
        ctx: Context object; the created experiment is stored on ``ctx.obj``.
        config_file: File name resolved under ``/triage/experiment_config``.
        triage_db: Database URL passed to ``sqlalchemy.create_engine``.
        replace: Forwarded to the experiment's ``replace`` argument.
    """
    config_file = os.path.join(os.sep, "triage", "experiment_config",
                               config_file)

    click.echo(f"Using the config file {config_file}")

    with open(config_file) as f:
        # yaml.load() without an explicit Loader is deprecated since PyYAML
        # 5.1 and raises TypeError on PyYAML >= 6. safe_load parses plain
        # YAML and refuses to construct arbitrary Python objects.
        # NOTE(review): a config relying on python-specific YAML tags would
        # no longer load -- confirm none do.
        experiment_config = yaml.safe_load(f)

    click.echo(
        "The output (matrices and models) of this experiment will be stored in triage/output"
    )
    click.echo(
        f"The experiment will utilize any preexisting matrix or model: {not replace}"
    )
    click.echo("Creating experiment object")

    experiment = SingleThreadedExperiment(
        config=experiment_config,
        db_engine=sqlalchemy.create_engine(triage_db),
        model_storage_class=FSModelStorageEngine,
        project_path='/triage/output',
        replace=replace)

    ctx.obj = experiment

    click.echo("Experiment loaded")
Ejemplo n.º 6
0
 def test_noload_if_wrong_version(self):
     """Constructing an experiment with config_version "v0" raises ValueError."""
     outdated_config = sample_config()
     outdated_config["config_version"] = "v0"
     with TemporaryDirectory() as temp_dir, self.assertRaises(ValueError):
         SingleThreadedExperiment(
             config=outdated_config,
             db_engine=None,
             project_path=os.path.join(temp_dir, "inspections"),
         )
Ejemplo n.º 7
0
def test_profiling(db_engine):
    """A run with ``profile=True`` leaves exactly one entry in profiling_stats."""
    populate_source_data(db_engine)
    with TemporaryDirectory() as temp_dir:
        project_path = os.path.join(temp_dir, "inspections")
        profiled_experiment = SingleThreadedExperiment(
            config=sample_config(),
            db_engine=db_engine,
            project_path=project_path,
            profile=True,
        )
        profiled_experiment.run()
        stats_dir = os.path.join(project_path, "profiling_stats")
        assert len(os.listdir(stats_dir)) == 1
Ejemplo n.º 8
0
def test_experiment_tracker_in_parts(test_engine, project_path):
    """Running the stages separately records "generate_matrices" as start method."""
    staged_experiment = SingleThreadedExperiment(
        config=sample_config(),
        db_engine=test_engine,
        project_path=project_path,
    )
    # Drive the two stages by hand instead of calling run().
    staged_experiment.generate_matrices()
    staged_experiment.train_and_test_models()
    with scoped_session(test_engine) as session:
        run_query = session.query(ExperimentRun)
        tracked_run = run_query.get(staged_experiment.run_id)
        assert tracked_run.start_method == "generate_matrices"
Ejemplo n.º 9
0
def prepare_experiment(config):
    """Yield a ``SingleThreadedExperiment`` built on throwaway resources.

    A temporary Postgres instance is started and populated with source data,
    and a temporary directory provides the project path. Both are torn down
    once the generator is resumed after the yield.
    """
    with testing.postgresql.Postgresql() as pg_server:
        engine = create_engine(pg_server.url())
        populate_source_data(engine)
        with TemporaryDirectory() as scratch_dir:
            yield SingleThreadedExperiment(
                config=config,
                db_engine=engine,
                project_path=os.path.join(scratch_dir, 'inspections'),
                cleanup=False,
            )
Ejemplo n.º 10
0
 def test_noload_if_wrong_version(self):
     """Constructing an experiment with config_version 'v0' raises ValueError."""
     outdated_config = sample_config()
     outdated_config['config_version'] = 'v0'
     with TemporaryDirectory() as temp_dir, self.assertRaises(ValueError):
         SingleThreadedExperiment(
             config=outdated_config,
             db_engine=None,
             model_storage_class=FSModelStorageEngine,
             project_path=os.path.join(temp_dir, 'inspections'),
         )
Ejemplo n.º 11
0
 def test_noload_if_wrong_version(self):
     """Constructing an experiment with config_version "v0" raises ValueError.

     ``triage.util.conf.open`` is patched with ``open_side_effect`` for the
     duration of the construction attempt.
     """
     outdated_config = sample_config()
     outdated_config["config_version"] = "v0"
     with TemporaryDirectory() as temp_dir:
         open_patch = mock.patch("triage.util.conf.open",
                                 side_effect=open_side_effect)
         with open_patch, self.assertRaises(ValueError):
             SingleThreadedExperiment(
                 config=outdated_config,
                 db_engine=None,
                 project_path=os.path.join(temp_dir, "inspections"),
             )
Ejemplo n.º 12
0
    def test_load_if_right_version(self):
        """An experiment whose config carries CONFIG_VERSION can be constructed."""
        current_config = sample_config()
        current_config["config_version"] = CONFIG_VERSION
        with testing.postgresql.Postgresql() as pg_server:
            engine = create_engine(pg_server.url())
            with TemporaryDirectory() as scratch_dir:
                project_dir = os.path.join(scratch_dir, "inspections")
                experiment = SingleThreadedExperiment(
                    config=current_config,
                    db_engine=engine,
                    project_path=project_dir,
                )

        # Checked after the temporary resources have been released.
        assert isinstance(experiment, SingleThreadedExperiment)
Ejemplo n.º 13
0
def test_experiment_tracker_in_parts(test_engine, project_path):
    """Running the stages separately records "generate_matrices" as start method.

    ``triage.util.conf.open`` is patched with ``open_side_effect`` only while
    the experiment object is being constructed.
    """
    open_patch = mock.patch("triage.util.conf.open",
                            side_effect=open_side_effect)
    with open_patch:
        staged_experiment = SingleThreadedExperiment(
            config=sample_config(),
            db_engine=test_engine,
            project_path=project_path,
        )
    # Drive the two stages by hand instead of calling run().
    staged_experiment.generate_matrices()
    staged_experiment.train_and_test_models()
    with scoped_session(test_engine) as session:
        run_query = session.query(TriageRun)
        tracked_run = run_query.get(staged_experiment.run_id)
        assert tracked_run.start_method == "generate_matrices"
Ejemplo n.º 14
0
def finished_experiment(shared_db_engine, shared_project_storage):
    """Populate source data, run an experiment on the sample config, return it.

    The experiment writes to ``shared_db_engine`` and to the project path of
    ``shared_project_storage``, so both can be inspected afterwards.

    Returns: (triage.experiments.SingleThreadedExperiment)
    """
    populate_source_data(shared_db_engine)
    completed = SingleThreadedExperiment(
        sample_config(),
        db_engine=shared_db_engine,
        project_path=shared_project_storage.project_path,
    )
    completed.run()
    return completed
Ejemplo n.º 15
0
    def test_load_if_right_version(self):
        """An experiment whose config carries CONFIG_VERSION can be constructed.

        ``triage.util.conf.open`` is patched with ``open_side_effect`` while
        the engine and the experiment are created.
        """
        current_config = sample_config()
        current_config["config_version"] = CONFIG_VERSION
        with testing.postgresql.Postgresql() as pg_server:
            with TemporaryDirectory() as scratch_dir:
                with mock.patch("triage.util.conf.open",
                                side_effect=open_side_effect):
                    engine = create_engine(pg_server.url())
                    experiment = SingleThreadedExperiment(
                        config=current_config,
                        db_engine=engine,
                        project_path=os.path.join(scratch_dir, "inspections"),
                    )

        # Checked after the temporary resources have been released.
        assert isinstance(experiment, SingleThreadedExperiment)
Ejemplo n.º 16
0
 def experiment(self):
     """Build the experiment object described by ``self.args``.

     Returns:
         A ``MultiCoreExperiment`` when more than one database process or
         more than one worker process is requested, otherwise a
         ``SingleThreadedExperiment``. If construction raises, the error is
         logged and ``None`` is returned instead of propagating it.
     """
     self.root.setup()  # Loading configuration (if exists)
     db_url = self.root.db_url
     config = self._load_config()
     db_engine = create_engine(db_url)
     args = self.args

     # Keyword arguments accepted by both experiment flavours.
     common_kwargs = dict(
         db_engine=db_engine,
         project_path=args.project_path,
         config=config,
         replace=args.replace,
         materialize_subquery_fromobjs=args.materialize_fromobjs,
         features_ignore_cohort=args.features_ignore_cohort,
         matrix_storage_class=self.matrix_storage_map[args.matrix_format],
         profile=args.profile,
         save_predictions=args.save_predictions,
         skip_validation=not args.validate,
     )

     logger.info("Setting up the experiment")
     logger.info(f"Configuration file: {self.args.config}")
     logger.info(f"Results will be stored in DB: {self.root.db_url}")
     logger.info(f"Artifacts will be saved in {self.args.project_path}")

     try:
         wants_parallel = (self.args.n_db_processes > 1
                           or self.args.n_processes > 1)
         if not wants_parallel:
             built = SingleThreadedExperiment(**common_kwargs)
             logger.info("Experiment will run in serial fashion")
             return built

         built = MultiCoreExperiment(
             n_db_processes=self.args.n_db_processes,
             n_processes=self.args.n_processes,
             **common_kwargs,
         )
         logger.info(
             f"Experiment will run in multi core  mode using {self.args.n_processes} processes and {self.args.n_db_processes} db processes"
         )
         return built
     except Exception:
         logger.exception("Error occurred while creating the experiment!")
         logger.info(
             f"Experiment [config file: {self.args.config}] failed at creation"
         )

     # NOTE(review): construction failures are swallowed above, so callers
     # receive None here -- confirm they handle that.
     return None
Ejemplo n.º 17
0
def test_profiling(db_engine):
    """A run with ``profile=True`` leaves exactly one entry in profiling_stats.

    ``triage.util.conf.open`` is patched with ``open_side_effect`` for the
    whole construction, run and assertion.
    """
    populate_source_data(db_engine)
    with TemporaryDirectory() as temp_dir:
        with mock.patch("triage.util.conf.open",
                        side_effect=open_side_effect):
            project_path = os.path.join(temp_dir, "inspections")
            profiled_experiment = SingleThreadedExperiment(
                config=sample_config(),
                db_engine=db_engine,
                project_path=project_path,
                profile=True,
            )
            profiled_experiment.run()
            stats_dir = os.path.join(project_path, "profiling_stats")
            assert len(os.listdir(stats_dir)) == 1
Ejemplo n.º 18
0
def test_experiment_tracker_exception(db_engine, project_path):
    """A failing run is recorded as failed, with a timestamp and a stacktrace."""
    doomed_experiment = SingleThreadedExperiment(
        config=sample_config(),
        db_engine=db_engine,
        project_path=project_path,
    )
    # no source data means this should blow up
    with pytest.raises(Exception):
        doomed_experiment.run()

    with scoped_session(db_engine) as session:
        run_query = session.query(ExperimentRun)
        run_record = run_query.get(doomed_experiment.run_id)
        assert run_record.current_status == ExperimentRunStatus.failed
        assert isinstance(run_record.last_updated_time, datetime.datetime)
        assert run_record.stacktrace
Ejemplo n.º 19
0
    def test_load_if_right_version(self):
        """An experiment whose config carries CONFIG_VERSION can be constructed."""
        current_config = sample_config()
        current_config['config_version'] = CONFIG_VERSION
        with testing.postgresql.Postgresql() as pg_server:
            engine = create_engine(pg_server.url())
            ensure_db(engine)
            with TemporaryDirectory() as scratch_dir:
                project_dir = os.path.join(scratch_dir, 'inspections')
                experiment = SingleThreadedExperiment(
                    config=current_config,
                    db_engine=engine,
                    model_storage_class=FSModelStorageEngine,
                    project_path=project_dir,
                )

        # Checked after the temporary resources have been released.
        assert isinstance(experiment, SingleThreadedExperiment)
Ejemplo n.º 20
0
def finished_experiment(shared_db_engine, shared_project_storage):
    """Populate source data, run an experiment on the sample config, return it.

    ``triage.util.conf.open`` is patched with ``open_side_effect`` only while
    the experiment object is constructed; the run itself is unpatched.

    Returns: (triage.experiments.SingleThreadedExperiment)
    """
    populate_source_data(shared_db_engine)
    base_config = sample_config()
    open_patch = mock.patch("triage.util.conf.open", side_effect=open_side_effect)
    with open_patch:
        completed = SingleThreadedExperiment(
            base_config,
            db_engine=shared_db_engine,
            project_path=shared_project_storage.project_path,
        )
    completed.run()
    return completed
Ejemplo n.º 21
0
def main():
    """Validate and run a single-threaded experiment from the CLI arguments.

    The database URL is read from the ``DBURL`` environment variable (a
    missing variable raises ``KeyError``). The config path and project path
    come from ``parse_args()``.
    """
    args = parse_args()

    dburl = os.environ['DBURL']
    hiv_engine = create_engine(dburl, pool_pre_ping=True)

    with open(args.config_path) as f:
        # yaml.load() without an explicit Loader is deprecated since PyYAML
        # 5.1 and raises TypeError on PyYAML >= 6. safe_load parses plain
        # YAML and refuses to construct arbitrary Python objects.
        # NOTE(review): a config relying on python-specific YAML tags would
        # no longer load -- confirm none do.
        experiment_config = yaml.safe_load(f)
    experiment = SingleThreadedExperiment(config=experiment_config,
                                          db_engine=hiv_engine,
                                          project_path=args.project_path,
                                          replace=False)

    experiment.validate()
    experiment.run()
Ejemplo n.º 22
0
def prepare_experiment(config):
    """Yield a partial-run ``SingleThreadedExperiment`` on throwaway resources.

    A temporary Postgres instance is started and populated with source data,
    and a temporary directory provides the project path. The patch of
    ``triage.util.conf.open`` stays active while the consumer holds the
    yielded experiment.
    """
    with testing.postgresql.Postgresql() as pg_server:
        engine = create_engine(pg_server.url())
        populate_source_data(engine)
        with TemporaryDirectory() as scratch_dir, mock.patch(
            "triage.util.conf.open", side_effect=open_side_effect
        ):
            yield SingleThreadedExperiment(
                config=config,
                db_engine=engine,
                project_path=os.path.join(scratch_dir, "inspections"),
                cleanup=False,
                partial_run=True,
            )
Ejemplo n.º 23
0
def test_experiment_tracker_exception(db_engine, project_path):
    """A failing run is recorded as failed, with a timestamp and a stacktrace.

    ``triage.util.conf.open`` is patched with ``open_side_effect`` only while
    the experiment object is constructed.
    """
    open_patch = mock.patch("triage.util.conf.open",
                            side_effect=open_side_effect)
    with open_patch:
        doomed_experiment = SingleThreadedExperiment(
            config=sample_config(),
            db_engine=db_engine,
            project_path=project_path,
        )
    # no source data means this should blow up
    with pytest.raises(Exception):
        doomed_experiment.run()

    with scoped_session(db_engine) as session:
        run_query = session.query(TriageRun)
        run_record = run_query.get(doomed_experiment.run_id)
        assert run_record.current_status == TriageRunStatus.failed
        assert isinstance(run_record.last_updated_time, datetime.datetime)
        assert run_record.stacktrace
Ejemplo n.º 24
0
 def experiment(self):
     """Build the experiment object described by ``self.args``.

     The YAML configuration is read from ``self.args.config`` and a database
     engine is created from ``self.root.db_url``.

     Returns:
         A ``MultiCoreExperiment`` when more than one database process or
         more than one worker process is requested, otherwise a
         ``SingleThreadedExperiment``.
     """
     db_url = self.root.db_url
     # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
     # and raises TypeError on PyYAML >= 6. safe_load parses plain YAML and
     # refuses to construct arbitrary Python objects.
     # NOTE(review): a config relying on python-specific YAML tags would no
     # longer load -- confirm none do.
     config = yaml.safe_load(self.args.config)
     db_engine = create_engine(db_url)
     # Keyword arguments accepted by both experiment flavours.
     common_kwargs = {
         'db_engine': db_engine,
         'project_path': self.args.project_path,
         'config': config,
         'replace': self.args.replace,
     }
     if self.args.n_db_processes > 1 or self.args.n_processes > 1:
         experiment = MultiCoreExperiment(
             n_db_processes=self.args.n_db_processes,
             n_processes=self.args.n_processes,
             **common_kwargs
         )
     else:
         experiment = SingleThreadedExperiment(**common_kwargs)
     return experiment
Ejemplo n.º 25
0
def model_evaluator(shared_db_engine, shared_project_storage):
    """Run an ExtraTrees experiment, then return ``ModelEvaluator(1, 1, engine)``.

    Source data is populated first. The sample config's ``grid_config`` is
    replaced by a single ExtraTreesClassifier grid before the run.
    """
    populate_source_data(shared_db_engine)
    ensemble_config = sample_config()
    # We need to have an ensemble model to test ModelEvaluator correctly
    # so we can't use the finished_experiment fixture
    extra_trees_grid = {
        'n_estimators': [10],
        'criterion': ['gini'],
        'max_depth': [1],
        'max_features': ['sqrt'],
        'min_samples_split': [2],
    }
    ensemble_config['grid_config'] = {
        'sklearn.ensemble.ExtraTreesClassifier': extra_trees_grid,
    }
    ensemble_experiment = SingleThreadedExperiment(
        ensemble_config,
        db_engine=shared_db_engine,
        project_path=shared_project_storage.project_path,
    )
    ensemble_experiment.run()
    return ModelEvaluator(1, 1, shared_db_engine)
Ejemplo n.º 26
0
def model_evaluator(shared_db_engine, shared_project_storage):
    """Run an ExtraTrees experiment, then return ``ModelEvaluator(1, 1, engine)``.

    Source data is populated first. The sample config's ``grid_config`` is
    replaced by a single ExtraTreesClassifier grid before the run.
    ``triage.util.conf.open`` is patched with ``open_side_effect`` while the
    experiment is constructed and run.
    """
    populate_source_data(shared_db_engine)
    ensemble_config = sample_config()
    # We need to have an ensemble model to test ModelEvaluator correctly
    # so we can't use the finished_experiment fixture
    extra_trees_grid = {
        "n_estimators": [10],
        "criterion": ["gini"],
        "max_depth": [1],
        "max_features": ["sqrt"],
        "min_samples_split": [2],
    }
    ensemble_config["grid_config"] = {
        "sklearn.ensemble.ExtraTreesClassifier": extra_trees_grid,
    }
    open_patch = mock.patch("triage.util.conf.open",
                            side_effect=open_side_effect)
    with open_patch:
        ensemble_experiment = SingleThreadedExperiment(
            ensemble_config,
            db_engine=shared_db_engine,
            project_path=shared_project_storage.project_path,
        )
        ensemble_experiment.run()
    return ModelEvaluator(1, 1, shared_db_engine)
Ejemplo n.º 27
0
import os
import sqlalchemy
import yaml

from catwalk.storage import FSModelStorageEngine
from triage.experiments import SingleThreadedExperiment

# Database URL comes from the environment; it is None when FOOD_DB_URL is unset.
food_db = os.environ.get('FOOD_DB_URL')

print(food_db)

with open('inspections-training.yaml') as f:
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6. safe_load parses plain YAML and
    # refuses to construct arbitrary Python objects.
    # NOTE(review): a config relying on python-specific YAML tags would no
    # longer load -- confirm none do.
    experiment_config = yaml.safe_load(f)

# Build the experiment; generated output goes under ./triage-generated.
experiment = SingleThreadedExperiment(
    config=experiment_config,
    db_engine=sqlalchemy.create_engine(food_db),
    model_storage_class=FSModelStorageEngine,
    project_path='./triage-generated')

experiment.run()