def run_exp(config_file, plot_timechops=True, run_exp=True, n_jobs=1):
    """Set up an experiment from ``config_file`` and optionally run it.

    Args:
        config_file: Passed unchanged to ``visualize_timechop`` and
            ``setup_experiment``.
        plot_timechops: When truthy, ``visualize_timechop(config_file)`` is
            called before the experiment is set up.
        run_exp: When falsy, the function returns right after
            ``setup_experiment`` without building or running an experiment.
        n_jobs: Values greater than 1 select ``MultiCoreExperiment`` (using
            ``n_jobs`` for both ``n_processes`` and ``n_db_processes``);
            anything else selects ``SingleThreadedExperiment``.

    Returns:
        None. The wall-clock duration of ``experiment.run()`` is printed.
    """
    if plot_timechops:
        visualize_timechop(config_file)

    config, sql_engine, proj_folder = setup_experiment(config_file)

    if not run_exp:
        return

    # Keyword arguments that both experiment flavours receive.
    shared_kwargs = dict(
        config=config,
        db_engine=sql_engine,
        project_path=proj_folder,
        cleanup=True,
    )

    if n_jobs > 1:
        experiment = MultiCoreExperiment(
            n_processes=n_jobs,
            n_db_processes=n_jobs,
            replace=False,
            **shared_kwargs
        )
    else:
        experiment = SingleThreadedExperiment(**shared_kwargs)

    started = time.time()
    experiment.run()
    finished = time.time()
    print('Took {} seconds to run the experiement'.format(finished - started))
def test_experiment_tracker(test_engine, project_path):
    """Check the ExperimentRun row of a MultiCoreExperiment before and after run().

    Right after construction the row must report the ``started`` status, the
    experiment hash and class path, non-empty environment fields and zeroed
    counters. After ``experiment.run()`` it must report the ``completed``
    status, counters matching the experiment's tasks, datetime timestamps and
    no stacktrace.
    """

    def load_run_record():
        # A new Session is opened for every lookup, as the row is re-read
        # after the experiment has run.
        return Session(bind=test_engine).query(ExperimentRun).get(experiment.run_id)

    experiment = MultiCoreExperiment(
        config=sample_config(),
        db_engine=test_engine,
        project_path=project_path,
        n_processes=4,
    )

    # --- state recorded at construction time ---
    record = load_run_record()
    assert record.current_status == ExperimentRunStatus.started
    assert record.experiment_hash == experiment.experiment_hash
    assert record.experiment_class_path == 'triage.experiments.multicore.MultiCoreExperiment'
    for populated_field in ('platform', 'os_user', 'installed_libraries'):
        assert getattr(record, populated_field)
    for counter in (
        'matrices_skipped',
        'matrices_errored',
        'matrices_made',
        'models_skipped',
        'models_errored',
        'models_made',
    ):
        assert getattr(record, counter) == 0

    experiment.run()

    # --- state recorded once the run has finished ---
    record = load_run_record()
    assert record.start_method == "run"
    assert record.matrices_made == len(experiment.matrix_build_tasks)
    for counter in (
        'matrices_skipped',
        'matrices_errored',
        'models_skipped',
        'models_errored',
    ):
        assert getattr(record, counter) == 0
    assert record.models_made == len([
        task['train_kwargs']['model_hash']
        for batch in experiment._all_train_test_batches()
        for task in batch.tasks
    ])
    for timestamp_field in (
        'matrix_building_started',
        'model_building_started',
        'last_updated_time',
    ):
        assert isinstance(getattr(record, timestamp_field), datetime.datetime)
    assert not record.stacktrace
    assert record.current_status == ExperimentRunStatus.completed
def run(config_filename, verbose, replace, predictions, validate_only):
    """Build, validate and optionally run a MultiCoreExperiment from files under ``config/``.

    The experiment config is read from ``config/<config_filename>.yaml``. Every
    file in ``config/features/`` is parsed as YAML and its entries are
    concatenated into ``experiment_config['feature_aggregations']``. Database
    credentials come from ``config/db_default_profile.json`` (keys ``user``,
    ``pass``, ``host`` and ``db``).

    Args:
        config_filename: Base name (without ``.yaml``) of the experiment config.
        verbose: When truthy, log at DEBUG level; otherwise at INFO level.
        replace: Forwarded as ``replace`` to ``MultiCoreExperiment``.
        predictions: Forwarded as ``save_predictions`` to ``MultiCoreExperiment``.
        validate_only: When truthy, stop after ``experiment.validate()``.
    """
    # configure logging
    log_filename = 'logs/modeling_{}'.format(
        str(datetime.datetime.now()).replace(' ', '_').replace(':', ''))
    # FileHandler does not create missing directories, so make sure it exists.
    os.makedirs('logs', exist_ok=True)
    logging_level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        format='%(asctime)s %(process)d %(levelname)s: %(message)s',
        level=logging_level,
        handlers=[logging.FileHandler(log_filename), logging.StreamHandler()])

    features_directory = 'features'

    # load main experiment config
    # safe_load is used because yaml.load() without an explicit Loader is
    # rejected by PyYAML >= 6 and can construct arbitrary objects on older
    # releases; the config only needs plain YAML data.
    with open('config/{}.yaml'.format(config_filename)) as f:
        experiment_config = yaml.safe_load(f)

    # load feature configs and update experiment config with their contents
    all_feature_aggregations = []
    for filename in os.listdir('config/{}/'.format(features_directory)):
        with open('config/{}/{}'.format(features_directory, filename)) as f:
            # An empty file parses to None; treat it as "no aggregations".
            all_feature_aggregations.extend(yaml.safe_load(f) or [])
    experiment_config['feature_aggregations'] = all_feature_aggregations

    with open('config/db_default_profile.json') as f:
        db_config = json.load(f)

    db_engine = create_engine(
        f"postgresql://{db_config['user']}:{db_config['pass']}@{db_config['host']}/{db_config['db']}"
    )

    experiment = MultiCoreExperiment(
        config=experiment_config,
        db_engine=db_engine,
        project_path=PROJECT_PATH,
        replace=replace,
        n_db_processes=4,
        n_processes=40,
        save_predictions=predictions,
    )

    experiment.validate()
    if not validate_only:
        experiment.run()
def test_experiment_tracker(test_engine, project_path):
    """Check the TriageRun row of a MultiCoreExperiment before and after run().

    The row is looked up by ``experiment.run_id`` once right after the
    experiment is constructed and once after ``experiment.run()``; each lookup
    uses a fresh ``Session`` bound to ``test_engine``.
    """
    # NOTE(review): ``triage.util.conf.open`` is patched with
    # ``open_side_effect``. The whole test is assumed to run inside the patch
    # context; confirm against the upstream test whether the patch should
    # cover only the experiment construction.
    with mock.patch("triage.util.conf.open", side_effect=open_side_effect) as mock_file:
        experiment = MultiCoreExperiment(
            config=sample_config(),
            db_engine=test_engine,
            project_path=project_path,
            n_processes=4,
        )
        # State recorded at construction time, before anything has been built.
        experiment_run = Session(bind=test_engine).query(TriageRun).get(
            experiment.run_id)
        assert experiment_run.current_status == TriageRunStatus.started
        assert experiment_run.run_hash == experiment.experiment_hash
        assert experiment_run.run_type == "experiment"
        assert (experiment_run.experiment_class_path == "triage.experiments.multicore.MultiCoreExperiment")
        # Environment fields only need to be populated (truthy).
        assert experiment_run.platform
        assert experiment_run.os_user
        assert experiment_run.installed_libraries
        # All counters start at zero.
        assert experiment_run.matrices_skipped == 0
        assert experiment_run.matrices_errored == 0
        assert experiment_run.matrices_made == 0
        assert experiment_run.models_skipped == 0
        assert experiment_run.models_errored == 0
        assert experiment_run.models_made == 0
        experiment.run()
        # Re-read the row to see what the run recorded.
        experiment_run = Session(bind=test_engine).query(TriageRun).get(
            experiment.run_id)
        assert experiment_run.start_method == "run"
        assert experiment_run.matrices_made == len(experiment.matrix_build_tasks)
        assert experiment_run.matrices_skipped == 0
        assert experiment_run.matrices_errored == 0
        assert experiment_run.models_skipped == 0
        assert experiment_run.models_errored == 0
        # Expected model count: one per task across all train/test batches.
        assert experiment_run.models_made == len(
            list(task["train_kwargs"]["model_hash"] for batch in experiment._all_train_test_batches() for task in batch.tasks))
        assert isinstance(experiment_run.matrix_building_started, datetime.datetime)
        assert isinstance(experiment_run.model_building_started, datetime.datetime)
        assert isinstance(experiment_run.last_updated_time, datetime.datetime)
        assert not experiment_run.stacktrace
        assert experiment_run.current_status == TriageRunStatus.completed
def _int_from_env(name, default):
    """Return environment variable ``name`` as an int, or ``default`` if unset or not an integer."""
    try:
        return int(os.environ.get(name, default))
    except ValueError:
        return default


def run_experiment(experiment_file, output_path, replace):
    """Load an experiment config via s3fs, then validate and run a MultiCoreExperiment.

    Database connection settings are read from the ``POSTGRES_HOST``,
    ``POSTGRES_USER``, ``POSTGRES_DB``, ``POSTGRES_PASSWORD`` and
    ``POSTGRES_PORT`` environment variables. ``NUMBER_OF_PROCESSES`` (default
    12) and ``NUMBER_OF_DB_PROCESSES`` (default 6) set the process counts; a
    non-integer value falls back to the default.

    Args:
        experiment_file: Path of the YAML experiment configuration, opened
            through ``s3fs.S3FileSystem``.
        output_path: Forwarded as ``project_path`` to ``MultiCoreExperiment``.
        replace: Forwarded as ``replace`` to ``MultiCoreExperiment``.

    Raises:
        KeyError: If any of the ``POSTGRES_*`` environment variables is missing.
    """
    start_time = datetime.datetime.now()
    logging.info(
        f"Reading the file experiment configuration from {experiment_file}")

    # Load the experiment configuration file
    s3 = s3fs.S3FileSystem()
    with s3.open(experiment_file, 'rb') as f:
        # safe_load is used because yaml.load() without an explicit Loader is
        # rejected by PyYAML >= 6 and can construct arbitrary objects on older
        # releases; the config only needs plain YAML data.
        experiment_config = yaml.safe_load(f.read())

    host = os.environ['POSTGRES_HOST']
    user = os.environ['POSTGRES_USER']
    db = os.environ['POSTGRES_DB']
    password = os.environ['POSTGRES_PASSWORD']
    port = os.environ['POSTGRES_PORT']

    db_url = f"postgresql://{user}:{password}@{host}:{port}/{db}"

    # The password is masked so it never reaches the log.
    logging.info(
        f"Using the database: postgresql://{user}:XXXXX@{host}:{port}/{db}")

    n_processes = _int_from_env('NUMBER_OF_PROCESSES', 12)
    n_db_processes = _int_from_env('NUMBER_OF_DB_PROCESSES', 6)

    logging.info(f"The experiment will use {n_processes} cores in the host")
    logging.info(
        f"The output (matrices and models) of this experiment will be stored in {output_path}"
    )
    logging.info(
        f"The experiment will utilize any preexisting matrix or model: {not replace}"
    )

    logging.info("Creating experiment object")
    experiment = MultiCoreExperiment(
        n_processes=n_processes,
        n_db_processes=n_db_processes,
        config=experiment_config,
        db_engine=triage.create_engine(db_url),
        project_path=output_path,
        #matrix_storage_class=HDFMatrixStore,
        replace=replace,
        cleanup=True,
        cleanup_timeout=2)
    logging.info(
        "Experiment created: all the file permissions, and db connections are OK"
    )

    logging.info("Validating the experiment")
    experiment.validate()
    logging.info(
        " The experiment configuration doesn't contain any obvious errors."
        " Any error that occurs possibly is related to number of columns or"
        " collision in the column names, both due to PostgreSQL limitations. \n"
    )
    logging.debug(f"Experiment configuration: {experiment.config}")

    # File name without directory or extension, e.g. "a/b/exp.yaml" -> "exp".
    experiment_name = os.path.splitext(os.path.split(experiment_file)[1])[0]
    logging.info(f"Running the experiment: {experiment_name}")
    experiment.run()

    end_time = datetime.datetime.now()
    logging.info(
        f"Experiment {experiment_file} completed in {end_time - start_time} seconds"
    )
    logging.info("Done!")
db_engine = create_engine(db_url)

# loading config file
with open('donors-choose-config.yaml', 'r') as fin:
    # safe_load is used because yaml.load() without an explicit Loader is
    # rejected by PyYAML >= 6 and can construct arbitrary objects on older
    # releases; the config only needs plain YAML data.
    config = yaml.safe_load(fin)

# generating temporal config plot
chopper = Timechop(**config['temporal_config'])

# We aren't interested in seeing the entire feature_start_time represented
# in our timechop plot. That would hide the interesting information. So we
# set it to equal label_start_time for the plot.
chopper.feature_start_time = chopper.label_start_time

visualize_chops(chopper, save_target='triage_output/timechop.png')

# creating experiment object
experiment = MultiCoreExperiment(
    config=config,
    db_engine=db_engine,
    project_path='s3://dsapp-education-migrated/donors-choose',
    n_processes=32,
    n_db_processes=4,
    replace=False)

experiment.run()