def test_to_file_defaults_to_raise_if_exists(_testwritetdb):
    "Attempting to use Database.to_file should raise by default if it exists"
    fname = _testwritetdb
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    with pytest.raises(FileExistsError):
        test_dbf.to_file(fname)  # test if_exists behavior
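# The tests that take ``_testwritetdb`` assume a pytest fixture of that name
# which supplies a writable TDB file name and cleans it up after the test.
# The fixture is not shown in this section; the sketch below is one possible
# implementation, and the file name used is only illustrative.
import os
import pytest

@pytest.fixture
def _testwritetdb():
    fname = 'testwritedb.tdb'
    yield fname  # hand the file name to the test
    if os.path.exists(fname):
        os.remove(fname)  # remove the database file written by the test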
def test_to_file_raises_with_bad_if_exists_argument():
    "Database.to_file should raise if a bad behavior string is passed to if_exists"
    fname = 'testwritedb.tdb'
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    with pytest.raises(FileExistsError):
        test_dbf.to_file(fname, if_exists='TEST_BAD_ARGUMENT')  # test if_exists behavior
def test_to_file_raises_with_bad_if_exists_argument(_testwritetdb):
    "Database.to_file should raise if a bad behavior string is passed to if_exists"
    fname = _testwritetdb
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    with pytest.raises(FileExistsError):
        test_dbf.to_file(fname, if_exists='TEST_BAD_ARGUMENT')  # test if_exists behavior
def test_to_file_overwrites_with_if_exists_argument():
    "Database.to_file should overwrite if 'overwrite' is passed to if_exists"
    fname = 'testwritedb.tdb'
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    initial_modification_time = os.path.getmtime(fname)
    test_dbf.to_file(fname, if_exists='overwrite')  # test if_exists behavior
    overwrite_modification_time = os.path.getmtime(fname)
    assert overwrite_modification_time > initial_modification_time
def test_to_file_overwrites_with_if_exists_argument():
    "Database.to_file should overwrite if 'overwrite' is passed to if_exists"
    import time
    fname = 'testwritedb.tdb'
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    initial_modification_time = os.path.getmtime(fname)
    time.sleep(1)  # this test can fail intermittently without waiting.
    test_dbf.to_file(fname, if_exists='overwrite')  # test if_exists behavior
    overwrite_modification_time = os.path.getmtime(fname)
    assert overwrite_modification_time > initial_modification_time
def test_to_file_defaults_to_raise_if_exists():
    "Attempting to use Database.to_file should raise by default if it exists"
    fname = 'testwritedb.tdb'
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    with pytest.raises(FileExistsError):
        test_dbf.to_file(fname)  # test if_exists behavior
def run_espei(run_settings):
    """Wrapper around the ESPEI fitting procedure, taking only a settings dictionary.

    Parameters
    ----------
    run_settings : dict
        Dictionary of input settings

    Returns
    -------
    Either a Database (for generate parameters only) or a tuple of (Database, sampler)
    """
    run_settings = get_run_settings(run_settings)
    system_settings = run_settings['system']
    output_settings = run_settings['output']
    generate_parameters_settings = run_settings.get('generate_parameters')
    mcmc_settings = run_settings.get('mcmc')

    # handle verbosity
    verbosity = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG}
    logging.basicConfig(level=verbosity[output_settings['verbosity']])

    # load datasets and handle i/o
    logging.debug('Loading and checking datasets.')
    dataset_path = system_settings['datasets']
    datasets = load_datasets(sorted(recursive_glob(dataset_path, '*.json')))
    if len(datasets.all()) == 0:
        logging.warning('No datasets were found in the path {}. This should be a directory containing dataset files ending in `.json`.'.format(dataset_path))
    logging.debug('Finished checking datasets')

    with open(system_settings['phase_models']) as fp:
        phase_models = json.load(fp)

    if generate_parameters_settings is not None:
        refdata = generate_parameters_settings['ref_state']
        excess_model = generate_parameters_settings['excess_model']
        dbf = generate_parameters(phase_models, datasets, refdata, excess_model)
        dbf.to_file(output_settings['output_db'], if_exists='overwrite')

    if mcmc_settings is not None:
        tracefile = output_settings['tracefile']
        probfile = output_settings['probfile']
        # check that the MCMC output files do not already exist
        # only matters if we are actually running MCMC
        if os.path.exists(tracefile):
            raise OSError('Tracefile "{}" exists and would be overwritten by a new run. Use the ``output.tracefile`` setting to set a different name.'.format(tracefile))
        if os.path.exists(probfile):
            raise OSError('Probfile "{}" exists and would be overwritten by a new run. Use the ``output.probfile`` setting to set a different name.'.format(probfile))

        # scheduler setup
        if mcmc_settings['scheduler'] == 'MPIPool':
            # check that cores is not an input setting
            if mcmc_settings.get('cores') is not None:
                logging.warning("MPI does not take the cores input setting.")
            from emcee.utils import MPIPool
            # code recommended by emcee: if not master, wait for instructions then exit
            client = MPIPool()
            if not client.is_master():
                logging.debug('MPIPool is not master. Waiting for instructions...')
                client.wait()
                sys.exit(0)
            logging.info("Using MPIPool on {} MPI ranks".format(client.size))
        elif mcmc_settings['scheduler'] == 'dask':
            from distributed import LocalCluster
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if (cores > multiprocessing.cpu_count()):
                cores = multiprocessing.cpu_count()
                logging.warning("The number of cores chosen is larger than available. "
                                "Defaulting to run on the {} available cores.".format(cores))
            scheduler = LocalCluster(n_workers=cores, threads_per_worker=1, processes=True)
            client = ImmediateClient(scheduler)
            client.run(logging.basicConfig, level=verbosity[output_settings['verbosity']])
            logging.info("Running with dask scheduler: %s [%s cores]" % (scheduler, sum(client.ncores().values())))
            try:
                logging.info("bokeh server for dask scheduler at localhost:{}".format(client.scheduler_info()['services']['bokeh']))
            except KeyError:
                logging.info("Install bokeh to use the dask bokeh server.")
        elif mcmc_settings['scheduler'] == 'emcee':
            from emcee.interruptible_pool import InterruptiblePool
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if (cores > multiprocessing.cpu_count()):
                cores = multiprocessing.cpu_count()
                logging.warning("The number of cores chosen is larger than available. "
                                "Defaulting to run on the {} available cores.".format(cores))
            client = InterruptiblePool(processes=cores)
            logging.info("Using multiprocessing on {} cores".format(cores))
        elif mcmc_settings['scheduler'] == 'None':
            client = None
            logging.info("Not using a parallel scheduler. ESPEI is running MCMC on a single core.")

        # get a Database
        if mcmc_settings.get('input_db'):
            dbf = Database(mcmc_settings.get('input_db'))

        # load the restart chain if needed
        if mcmc_settings.get('restart_chain'):
            restart_chain = np.load(mcmc_settings.get('restart_chain'))
        else:
            restart_chain = None

        # load the remaining mcmc fitting parameters
        mcmc_steps = mcmc_settings.get('mcmc_steps')
        save_interval = mcmc_settings.get('mcmc_save_interval')
        chains_per_parameter = mcmc_settings.get('chains_per_parameter')
        chain_std_deviation = mcmc_settings.get('chain_std_deviation')
        deterministic = mcmc_settings.get('deterministic')

        dbf, sampler = mcmc_fit(dbf, datasets, scheduler=client, mcmc_steps=mcmc_steps,
                                chains_per_parameter=chains_per_parameter,
                                chain_std_deviation=chain_std_deviation,
                                save_interval=save_interval,
                                tracefile=tracefile, probfile=probfile,
                                restart_chain=restart_chain,
                                deterministic=deterministic,
                                )

        dbf.to_file(output_settings['output_db'], if_exists='overwrite')
        # close the scheduler, if possible
        if hasattr(client, 'close'):
            client.close()
        return dbf, sampler
    return dbf
def run_espei(run_settings):
    """Wrapper around the ESPEI fitting procedure, taking only a settings dictionary.

    Parameters
    ----------
    run_settings : dict
        Dictionary of input settings

    Returns
    -------
    Either a Database (for generate parameters only) or a tuple of (Database, sampler)
    """
    run_settings = get_run_settings(run_settings)
    system_settings = run_settings['system']
    output_settings = run_settings['output']
    generate_parameters_settings = run_settings.get('generate_parameters')
    mcmc_settings = run_settings.get('mcmc')

    # handle verbosity
    verbosity = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG}
    logging.basicConfig(level=verbosity[output_settings['verbosity']])
    log_version_info()

    # load datasets and handle i/o
    logging.debug('Loading and checking datasets.')
    dataset_path = system_settings['datasets']
    datasets = load_datasets(sorted(recursive_glob(dataset_path, '*.json')))
    if len(datasets.all()) == 0:
        logging.warning('No datasets were found in the path {}. This should be a directory containing dataset files ending in `.json`.'.format(dataset_path))
    logging.debug('Finished checking datasets')

    with open(system_settings['phase_models']) as fp:
        phase_models = json.load(fp)

    if generate_parameters_settings is not None:
        refdata = generate_parameters_settings['ref_state']
        excess_model = generate_parameters_settings['excess_model']
        ridge_alpha = generate_parameters_settings['ridge_alpha']
        dbf = generate_parameters(phase_models, datasets, refdata, excess_model, ridge_alpha=ridge_alpha)
        dbf.to_file(output_settings['output_db'], if_exists='overwrite')

    if mcmc_settings is not None:
        tracefile = output_settings['tracefile']
        probfile = output_settings['probfile']
        # check that the MCMC output files do not already exist
        # only matters if we are actually running MCMC
        if os.path.exists(tracefile):
            raise OSError('Tracefile "{}" exists and would be overwritten by a new run. Use the ``output.tracefile`` setting to set a different name.'.format(tracefile))
        if os.path.exists(probfile):
            raise OSError('Probfile "{}" exists and would be overwritten by a new run. Use the ``output.probfile`` setting to set a different name.'.format(probfile))

        # scheduler setup
        if mcmc_settings['scheduler'] == 'dask':
            _raise_dask_work_stealing()  # check for work-stealing
            from distributed import LocalCluster
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if (cores > multiprocessing.cpu_count()):
                cores = multiprocessing.cpu_count()
                logging.warning("The number of cores chosen is larger than available. "
                                "Defaulting to run on the {} available cores.".format(cores))
            # TODO: make dask-scheduler-verbosity a YAML input so that users can debug. Should have the same log levels as verbosity
            scheduler = LocalCluster(n_workers=cores, threads_per_worker=1, processes=True, memory_limit=0)
            client = ImmediateClient(scheduler)
            client.run(logging.basicConfig, level=verbosity[output_settings['verbosity']])
            logging.info("Running with dask scheduler: %s [%s cores]" % (scheduler, sum(client.ncores().values())))
            try:
                logging.info("bokeh server for dask scheduler at localhost:{}".format(client.scheduler_info()['services']['bokeh']))
            except KeyError:
                logging.info("Install bokeh to use the dask bokeh server.")
        elif mcmc_settings['scheduler'] == 'None':
            client = None
            logging.info("Not using a parallel scheduler. ESPEI is running MCMC on a single core.")
        else:  # we were passed a scheduler file name
            _raise_dask_work_stealing()  # check for work-stealing
            client = ImmediateClient(scheduler_file=mcmc_settings['scheduler'])
            client.run(logging.basicConfig, level=verbosity[output_settings['verbosity']])
            logging.info("Running with dask scheduler: %s [%s cores]" % (client.scheduler, sum(client.ncores().values())))

        # get a Database
        if mcmc_settings.get('input_db'):
            dbf = Database(mcmc_settings.get('input_db'))

        # load the restart trace if needed
        if mcmc_settings.get('restart_trace'):
            restart_trace = np.load(mcmc_settings.get('restart_trace'))
        else:
            restart_trace = None

        # load the remaining mcmc fitting parameters
        iterations = mcmc_settings.get('iterations')
        save_interval = mcmc_settings.get('save_interval')
        chains_per_parameter = mcmc_settings.get('chains_per_parameter')
        chain_std_deviation = mcmc_settings.get('chain_std_deviation')
        deterministic = mcmc_settings.get('deterministic')

        dbf, sampler = mcmc_fit(dbf, datasets, scheduler=client, iterations=iterations,
                                chains_per_parameter=chains_per_parameter,
                                chain_std_deviation=chain_std_deviation,
                                save_interval=save_interval,
                                tracefile=tracefile, probfile=probfile,
                                restart_trace=restart_trace,
                                deterministic=deterministic,
                                )

        dbf.to_file(output_settings['output_db'], if_exists='overwrite')
        # close the scheduler, if possible
        if hasattr(client, 'close'):
            client.close()
        return dbf, sampler
    return dbf