Example #1
def test_to_file_defaults_to_raise_if_exists(_testwritetdb):
    "Attempting to use Database.to_file should raise by default if it exists"
    fname = _testwritetdb
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    with pytest.raises(FileExistsError):
        test_dbf.to_file(fname)  # test if_exists behavior
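The _testwritetdb argument used here (and in Example #3) is a pytest fixture defined elsewhere in the test module and not shown in these excerpts. A minimal sketch of what such a fixture might look like; the body is an assumption, not the project's actual fixture:

import os

import pytest


@pytest.fixture
def _testwritetdb():
    fname = 'testwritetdb.tdb'  # assumed temporary file name
    yield fname  # provide the path to the test
    # teardown: remove whatever file the test wrote
    if os.path.exists(fname):
        os.remove(fname)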
Example #2
def test_to_file_raises_with_bad_if_exists_argument():
    "Database.to_file should raise if a bad behavior string is passed to if_exists"
    fname = 'testwritedb.tdb'
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    with pytest.raises(FileExistsError):
        test_dbf.to_file(fname, if_exists='TEST_BAD_ARGUMENT')  # test if_exists behavior
Example #3
def test_to_file_raises_with_bad_if_exists_argument(_testwritetdb):
    "Database.to_file should raise if a bad behavior string is passed to if_exists"
    fname = _testwritetdb
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    with pytest.raises(FileExistsError):
        test_dbf.to_file(
            fname, if_exists='TEST_BAD_ARGUMENT')  # test if_exists behavior
Example #4
def test_to_file_overwrites_with_if_exists_argument():
    "Database.to_file should overwrite if 'overwrite' is passed to if_exists"
    fname = 'testwritedb.tdb'
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    initial_modification_time = os.path.getmtime(fname)
    test_dbf.to_file(fname, if_exists='overwrite')  # test if_exists behavior
    overwrite_modification_time = os.path.getmtime(fname)
    assert overwrite_modification_time > initial_modification_time
Example #5
def test_to_file_overwrites_with_if_exists_argument():
    "Database.to_file should overwrite if 'overwrite' is passed to if_exists"
    import time
    fname = 'testwritedb.tdb'
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    initial_modification_time = os.path.getmtime(fname)
    time.sleep(1)  # this test can fail intermittently without waiting.
    test_dbf.to_file(fname, if_exists='overwrite')  # test if_exists behavior
    overwrite_modification_time = os.path.getmtime(fname)
    assert overwrite_modification_time > initial_modification_time
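The one-second sleep compensates for coarse modification-time resolution on some filesystems, which can make the two os.path.getmtime calls return equal values. A sketch of an alternative (an assumption, not taken from the source) that avoids the wait by back-dating the initial file's timestamp before the overwrite:

import os


def backdate_mtime(path, seconds=10):
    # Hypothetical helper: shift a file's access/modification times into the
    # past so a subsequent write produces a strictly newer mtime.
    st = os.stat(path)
    os.utime(path, (st.st_atime - seconds, st.st_mtime - seconds))

Calling backdate_mtime(fname) in place of time.sleep(1) keeps the test fast while preserving the strict mtime comparison.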
Example #6
def test_to_file_defaults_to_raise_if_exists():
    "Attempting to use Database.to_file should raise by default if it exists"
    fname = 'testwritedb.tdb'
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    with pytest.raises(FileExistsError):
        test_dbf.to_file(fname)  # test if_exists behavior
Example #7
def run_espei(run_settings):
    """Wrapper around the ESPEI fitting procedure, taking only a settings dictionary.

    Parameters
    ----------
    run_settings : dict
        Dictionary of input settings

    Returns
    -------
    Either a Database (for generate parameters only) or a tuple of (Database, sampler)
    """
    run_settings = get_run_settings(run_settings)
    system_settings = run_settings['system']
    output_settings = run_settings['output']
    generate_parameters_settings = run_settings.get('generate_parameters')
    mcmc_settings = run_settings.get('mcmc')

    # handle verbosity
    verbosity = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG}
    logging.basicConfig(level=verbosity[output_settings['verbosity']])

    # load datasets and handle i/o
    logging.debug('Loading and checking datasets.')
    dataset_path = system_settings['datasets']
    datasets = load_datasets(sorted(recursive_glob(dataset_path, '*.json')))
    if len(datasets.all()) == 0:
        logging.warning(
            'No datasets were found in the path {}. This should be a directory containing dataset files ending in `.json`.'
            .format(dataset_path))
    logging.debug('Finished checking datasets')

    with open(system_settings['phase_models']) as fp:
        phase_models = json.load(fp)

    if generate_parameters_settings is not None:
        refdata = generate_parameters_settings['ref_state']
        excess_model = generate_parameters_settings['excess_model']
        dbf = generate_parameters(phase_models, datasets, refdata,
                                  excess_model)
        dbf.to_file(output_settings['output_db'], if_exists='overwrite')

    if mcmc_settings is not None:
        tracefile = output_settings['tracefile']
        probfile = output_settings['probfile']
        # check that the MCMC output files do not already exist
        # only matters if we are actually running MCMC
        if os.path.exists(tracefile):
            raise OSError(
                'Tracefile "{}" exists and would be overwritten by a new run. Use the ``output.tracefile`` setting to set a different name.'
                .format(tracefile))
        if os.path.exists(probfile):
            raise OSError(
                'Probfile "{}" exists and would be overwritten by a new run. Use the ``output.probfile`` setting to set a different name.'
                .format(probfile))

        # scheduler setup
        if mcmc_settings['scheduler'] == 'MPIPool':
            # check that cores is not an input setting
            if mcmc_settings.get('cores') is not None:
                logging.warning("MPI does not take the cores input setting.")
            from emcee.utils import MPIPool
            # code recommended by emcee: if not master, wait for instructions then exit
            client = MPIPool()
            if not client.is_master():
                logging.debug(
                    'MPIPool is not master. Waiting for instructions...')
                client.wait()
                sys.exit(0)
            logging.info("Using MPIPool on {} MPI ranks".format(client.size))
        elif mcmc_settings['scheduler'] == 'dask':
            from distributed import LocalCluster
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if cores > multiprocessing.cpu_count():
                cores = multiprocessing.cpu_count()
                logging.warning(
                    "The number of cores chosen is larger than available. "
                    "Defaulting to run on the {} available cores.".format(
                        cores))
            scheduler = LocalCluster(n_workers=cores,
                                     threads_per_worker=1,
                                     processes=True)
            client = ImmediateClient(scheduler)
            client.run(logging.basicConfig,
                       level=verbosity[output_settings['verbosity']])
            logging.info("Running with dask scheduler: %s [%s cores]" %
                         (scheduler, sum(client.ncores().values())))
            try:
                logging.info(
                    "bokeh server for dask scheduler at localhost:{}".format(
                        client.scheduler_info()['services']['bokeh']))
            except KeyError:
                logging.info("Install bokeh to use the dask bokeh server.")
        elif mcmc_settings['scheduler'] == 'emcee':
            from emcee.interruptible_pool import InterruptiblePool
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if cores > multiprocessing.cpu_count():
                cores = multiprocessing.cpu_count()
                logging.warning(
                    "The number of cores chosen is larger than available. "
                    "Defaulting to run on the {} available cores.".format(
                        cores))
            client = InterruptiblePool(processes=cores)
            logging.info("Using multiprocessing on {} cores".format(cores))
        elif mcmc_settings['scheduler'] == 'None':
            client = None
            logging.info(
                "Not using a parallel scheduler. ESPEI is running MCMC on a single core."
            )

        # get a Database
        if mcmc_settings.get('input_db'):
            dbf = Database(mcmc_settings.get('input_db'))

        # load the restart chain if needed
        if mcmc_settings.get('restart_chain'):
            restart_chain = np.load(mcmc_settings.get('restart_chain'))
        else:
            restart_chain = None

        # load the remaining mcmc fitting parameters
        mcmc_steps = mcmc_settings.get('mcmc_steps')
        save_interval = mcmc_settings.get('mcmc_save_interval')
        chains_per_parameter = mcmc_settings.get('chains_per_parameter')
        chain_std_deviation = mcmc_settings.get('chain_std_deviation')
        deterministic = mcmc_settings.get('deterministic')

        dbf, sampler = mcmc_fit(
            dbf,
            datasets,
            scheduler=client,
            mcmc_steps=mcmc_steps,
            chains_per_parameter=chains_per_parameter,
            chain_std_deviation=chain_std_deviation,
            save_interval=save_interval,
            tracefile=tracefile,
            probfile=probfile,
            restart_chain=restart_chain,
            deterministic=deterministic,
        )

        dbf.to_file(output_settings['output_db'], if_exists='overwrite')
        # close the scheduler, if possible
        if hasattr(client, 'close'):
            client.close()
        return dbf, sampler
    return dbf
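For orientation, here is a sketch of how this version of run_espei might be invoked. The nested keys mirror the dictionary lookups in the function body above; the concrete values (paths, reference state, model name) are illustrative assumptions, and get_run_settings is expected to validate the dictionary and fill in defaults:

# Illustrative settings; the key names follow the lookups in run_espei
# above, and the values are placeholder assumptions.
input_settings = {
    'system': {
        'phase_models': 'my-phases.json',  # hypothetical phase models file
        'datasets': 'input-datasets',      # hypothetical directory of *.json datasets
    },
    'output': {
        'verbosity': 1,
        'output_db': 'out.tdb',
        'tracefile': 'trace.npy',
        'probfile': 'lnprob.npy',
    },
    'generate_parameters': {
        'ref_state': 'SGTE91',
        'excess_model': 'linear',
    },
}

dbf = run_espei(input_settings)  # no 'mcmc' section, so only a Database is returned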
Example #8
def test_to_file_raises_with_bad_if_exists_argument():
    "Database.to_file should raise if a bad behavior string is passed to if_exists"
    fname = 'testwritedb.tdb'
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    with pytest.raises(FileExistsError):
        test_dbf.to_file(fname, if_exists='TEST_BAD_ARGUMENT')  # test if_exists behavior
Example #9
def test_to_file_defaults_to_raise_if_exists():
    "Attempting to use Database.to_file should raise by default if it exists"
    fname = 'testwritedb.tdb'
    test_dbf = Database(ALNIPT_TDB)
    test_dbf.to_file(fname)  # establish the initial file
    with pytest.raises(FileExistsError):
        test_dbf.to_file(fname)  # test if_exists behavior
Example #10
def run_espei(run_settings):
    """Wrapper around the ESPEI fitting procedure, taking only a settings dictionary.

    Parameters
    ----------
    run_settings : dict
        Dictionary of input settings

    Returns
    -------
    Either a Database (for generate parameters only) or a tuple of (Database, sampler)
    """
    run_settings = get_run_settings(run_settings)
    system_settings = run_settings['system']
    output_settings = run_settings['output']
    generate_parameters_settings = run_settings.get('generate_parameters')
    mcmc_settings = run_settings.get('mcmc')

    # handle verbosity
    verbosity = {0: logging.WARNING,
                 1: logging.INFO,
                 2: logging.DEBUG}
    logging.basicConfig(level=verbosity[output_settings['verbosity']])

    log_version_info()

    # load datasets and handle i/o
    logging.debug('Loading and checking datasets.')
    dataset_path = system_settings['datasets']
    datasets = load_datasets(sorted(recursive_glob(dataset_path, '*.json')))
    if len(datasets.all()) == 0:
        logging.warning('No datasets were found in the path {}. This should be a directory containing dataset files ending in `.json`.'.format(dataset_path))
    logging.debug('Finished checking datasets')

    with open(system_settings['phase_models']) as fp:
        phase_models = json.load(fp)

    if generate_parameters_settings is not None:
        refdata = generate_parameters_settings['ref_state']
        excess_model = generate_parameters_settings['excess_model']
        ridge_alpha = generate_parameters_settings['ridge_alpha']
        dbf = generate_parameters(phase_models, datasets, refdata, excess_model, ridge_alpha=ridge_alpha)
        dbf.to_file(output_settings['output_db'], if_exists='overwrite')

    if mcmc_settings is not None:
        tracefile = output_settings['tracefile']
        probfile = output_settings['probfile']
        # check that the MCMC output files do not already exist
        # only matters if we are actually running MCMC
        if os.path.exists(tracefile):
            raise OSError('Tracefile "{}" exists and would be overwritten by a new run. Use the ``output.tracefile`` setting to set a different name.'.format(tracefile))
        if os.path.exists(probfile):
            raise OSError('Probfile "{}" exists and would be overwritten by a new run. Use the ``output.probfile`` setting to set a different name.'.format(probfile))

        # scheduler setup
        if mcmc_settings['scheduler'] == 'dask':
            _raise_dask_work_stealing()  # check for work-stealing
            from distributed import LocalCluster
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if cores > multiprocessing.cpu_count():
                cores = multiprocessing.cpu_count()
                logging.warning("The number of cores chosen is larger than available. "
                                "Defaulting to run on the {} available cores.".format(cores))
            # TODO: make dask-scheduler-verbosity a YAML input so that users can debug. Should have the same log levels as verbosity
            scheduler = LocalCluster(n_workers=cores, threads_per_worker=1, processes=True, memory_limit=0)
            client = ImmediateClient(scheduler)
            client.run(logging.basicConfig, level=verbosity[output_settings['verbosity']])
            logging.info("Running with dask scheduler: %s [%s cores]" % (scheduler, sum(client.ncores().values())))
            try:
                logging.info(
                    "bokeh server for dask scheduler at localhost:{}".format(
                        client.scheduler_info()['services']['bokeh']))
            except KeyError:
                logging.info("Install bokeh to use the dask bokeh server.")
        elif mcmc_settings['scheduler'] == 'None':
            client = None
            logging.info("Not using a parallel scheduler. ESPEI is running MCMC on a single core.")
        else:  # we were passed a scheduler file name
            _raise_dask_work_stealing()  # check for work-stealing
            client = ImmediateClient(scheduler_file=mcmc_settings['scheduler'])
            client.run(logging.basicConfig, level=verbosity[output_settings['verbosity']])
            logging.info("Running with dask scheduler: %s [%s cores]" % (client.scheduler, sum(client.ncores().values())))

        # get a Database
        if mcmc_settings.get('input_db'):
            dbf = Database(mcmc_settings.get('input_db'))

        # load the restart trace if needed
        if mcmc_settings.get('restart_trace'):
            restart_trace = np.load(mcmc_settings.get('restart_trace'))
        else:
            restart_trace = None

        # load the remaining mcmc fitting parameters
        iterations = mcmc_settings.get('iterations')
        save_interval = mcmc_settings.get('save_interval')
        chains_per_parameter = mcmc_settings.get('chains_per_parameter')
        chain_std_deviation = mcmc_settings.get('chain_std_deviation')
        deterministic = mcmc_settings.get('deterministic')

        dbf, sampler = mcmc_fit(dbf, datasets, scheduler=client, iterations=iterations,
                                chains_per_parameter=chains_per_parameter,
                                chain_std_deviation=chain_std_deviation,
                                save_interval=save_interval,
                                tracefile=tracefile, probfile=probfile,
                                restart_trace=restart_trace,
                                deterministic=deterministic,
                                )

        dbf.to_file(output_settings['output_db'], if_exists='overwrite')
        # close the scheduler, if possible
        if hasattr(client, 'close'):
            client.close()
        return dbf, sampler
    return dbf
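Relative to Example #7, this version renames several mcmc settings (mcmc_steps is now iterations, mcmc_save_interval is now save_interval, and restart_chain is now restart_trace) and drops the MPIPool and emcee schedulers in favor of dask or a dask scheduler file. A sketch of a settings dictionary exercising the mcmc path of this version; the values are placeholder assumptions:

settings_with_mcmc = {
    'system': {
        'phase_models': 'my-phases.json',  # hypothetical phase models file
        'datasets': 'input-datasets',      # hypothetical dataset directory
    },
    'output': {
        'verbosity': 1,
        'output_db': 'out.tdb',
        'tracefile': 'trace.npy',
        'probfile': 'lnprob.npy',
    },
    'mcmc': {
        'scheduler': 'dask',   # or 'None', or the name of a dask scheduler file
        'input_db': 'in.tdb',  # hypothetical starting database
        'iterations': 1000,    # called 'mcmc_steps' in Example #7
        'save_interval': 100,  # called 'mcmc_save_interval' in Example #7
        'chains_per_parameter': 2,
        'chain_std_deviation': 0.1,
        'deterministic': True,
    },
}

dbf, sampler = run_espei(settings_with_mcmc)  # returns (Database, sampler)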