Example #1
def __init__(self, dbf):
    self.orig_dbf = copy.deepcopy(dbf)
    self.dbf = copy.deepcopy(dbf)
    parameters = {
        sym: unpack_piecewise(dbf.symbols[sym])
        for sym in database_symbols_to_fit(dbf)
    }
    ds = load_datasets([])  # empty TinyDB
    root = OptNode(parameters, ds)
    self.current_node = root
    self.staged_nodes = []
    self.graph = OptGraph(root)
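
All of these examples revolve around database_symbols_to_fit, which scans a pycalphad Database for symbols that follow the fitting-parameter naming convention (V or VV followed by a number, e.g. VV0001, per the docstrings below). A minimal sketch of that helper for orientation only; the real implementation lives in espei.utils:

import re

def database_symbols_to_fit_sketch(dbf, symbol_regex=r"^V[V]?([0-9]+)$"):
    # Sorted names of Database symbols that look like fitting parameters.
    # Sketch only; use espei.utils.database_symbols_to_fit in practice.
    return sorted(name for name in dbf.symbols.keys()
                  if re.match(symbol_regex, name))
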
Example #2
def parameter_labels(dbf, formatted=True):
    parameter_symbols = database_symbols_to_fit(dbf)

    if formatted:
        parameter_labels = []
        for sym in parameter_symbols:
            fp = formatted_parameter(dbf, sym)
            label = "{}({})\n{}: {}".format(fp.phase_name, fp.interaction,
                                            fp.parameter_type, fp.term_symbol)
            parameter_labels.append(label)
        return parameter_labels
    else:
        return parameter_symbols
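
A hedged usage sketch of parameter_labels; the TDB file name is hypothetical and the exact label strings depend on formatted_parameter, so the outputs shown in comments are only illustrative:

from pycalphad import Database

dbf = Database('CU-MG_param_gen.tdb')          # hypothetical TDB with VV symbols
print(parameter_labels(dbf, formatted=False))  # e.g. ['VV0000', 'VV0001']
print(parameter_labels(dbf))                   # e.g. ['LIQUID(CU,MG)\nL: VV0000', ...]
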
Example #3
def test_equilibrium_thermochemical_context_is_pickleable(datasets_db):
    """Test that the context for equilibrium thermochemical data is pickleable"""
    datasets_db.insert(CU_MG_EQ_HMR_LIQUID)
    dbf = Database(CU_MG_TDB)

    symbols_to_fit = database_symbols_to_fit(dbf)
    initial_guess = np.array([unpack_piecewise(dbf.symbols[s]) for s in symbols_to_fit])
    prior_dict = EmceeOptimizer.get_priors(None, symbols_to_fit, initial_guess)
    ctx = setup_context(dbf, datasets_db)
    ctx.update(prior_dict)

    ctx_pickle = pickle.dumps(ctx)
    ctx_unpickled = pickle.loads(ctx_pickle)

    regular_predict  = EmceeOptimizer.predict(initial_guess, **ctx)
    unpickle_predict = EmceeOptimizer.predict(initial_guess, **ctx_unpickled)
    assert np.isclose(regular_predict, unpickle_predict)
Example #4
def test_zpf_context_is_pickleable(datasets_db):
    """Test that the context for ZPF data is pickleable"""
    datasets_db.insert(CU_MG_DATASET_ZPF_ZERO_ERROR)
    dbf = Database(CU_MG_TDB)

    symbols_to_fit = database_symbols_to_fit(dbf)
    initial_guess = np.array([unpack_piecewise(dbf.symbols[s]) for s in symbols_to_fit])
    prior_dict = EmceeOptimizer.get_priors(None, symbols_to_fit, initial_guess)
    ctx = setup_context(dbf, datasets_db)
    ctx.update(prior_dict)

    ctx_pickle = pickle.dumps(ctx)
    ctx_unpickled = pickle.loads(ctx_pickle)

    regular_predict  = EmceeOptimizer.predict(initial_guess, **ctx)
    unpickle_predict = EmceeOptimizer.predict(initial_guess, **ctx_unpickled)
    assert np.isclose(regular_predict, unpickle_predict)
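
Examples #3 and #4 differ only in the dataset they insert; both follow one pattern: build the error context, round-trip it through pickle, and check that the log probability is unchanged. A condensed sketch of that check (the EmceeOptimizer import path is an assumption):

import pickle
import numpy as np
from espei.optimizers.opt_mcmc import EmceeOptimizer  # assumed location

def assert_context_pickle_stable(ctx, initial_guess):
    # Serialize and restore the context, then verify predictions agree.
    restored = pickle.loads(pickle.dumps(ctx))
    assert np.isclose(EmceeOptimizer.predict(initial_guess, **ctx),
                      EmceeOptimizer.predict(initial_guess, **restored))
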
Example #5
def get_thermochemical_data(dbf,
                            comps,
                            phases,
                            datasets,
                            weight_dict=None,
                            symbols_to_fit=None):
    """

    Parameters
    ----------
    dbf : pycalphad.Database
        Database to consider
    comps : list
        List of active component names
    phases : list
        List of phases to consider
    datasets : espei.utils.PickleableTinyDB
        Datasets that contain single phase data
    weight_dict : dict
        Dictionary of weights for each data type, e.g. {'HM': 200, 'SM': 2}
    symbols_to_fit : list
        Parameters to fit. Used to build the models and PhaseRecords.

    Returns
    -------
    list
        List of data dictionaries to iterate over
    """
    # phase by phase, then property by property, then by model exclusions
    if weight_dict is None:
        weight_dict = {}

    if symbols_to_fit is not None:
        symbols_to_fit = sorted(symbols_to_fit)
    else:
        symbols_to_fit = database_symbols_to_fit(dbf)

    # estimated from NIST TRC uncertainties
    property_std_deviation = {
        'HM': 500.0 / weight_dict.get('HM', 1.0),  # J/mol
        'SM': 0.2 / weight_dict.get('SM', 1.0),  # J/K-mol
        'CPM': 0.2 / weight_dict.get('CPM', 1.0),  # J/K-mol
    }
    properties = [
        'HM_FORM', 'SM_FORM', 'CPM_FORM', 'HM_MIX', 'SM_MIX', 'CPM_MIX'
    ]

    ref_states = []
    for el in get_pure_elements(dbf, comps):
        ref_state = ReferenceState(el, dbf.refstates[el]['phase'])
        ref_states.append(ref_state)
    all_data_dicts = []
    for phase_name in phases:
        for prop in properties:
            desired_data = get_prop_data(
                comps,
                phase_name,
                prop,
                datasets,
                additional_query=(where('solver').exists()))
            if len(desired_data) == 0:
                continue
            unique_exclusions = set([
                tuple(sorted(d.get('excluded_model_contributions', [])))
                for d in desired_data
            ])
            for exclusion in unique_exclusions:
                data_dict = {
                    'phase_name': phase_name,
                    'prop': prop,
                    # needs the following keys to be added:
                    # species, calculate_dict, phase_records, model, output, weights
                }
                # get all the data with these model exclusions
                if exclusion == tuple([]):
                    exc_search = (
                        ~where('excluded_model_contributions').exists()) & (
                            where('solver').exists())
                else:
                    exc_search = (where('excluded_model_contributions').test(
                        lambda x: tuple(sorted(x)) == exclusion)) & (
                            where('solver').exists())
                curr_data = get_prop_data(comps,
                                          phase_name,
                                          prop,
                                          datasets,
                                          additional_query=exc_search)
                calculate_dict = get_prop_samples(dbf, comps, phase_name,
                                                  curr_data)
                mod = Model(dbf, comps, phase_name, parameters=symbols_to_fit)
                if prop.endswith('_FORM'):
                    output = ''.join(prop.split('_')[:-1]) + 'R'
                    mod.shift_reference_state(
                        ref_states,
                        dbf,
                        contrib_mods={e: sympy.S.Zero
                                      for e in exclusion})
                else:
                    output = prop
                for contrib in exclusion:
                    mod.models[contrib] = sympy.S.Zero
                    mod.reference_model.models[contrib] = sympy.S.Zero
                species = sorted(unpack_components(dbf, comps), key=str)
                data_dict['species'] = species
                model = {phase_name: mod}
                statevar_dict = {
                    getattr(v, c, None): vals
                    for c, vals in calculate_dict.items()
                    if isinstance(getattr(v, c, None), v.StateVariable)
                }
                statevar_dict = OrderedDict(
                    sorted(statevar_dict.items(), key=lambda x: str(x[0])))
                str_statevar_dict = OrderedDict(
                    (str(k), vals) for k, vals in statevar_dict.items())
                phase_records = build_phase_records(
                    dbf,
                    species, [phase_name],
                    statevar_dict,
                    model,
                    output=output,
                    parameters={s: 0
                                for s in symbols_to_fit},
                    build_gradients=False,
                    build_hessians=False)
                data_dict['str_statevar_dict'] = str_statevar_dict
                data_dict['phase_records'] = phase_records
                data_dict['calculate_dict'] = calculate_dict
                data_dict['model'] = model
                data_dict['output'] = output
                data_dict['weights'] = np.array(
                    property_std_deviation[prop.split('_')[0]]) / np.array(
                        calculate_dict.pop('weights'))
                all_data_dicts.append(data_dict)
    return all_data_dicts
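
A hedged standalone usage sketch for get_thermochemical_data; the TDB file name is hypothetical and datasets is a loaded PickleableTinyDB as in the other examples:

from pycalphad import Database

dbf = Database('CU-MG_param_gen.tdb')  # hypothetical TDB
comps = ['CU', 'MG', 'VA']
phases = ['LIQUID', 'FCC_A1', 'LAVES_C15']
# weight_dict={'HM': 2.0} halves the assumed HM standard deviation (500 J/mol)
thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets,
                                              weight_dict={'HM': 2.0})
for d in thermochemical_data:
    print(d['phase_name'], d['prop'], d['output'])
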
Example #6
File: context.py  Project: bocklund/ESPEI
def setup_context(dbf,
                  datasets,
                  symbols_to_fit=None,
                  data_weights=None,
                  phase_models=None,
                  make_callables=True):
    """
    Set up a context dictionary for calculating error.

    Parameters
    ----------
    dbf : Database
        A pycalphad Database that will be fit
    datasets : PickleableTinyDB
        A database of single- and multi-phase data to fit
    symbols_to_fit : list of str
        List of symbols in the Database that will be fit. If None (default),
        all parameters prefixed with `VV` followed by a number, e.g. VV0001,
        will be fit.
    data_weights : dict, optional
        Dictionary of weights for each data type, e.g. {'ZPF': 20, 'HM': 2}
    phase_models : dict, optional
        Phase model descriptions (e.g. loaded from a phase models JSON file),
        used to set the active components and any custom models
    make_callables : bool, optional
        If True (default), pre-build equilibrium callables (used for the
        activity error)

    Returns
    -------
    dict
        Context dictionary whose keys are passed as keyword arguments to the
        log-probability function, e.g. EmceeOptimizer.predict

    Notes
    -----
    A copy of the Database is made and used in the context. To commit changes
    back to the original database, the dbf.symbols.update method should be used.
    """
    dbf = copy.deepcopy(dbf)
    if phase_models is not None:
        comps = sorted(phase_models['components'])
    else:
        comps = sorted([sp for sp in dbf.elements])
    if symbols_to_fit is None:
        symbols_to_fit = database_symbols_to_fit(dbf)
    else:
        symbols_to_fit = sorted(symbols_to_fit)
    data_weights = data_weights if data_weights is not None else {}

    if len(symbols_to_fit) == 0:
        raise ValueError(
            'No degrees of freedom. Database must contain symbols starting with \'V\' or \'VV\', followed by a number.'
        )
    else:
        _log.info('Fitting %s degrees of freedom.', len(symbols_to_fit))

    for x in symbols_to_fit:
        if isinstance(dbf.symbols[x], symengine.Piecewise):
            _log.debug('Replacing %s in database', x)
            dbf.symbols[x] = dbf.symbols[x].args[0]

    # construct the models for each phase, substituting in the SymEngine symbol to fit.
    if phase_models is not None:
        model_dict = get_model_dict(phase_models)
    else:
        model_dict = {}
    _log.trace('Building phase models (this may take some time)')
    import time
    t1 = time.time()
    phases = sorted(
        filter_phases(dbf, unpack_components(dbf, comps), dbf.phases.keys()))
    parameters = dict(zip(symbols_to_fit, [0] * len(symbols_to_fit)))
    models = instantiate_models(dbf,
                                comps,
                                phases,
                                model=model_dict,
                                parameters=parameters)
    if make_callables:
        eq_callables = build_callables(dbf,
                                       comps,
                                       phases,
                                       models,
                                       parameter_symbols=symbols_to_fit,
                                       output='GM',
                                       build_gradients=True,
                                       build_hessians=True,
                                       additional_statevars={v.N, v.P, v.T})
    else:
        eq_callables = None
    t2 = time.time()
    _log.trace('Finished building phase models (%0.2fs)', t2 - t1)
    _log.trace(
        'Getting non-equilibrium thermochemical data (this may take some time)'
    )
    t1 = time.time()
    thermochemical_data = get_thermochemical_data(
        dbf,
        comps,
        phases,
        datasets,
        model=model_dict,
        weight_dict=data_weights,
        symbols_to_fit=symbols_to_fit)
    t2 = time.time()
    _log.trace('Finished getting non-equilibrium thermochemical data (%0.2fs)',
               t2 - t1)
    _log.trace(
        'Getting equilibrium thermochemical data (this may take some time)')
    t1 = time.time()
    eq_thermochemical_data = get_equilibrium_thermochemical_data(
        dbf,
        comps,
        phases,
        datasets,
        model=model_dict,
        parameters=parameters,
        data_weight_dict=data_weights)
    t2 = time.time()
    _log.trace('Finished getting equilibrium thermochemical data (%0.2fs)',
               t2 - t1)
    _log.trace('Getting ZPF data (this may take some time)')
    t1 = time.time()
    zpf_data = get_zpf_data(dbf,
                            comps,
                            phases,
                            datasets,
                            model=model_dict,
                            parameters=parameters)
    t2 = time.time()
    _log.trace('Finished getting ZPF data (%0.2fs)', t2 - t1)

    # context for the log probability function
    # for all cases, parameters argument addressed in MCMC loop
    error_context = {
        'symbols_to_fit': symbols_to_fit,
        'zpf_kwargs': {
            'zpf_data': zpf_data,
            'data_weight': data_weights.get('ZPF', 1.0),
        },
        'equilibrium_thermochemical_kwargs': {
            'eq_thermochemical_data': eq_thermochemical_data,
        },
        'thermochemical_kwargs': {
            'thermochemical_data': thermochemical_data,
        },
        'activity_kwargs': {
            'dbf': dbf,
            'comps': comps,
            'phases': phases,
            'datasets': datasets,
            'phase_models': models,
            'callables': eq_callables,
            'data_weight': data_weights.get('ACR', 1.0),
        },
    }
    return error_context
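
The returned context is exactly what the pickling tests in Examples #3 and #4 consume. A sketch of the full loop, reusing unpack_piecewise and EmceeOptimizer as in those tests (the guess is taken from the caller's dbf, whose Piecewise symbols are untouched because setup_context works on a deep copy):

ctx = setup_context(dbf, datasets)
symbols = ctx['symbols_to_fit']
initial_guess = np.array([unpack_piecewise(dbf.symbols[s]) for s in symbols])
ctx.update(EmceeOptimizer.get_priors(None, symbols, initial_guess))
logp = EmceeOptimizer.predict(initial_guess, **ctx)
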
Example #7
def invariant_samples(dbf,
                      params,
                      X,
                      P,
                      Tl,
                      Tu,
                      comp,
                      client=None,
                      comps=None,
                      phases=None):
    """
    Find the composition and temperature of the invariants
    for parameter sets in params (for a binary)

    Parameters
    ----------

    dbf : Database
        Thermodynamic database containing the relevant parameters
    params : numpy array
        Array where the rows contain the parameter sets
        for the pycalphad equilibrium calculation
    X : float
        Guess for the mole fraction (of comp) of the invariant
    P : float
        Pressure (in Pa) at which to search for the invariants
    Tl : float
        Lower temperature bound to search for the invariants
    Tu : float
        Upper temperature bound to search for the invariants
    comp : str
        Name of the species
    client : Client, optional
        interface to dask.distributed compute cluster
    comps : list, optional
        Names of species to consider in the calculation
    phases : list or dict, optional
        Names of phases to consider in the calculation

    Returns
    -------
    Tv : list
        List of invariant temperatures corresponding to the
        parameter sets
    phv : list of list
        List of lists of phases
    bndv : numpy array
        Array where the first index corresponds to the parameter
        set, and the second index corresponds to the composition
        of the zero phase fraction boundaries of the first and last
        phases in phv, and of the three phase equilibrium.

    Examples
    --------
    >>> # let's do a multicore example
    >>> # first import modules and functions
    >>> import numpy as np
    >>> from dask.distributed import Client
    >>> from distributed.deploy.local import LocalCluster
    >>> from pycalphad import Database, variables as v
    >>> from pduq.invariant_calc import invariant_samples
    >>> # start the distributed client to parallelize the calculation
    >>> c = LocalCluster(n_workers=2, threads_per_worker=1)
    >>> client = Client(c)
    >>> # load the pycalphad database
    >>> dbf = Database('CU-MG_param_gen.tdb')
    >>> # load the parameter file
    >>> params = np.loadtxt('trace.csv', delimeter=',')[-2:, :]
    >>> # calculate the locations of invariant points for these two
    >>> # parameter sets in params
    >>> Tv, phv, bndv = invariant_samples(
    >>>     dbf, params, client=client, X=.2, P=101325,
    >>>     Tl=600, Tu=1400, comp='MG')
    >>> # print the temperatures of the invariant points
    >>> print(Tv)
    [1008.29467773 993.89038086]
    >>> # print the phases in equilibrium at the invariant points
    >>> print(phv)
    [['FCC_A1' 'LIQUID' 'LAVES_C15'], ['FCC_A1' 'LIQUID' 'LAVES_C15']]
    >>> # print the Mg molar fractions for the left phase boundary,
    >>> # the invariant, and the right phase boundary
    >>> print(bndv)
    [[0.04005779 0.21173958 0.33261747]
     [0.04096447 0.21720666 0.33295817]]
    """

    if comps is None:
        comps = list(dbf.elements)

    if phases is None:
        phases = list(dbf.phases.keys())

    neq = params.shape[0]  # calculate invariants for neq parameter sets

    symbols_to_fit = database_symbols_to_fit(dbf)

    # eq_callables = get_eq_callables_(dbf, comps, phases, symbols_to_fit)
    eq_callables = None  # eq_callables is disabled for current pycalphad

    kwargs = {
        'dbf': dbf,
        'comps': comps,
        'phases': phases,
        'X': X,
        'P': P,
        'Tl': Tl,
        'Tu': Tu,
        'comp': comp,
        'params': params,
        'symbols_to_fit': symbols_to_fit,
        'eq_callables': eq_callables
    }

    # invariant_(0, **kwargs)

    # define the map for the invariant calculation for neq parameter sets
    if client is None:
        invL = []
        for ii in range(neq):
            invL.append(invariant_(ii, **kwargs))
    else:
        A = client.map(invariant_, range(neq), **kwargs)
        invL = client.gather(A)
        client.close()

    # collect the key results after the map
    Tv = np.zeros((neq, ))
    phv = neq * [None]
    bndv = np.zeros((neq, 3))
    for ii in range(neq):
        Tv[ii] = invL[ii][0]
        phv[ii] = invL[ii][1]
        bndv[ii, :] = invL[ii][2]

    return Tv, phv, bndv
Example #8
def run_espei(run_settings):
    """Wrapper around the ESPEI fitting procedure, taking only a settings dictionary.

    Parameters
    ----------
    run_settings : dict
        Dictionary of input settings

    Returns
    -------
    Either a Database (for generate parameters only) or a tuple of (Database, sampler)
    """
    run_settings = get_run_settings(run_settings)
    system_settings = run_settings['system']
    output_settings = run_settings['output']
    generate_parameters_settings = run_settings.get('generate_parameters')
    mcmc_settings = run_settings.get('mcmc')

    # handle verbosity
    verbosity = {
        0: logging.WARNING,
        1: logging.INFO,
        2: TRACE,
        3: logging.DEBUG
    }
    logging.basicConfig(level=verbosity[output_settings['verbosity']],
                        filename=output_settings['logfile'])

    log_version_info()

    # load datasets and handle i/o
    logging.log(TRACE, 'Loading and checking datasets.')
    dataset_path = system_settings['datasets']
    datasets = load_datasets(sorted(recursive_glob(dataset_path, '*.json')))
    if len(datasets.all()) == 0:
        logging.warning(
            'No datasets were found in the path {}. This should be a directory containing dataset files ending in `.json`.'
            .format(dataset_path))
    apply_tags(datasets, system_settings.get('tags', dict()))
    add_ideal_exclusions(datasets)
    logging.log(TRACE, 'Finished checking datasets')

    with open(system_settings['phase_models']) as fp:
        phase_models = json.load(fp)

    if generate_parameters_settings is not None:
        refdata = generate_parameters_settings['ref_state']
        excess_model = generate_parameters_settings['excess_model']
        ridge_alpha = generate_parameters_settings['ridge_alpha']
        aicc_penalty = generate_parameters_settings['aicc_penalty_factor']
        input_dbf = generate_parameters_settings.get('input_db', None)
        if input_dbf is not None:
            input_dbf = Database(input_dbf)
        dbf = generate_parameters(
            phase_models,
            datasets,
            refdata,
            excess_model,
            ridge_alpha=ridge_alpha,
            dbf=input_dbf,
            aicc_penalty_factor=aicc_penalty,
        )
        dbf.to_file(output_settings['output_db'], if_exists='overwrite')

    if mcmc_settings is not None:
        tracefile = output_settings['tracefile']
        probfile = output_settings['probfile']
        # check that the MCMC output files do not already exist
        # only matters if we are actually running MCMC
        if os.path.exists(tracefile):
            raise OSError(
                'Tracefile "{}" exists and would be overwritten by a new run. Use the ``output.tracefile`` setting to set a different name.'
                .format(tracefile))
        if os.path.exists(probfile):
            raise OSError(
                'Probfile "{}" exists and would be overwritten by a new run. Use the ``output.probfile`` setting to set a different name.'
                .format(probfile))

        # scheduler setup
        if mcmc_settings['scheduler'] == 'dask':
            _raise_dask_work_stealing()  # check for work-stealing
            from distributed import LocalCluster
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if (cores > multiprocessing.cpu_count()):
                cores = multiprocessing.cpu_count()
                logging.warning(
                    "The number of cores chosen is larger than available. "
                    "Defaulting to run on the {} available cores.".format(
                        cores))
            # TODO: make dask-scheduler-verbosity a YAML input so that users can debug. Should have the same log levels as verbosity
            scheduler = LocalCluster(n_workers=cores,
                                     threads_per_worker=1,
                                     processes=True,
                                     memory_limit=0)
            client = ImmediateClient(scheduler)
            client.run(logging.basicConfig,
                       level=verbosity[output_settings['verbosity']],
                       filename=output_settings['logfile'])
            logging.info("Running with dask scheduler: %s [%s cores]" %
                         (scheduler, sum(client.ncores().values())))
            try:
                bokeh_server_info = client.scheduler_info()['services']['bokeh']
                logging.info(
                    "bokeh server for dask scheduler at localhost:{}".format(
                        bokeh_server_info))
            except KeyError:
                logging.info("Install bokeh to use the dask bokeh server.")
        elif mcmc_settings['scheduler'] == 'None':
            client = None
            logging.info(
                "Not using a parallel scheduler. ESPEI is running MCMC on a single core."
            )
        else:  # we were passed a scheduler file name
            _raise_dask_work_stealing()  # check for work-stealing
            client = ImmediateClient(scheduler_file=mcmc_settings['scheduler'])
            client.run(logging.basicConfig,
                       level=verbosity[output_settings['verbosity']],
                       filename=output_settings['logfile'])
            logging.info("Running with dask scheduler: %s [%s cores]" %
                         (client.scheduler, sum(client.ncores().values())))

        # get a Database
        if mcmc_settings.get('input_db'):
            dbf = Database(mcmc_settings.get('input_db'))

        # load the restart trace if needed
        if mcmc_settings.get('restart_trace'):
            restart_trace = np.load(mcmc_settings.get('restart_trace'))
        else:
            restart_trace = None

        # load the remaining mcmc fitting parameters
        iterations = mcmc_settings.get('iterations')
        save_interval = mcmc_settings.get('save_interval')
        chains_per_parameter = mcmc_settings.get('chains_per_parameter')
        chain_std_deviation = mcmc_settings.get('chain_std_deviation')
        deterministic = mcmc_settings.get('deterministic')
        prior = mcmc_settings.get('prior')
        data_weights = mcmc_settings.get('data_weights')
        syms = mcmc_settings.get('symbols')

        # set up and run the EmceeOptimizer
        optimizer = EmceeOptimizer(dbf, scheduler=client)
        optimizer.save_interval = save_interval
        all_symbols = syms if syms is not None else database_symbols_to_fit(
            dbf)
        optimizer.fit(all_symbols,
                      datasets,
                      prior=prior,
                      iterations=iterations,
                      chains_per_parameter=chains_per_parameter,
                      chain_std_deviation=chain_std_deviation,
                      deterministic=deterministic,
                      restart_trace=restart_trace,
                      tracefile=tracefile,
                      probfile=probfile,
                      mcmc_data_weights=data_weights)
        optimizer.commit()

        optimizer.dbf.to_file(output_settings['output_db'],
                              if_exists='overwrite')
        # close the scheduler, if possible
        if hasattr(client, 'close'):
            client.close()
        return optimizer.dbf, optimizer.sampler
    return dbf
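
run_espei takes the dictionary form of ESPEI's YAML input. A minimal parameter-generation sketch; the keys mirror those read in the body above, the file names are hypothetical, and get_run_settings is assumed to supply defaults (e.g. ridge_alpha, aicc_penalty_factor) for anything omitted:

settings = {
    'system': {
        'phase_models': 'CU-MG.json',  # hypothetical phase models file
        'datasets': 'input-datasets',  # directory of *.json dataset files
    },
    'output': {
        'verbosity': 2,         # TRACE
        'output_db': 'out.tdb',
    },
    'generate_parameters': {
        'ref_state': 'SGTE91',
        'excess_model': 'linear',
    },
}
dbf = run_espei(settings)
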
Example #9
def mcmc_fit(
    dbf,
    datasets,
    iterations=1000,
    save_interval=100,
    chains_per_parameter=2,
    chain_std_deviation=0.1,
    scheduler=None,
    tracefile=None,
    probfile=None,
    restart_trace=None,
    deterministic=True,
):
    """
    Run Markov Chain Monte Carlo on the Database given datasets

    Parameters
    ----------
    dbf : Database
        A pycalphad Database to fit with symbols to fit prefixed with `VV`
        followed by a number, e.g. `VV0001`
    datasets : PickleableTinyDB
        A database of single- and multi-phase data to fit
    iterations : int
        Number of trace iterations to calculate in MCMC. Default is 1000 iterations.
    save_interval : int
        interval of iterations to save the tracefile and probfile
    chains_per_parameter : int
        number of chains for each parameter. Must be an even integer greater or
        equal to 2. Defaults to 2.
    chain_std_deviation : float
        standard deviation of normal for parameter initialization as a fraction
        of each parameter. Must be greater than 0. Default is 0.1, which is 10%.
    scheduler : callable
        Scheduler to use with emcee. Must implement a map method.
    tracefile : str
        filename to store the trace with NumPy.save. Array has shape
        (chains, iterations, parameters)
    probfile : str
        filename to store the log probability with NumPy.save. Has shape (chains, iterations)
    restart_trace : np.ndarray
        ndarray of the previous trace. Should have shape (chains, iterations, parameters)
    deterministic : bool
        If True, the emcee sampler will be seeded to give deterministic sampling
        draws. This will ensure that the runs with the exact same database,
        chains_per_parameter, and chain_std_deviation (or restart_trace) will
        produce exactly the same results.

    Returns
    -------
    dbf : Database
        Resulting pycalphad database of optimized parameters
    sampler : EnsembleSampler
        emcee sampler for further data wrangling
    """
    comps = sorted([sp for sp in dbf.elements])
    symbols_to_fit = database_symbols_to_fit(dbf)

    if len(symbols_to_fit) == 0:
        raise ValueError(
            'No degrees of freedom. Database must contain symbols starting with \'V\' or \'VV\', followed by a number.'
        )
    else:
        logging.info('Fitting {} degrees of freedom.'.format(
            len(symbols_to_fit)))

    for x in symbols_to_fit:
        if isinstance(dbf.symbols[x], sympy.Piecewise):
            logging.debug('Replacing {} in database'.format(x))
            dbf.symbols[x] = dbf.symbols[x].args[0].expr

    # get initial parameters and remove these from the database
    # we'll replace them with SymPy symbols initialized to 0 in the phase models
    initial_parameters = np.array(
        [np.array(float(dbf.symbols[x])) for x in symbols_to_fit])

    # construct the models for each phase, substituting in the SymPy symbol to fit.
    logging.debug('Building phase models (this may take some time)')
    # 0 is placeholder value
    phases = sorted(dbf.phases.keys())
    sympy_symbols_to_fit = [sympy.Symbol(sym) for sym in symbols_to_fit]
    orig_parameters = {
        sym: p
        for sym, p in zip(symbols_to_fit, initial_parameters)
    }
    eq_callables = build_callables(dbf,
                                   comps,
                                   phases,
                                   model=Model,
                                   parameters=orig_parameters)
    # because error_context expects a 'phase_models' key, rename it
    eq_callables['phase_models'] = eq_callables.pop('model')
    eq_callables.pop('phase_records')
    # we also need to build models that have no ideal mixing for thermochemical error and to build them for each property we might calculate
    # TODO: potential optimization to only calculate for phase/property combos that we have in the datasets
    # first construct dict of models without ideal mixing
    mods_no_idmix = {}
    for phase_name in phases:
        # we have to pass the list of Symbol objects to fit so they are popped from the database and can properly be replaced.
        mods_no_idmix[phase_name] = Model(dbf,
                                          comps,
                                          phase_name,
                                          parameters=sympy_symbols_to_fit)
        mods_no_idmix[phase_name].models['idmix'] = 0
    # now construct callables for each possible property that can be calculated
    # will be a dict of {output_property: eq_callables_dict}
    thermochemical_callables = {}
    whitelist_properties = ['HM', 'SM', 'CPM']
    whitelist_properties = whitelist_properties + [
        prop + '_MIX' for prop in whitelist_properties
    ]
    for prop in whitelist_properties:
        thermochemical_callables[prop] = build_callables(
            dbf,
            comps,
            phases,
            model=mods_no_idmix,
            output=prop,
            parameters=orig_parameters,
            build_gradients=False)
        # pop off the callables not used in properties because we don't want them around (they should be None, anyways)
        thermochemical_callables[prop].pop('phase_records')
        thermochemical_callables[prop].pop('model')
    logging.debug('Finished building phase models')

    # context for the log probability function
    error_context = {
        'comps': comps,
        'dbf': dbf,
        'phases': phases,
        'phase_models': eq_callables['phase_models'],
        'datasets': datasets,
        'symbols_to_fit': symbols_to_fit,
        'thermochemical_callables': thermochemical_callables,
        'callables': eq_callables,
    }

    def save_sampler_state(sampler):
        if tracefile:
            logging.debug('Writing trace to {}'.format(tracefile))
            np.save(tracefile, sampler.chain)
        if probfile:
            logging.debug('Writing lnprob to {}'.format(probfile))
            np.save(probfile, sampler.lnprobability)

    # initialize the walkers either fresh or from the restart
    if restart_trace is not None:
        walkers = restart_trace[np.nonzero(restart_trace)].reshape(
            (restart_trace.shape[0], -1, restart_trace.shape[2]))[:, -1, :]
        nwalkers = walkers.shape[0]
        ndim = walkers.shape[1]
        initial_parameters = walkers.mean(axis=0)
        logging.info(
            'Restarting from previous calculation with {} chains ({} per parameter).'
            .format(nwalkers, nwalkers / ndim))
        logging.debug(
            'Means of restarting parameters are {}'.format(initial_parameters))
        logging.debug(
            'Standard deviations of restarting parameters are {}'.format(
                walkers.std(axis=0)))
    else:
        logging.debug('Initial parameters: {}'.format(initial_parameters))
        ndim = initial_parameters.size
        nwalkers = ndim * chains_per_parameter
        logging.info(
            'Initializing {} chains with {} chains per parameter.'.format(
                nwalkers, chains_per_parameter))
        walkers = generate_parameter_distribution(initial_parameters,
                                                  nwalkers,
                                                  chain_std_deviation,
                                                  deterministic=deterministic)

    # the pool must implement a map function
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnprob,
                                    kwargs=error_context,
                                    pool=scheduler)
    if deterministic:
        from espei.rstate import numpy_rstate
        sampler.random_state = numpy_rstate
        logging.info('Using a deterministic ensemble sampler.')
    progbar_width = 30
    logging.info('Running MCMC for {} iterations.'.format(iterations))
    try:
        for i, result in enumerate(
                sampler.sample(walkers, iterations=iterations)):
            # progress bar
            if (i + 1) % save_interval == 0:
                save_sampler_state(sampler)
                logging.debug('Acceptance ratios for parameters: {}'.format(
                    sampler.acceptance_fraction))
            n = int((progbar_width + 1) * float(i) / iterations)
            sys.stdout.write("\r[{0}{1}] ({2} of {3})\n".format(
                '#' * n, ' ' * (progbar_width - n), i + 1, iterations))
        n = int((progbar_width + 1) * float(i + 1) / iterations)
        sys.stdout.write("\r[{0}{1}] ({2} of {3})\n".format(
            '#' * n, ' ' * (progbar_width - n), i + 1, iterations))
    except KeyboardInterrupt:
        pass
    # final processing
    save_sampler_state(sampler)
    optimal_params = optimal_parameters(sampler.chain, sampler.lnprobability)
    logging.debug('Initial parameters: {}'.format(initial_parameters))
    logging.debug('Optimal parameters: {}'.format(optimal_params))
    logging.debug('Change in parameters: {}'.format(
        np.abs(initial_parameters - optimal_params) / initial_parameters))
    for param_name, value in zip(symbols_to_fit, optimal_params):
        dbf.symbols[param_name] = value
    logging.info('MCMC complete.')
    return dbf, sampler
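
A hedged usage sketch of this mcmc_fit, loading datasets the same way run_espei does in Example #8 (file and directory names hypothetical):

from pycalphad import Database
from espei.datasets import load_datasets, recursive_glob  # assumed location

dbf = Database('CU-MG_param_gen.tdb')  # database with VV-prefixed symbols
datasets = load_datasets(sorted(recursive_glob('input-datasets', '*.json')))
dbf_opt, sampler = mcmc_fit(dbf, datasets, iterations=500, save_interval=50,
                            tracefile='trace.npy', probfile='lnprob.npy')
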
Example #10
def plot_property(dbf,
                  comps,
                  phaseL,
                  params,
                  T,
                  prop,
                  config=None,
                  datasets=None,
                  xlim=None,
                  xlabel=None,
                  ylabel=None,
                  yscale=None,
                  phase_label_dict=None,
                  unit='kJ/mol.',
                  cdict=None,
                  figsize=None):
    """
    Plot a property of interest versus temperature with uncertainty
    bounds for all phases of interest

    Parameters
    ----------
    dbf : Database
        Thermodynamic database containing the relevant parameters
    comps : list
        Names of components to consider in the calculation
    phaseL : list
        Names of phases to plot properties for
    params : numpy array
        Array where the rows contain the parameter sets
        for the pycalphad equilibrium calculation
    T : list, array, or xarray object
        Temperature values at which to plot the selected property
    prop : str
        property (or attribute in pycalphad terminology) to sample,
        e.g. GM for molar Gibbs energy or H_MIX for the enthalpy of
        mixing
    config : tuple, optional
        Sublattice configuration as a tuple, e.g. ("CU", ("CU", "MG"))
    datasets : espei.utils.PickleableTinyDB, optional
        Database of datasets to search for data
    xlim : list or tuple of float, optional
        List or tuple with two floats corresponding to the
        minimum and maximum molar composition of comp
    xlabel : str, optional
        plot x label
    ylabel : str, optional
        plot y label
    yscale : int or float, optional
        scaling factor to apply to property (e.g. to plot kJ/mol.
        instead of J/mol. choose yscale to be 0.001)
    phase_label_dict : dict, optional
        Dictionary with keys given by phase names and corresponding
        strings to use in plotting (e.g. to enable LaTeX labels)
    unit : str, optional
        Unit to plot on the y-axis for the property of interest
    cdict : dict, optional
        Dictionary with phase names and corresponding
        colors
    figsize : tuple or list of int or float, optional
        Plot dimensions in inches

    Returns
    -------

    Examples
    --------
    >>> import numpy as np
    >>> import pduq.uq_plot as uq
    >>> from pycalphad import Database
    >>> dbf = Database('CU-MG_param_gen.tdb')
    >>> comps = ['MG', 'CU', 'VA']
    >>> phaseL = ['CUMG2', 'LIQUID']
    >>> params = np.loadtxt('params.npy')[: -1, :]
    >>> T = 650
    >>> prop = 'GM'
    >>> # Plot the molar gibbs energy of all phases in phaseL
    >>> # versus molar fraction of MG at 650K. This will have
    >>> # uncertainty intervals generated by the parameter sets
    >>> # in params
    >>> uq.plot_property(dbf, comps, phaseL, params, T, prop)
    """

    symbols_to_fit = database_symbols_to_fit(dbf)

    CI = 95
    nph = len(phaseL)
    colorL = sns.color_palette("cubehelix", nph)
    markerL = 10 * [
        'o', 'D', '^', 'x', 'h', 's', 'v', '*', 'P', 'p', '>', 'd', '<'
    ]

    plt.figure(figsize=figsize)

    # compute uncertainty in property for each phase in list
    for ii in range(nph):
        phase = phaseL[ii]
        print('starting', prop, 'evaluations for the', phase, 'phase')

        # for each parameter sample calculate the property
        # for each possible site occupancy ratios
        compL = []
        for index in range(params.shape[0]):
            param_dict = {
                param_name: param
                for param_name, param in zip(symbols_to_fit, params[index, :])
            }
            parameters = OrderedDict(sorted(param_dict.items(), key=str))
            comp = calculate(dbf,
                             comps,
                             phase,
                             P=101325,
                             T=T,
                             output=prop,
                             parameters=parameters)
            compL += [comp]

        # concatenate the calculate results in an xarray along
        # an axis named 'sample'
        compC = xr.concat(compL, 'sample')
        compC.coords['sample'] = np.arange(params.shape[0])

        # The composition vector is the same for all samples
        if hasattr(T, "__len__"):
            Xvals = T
        else:
            Xvals = comp.X.sel(component=comps[0]).values.squeeze()
        Pvals = compC[prop].where(compC.Phase == phase).values.squeeze()

        if np.array(Xvals).size == 1:
            print('phase is a line compound')
            Xvals_ = np.array([Xvals - 0.002, Xvals + 0.002])
            Pvals_ = np.vstack([Pvals, Pvals]).T
        else:
            # find the lower hull of the property by finding
            # the configuration with the lowest value within
            # each interval. In each interval record the composition
            # and property
            indxL = np.array([])
            # Xbnds = np.arange(0, 1.01, 0.01)
            Xbnds = np.linspace(Xvals.min(), Xvals.max(), 100)
            for lb, ub in zip(Xbnds[:-1], Xbnds[1:]):
                # print('lb: ', lb, ', ub: ', ub)
                boolA = (lb <= Xvals) * (Xvals < ub)
                if boolA.sum() == 0:
                    continue
                indxA = np.arange(boolA.size)[boolA]
                P_ = Pvals[0, boolA]
                indxL = np.append(indxL, indxA[P_.argmin()])
                # indxL = np.append(indxL, indxA[P_.argmax()])
            indxL = indxL.astype('int32')

            if indxL.size == 1:
                print('only one point found')
                Xvals_ = Xvals[indxL.item()]
                Pvals_ = Pvals[:, indxL.item()]
            else:
                Xvals_ = Xvals[indxL]
                Pvals_ = Pvals[:, indxL]

        # Xvals_ = Xvals
        # Pvals_ = Pvals
        # for ii in range(params.shape[0]):
        #     plt.plot(Xvals_, Pvals_[ii, :], 'k-', linewidth=0.5, alpha=0.1)
        # plt.show()

        if yscale is not None:
            Pvals_ *= yscale

        low, mid, high = np.percentile(
            Pvals_, [0.5 * (100 - CI), 50, 100 - 0.5 * (100 - CI)], axis=0)

        if cdict is not None:
            color = cdict[phase]
        else:
            color = colorL[ii]

        if phase_label_dict is not None:
            label = phase_label_dict[phase]
        else:
            label = phase

        plt.plot(Xvals_, mid, linestyle='-', color=color, label=label)
        plt.fill_between(np.atleast_1d(Xvals_),
                         low,
                         high,
                         alpha=0.3,
                         facecolor=color)

        # collect and plot experimental data
        if config is not None and datasets is not None:
            symmetry = None
            data = get_data(comps, phase, config, symmetry, datasets, prop)
            print(data)
            for data_s, marker in zip(data, markerL):
                occupancies = data_s['solver']['sublattice_occupancies']
                # at the moment this needs to be changed manually
                X_vec = [row[0][0] for row in occupancies]
                values = np.squeeze(data_s['values'])

                if yscale is not None:
                    values *= yscale

                plt.plot(X_vec,
                         values,
                         linestyle='',
                         marker=marker,
                         markerfacecolor='none',
                         markeredgecolor=color,
                         markersize=6,
                         alpha=0.9,
                         label=data_s['reference'])

    if xlim is None:
        plt.xlim([Xvals_.min(), Xvals_.max()])
    else:
        plt.xlim(xlim)

    if xlabel is not None:
        plt.xlabel(xlabel)
    else:
        plt.xlabel(r'$X_{%s}$' % comps[0])

    if ylabel is not None:
        plt.ylabel(ylabel)
    else:
        plt.ylabel(prop + ' (' + unit + ')')

    plt.legend()
    plt.tight_layout()
Example #11
def mcmc_fit(dbf, datasets, mcmc_steps=1000, save_interval=100, chains_per_parameter=2,
             chain_std_deviation=0.1, scheduler=None, tracefile=None, probfile=None,
             restart_chain=None, deterministic=True,):
    """Run Markov Chain Monte Carlo on the Database given datasets

    Parameters
    ----------
    dbf : Database
        A pycalphad Database to fit with symbols to fit prefixed with `VV`
        followed by a number, e.g. `VV0001`
    datasets : PickleableTinyDB
        A database of single- and multi-phase data to fit
    mcmc_steps : int
        Number of chain steps to calculate in MCMC. Note the flattened chain will
        have (mcmc_steps*DOF) values. Default is 1000 steps.
    save_interval : int
        interval of steps to save the chain to the tracefile and probfile
    chains_per_parameter : int
        number of chains for each parameter. Must be an even integer greater or
        equal to 2. Defaults to 2.
    chain_std_deviation : float
        standard deviation of normal for parameter initialization as a fraction
        of each parameter. Must be greater than 0. Default is 0.1, which is 10%.
    scheduler : callable
        Scheduler to use with emcee. Must implement a map method.
    tracefile : str
        filename to store the flattened chain with NumPy.save. Array has shape
        (nwalkers, iterations, nparams)
    probfile : str
        filename to store the flattened ln probability with NumPy.save
    restart_chain : np.ndarray
        ndarray of the previous chain. Should have shape (nwalkers, iterations, nparams)
    deterministic : bool
        If True, the emcee sampler will be seeded to give deterministic sampling
        draws. This will ensure that the runs with the exact same database,
        chains_per_parameter, and chain_std_deviation (or restart_chain) will
        produce exactly the same results.

    Returns
    -------
    dbf : Database
        Resulting pycalphad database of optimized parameters
    sampler : EnsembleSampler
        emcee sampler for further data wrangling
    """
    comps = sorted([sp for sp in dbf.elements])
    symbols_to_fit = database_symbols_to_fit(dbf)

    if len(symbols_to_fit) == 0:
        raise ValueError('No degrees of freedom. Database must contain symbols starting with \'V\' or \'VV\', followed by a number.')
    else:
        logging.info('Fitting {} degrees of freedom.'.format(len(symbols_to_fit)))

    for x in symbols_to_fit:
        if isinstance(dbf.symbols[x], sympy.Piecewise):
            logging.debug('Replacing {} in database'.format(x))
            dbf.symbols[x] = dbf.symbols[x].args[0].expr

    # get initial parameters and remove these from the database
    # we'll replace them with SymPy symbols initialized to 0 in the phase models
    initial_parameters = np.array([np.array(float(dbf.symbols[x])) for x in symbols_to_fit])
    for x in symbols_to_fit:
        del dbf.symbols[x]

    # construct the models for each phase, substituting in the SymPy symbol to fit.
    phase_models = dict()
    logging.debug('Building phase models')
    # 0 is placeholder value
    phases = sorted(dbf.phases.keys())
    for phase_name in phases:
        mod = CompiledModel(dbf, comps, phase_name, parameters=OrderedDict([(sympy.Symbol(s), 0) for s in symbols_to_fit]))
        phase_models[phase_name] = mod
    logging.debug('Finished building phase models')

    # context for the log probability function
    error_context = {'comps': comps, 'dbf': dbf,
                     'phases': phases,
                     'phase_models': phase_models,
                     'datasets': datasets, 'symbols_to_fit': symbols_to_fit,
                     }

    def save_sampler_state(sampler):
        if tracefile:
            logging.debug('Writing chain to {}'.format(tracefile))
            np.save(tracefile, sampler.chain)
        if probfile:
            logging.debug('Writing lnprob to {}'.format(probfile))
            np.save(probfile, sampler.lnprobability)


    # initialize the walkers either fresh or from the restart
    if restart_chain is not None:
        walkers = restart_chain[np.nonzero(restart_chain)].reshape(
            (restart_chain.shape[0], -1, restart_chain.shape[2]))[:, -1, :]
        nwalkers = walkers.shape[0]
        ndim = walkers.shape[1]
        initial_parameters = walkers.mean(axis=0)
        logging.info('Restarting from previous calculation with {} chains ({} per parameter).'.format(nwalkers, nwalkers / ndim))
        logging.debug('Means of restarting parameters are {}'.format(initial_parameters))
        logging.debug('Standard deviations of restarting parameters are {}'.format(walkers.std(axis=0)))
    else:
        logging.debug('Initial parameters: {}'.format(initial_parameters))
        ndim = initial_parameters.size
        nwalkers = ndim * chains_per_parameter
        logging.info('Initializing {} chains with {} chains per parameter.'.format(nwalkers, chains_per_parameter))
        walkers = generate_parameter_distribution(initial_parameters, nwalkers, chain_std_deviation, deterministic=deterministic)

    # the pool must implement a map function
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, kwargs=error_context, pool=scheduler)
    if deterministic:
        from espei.rstate import numpy_rstate
        sampler.random_state = numpy_rstate
        logging.info('Using a deterministic ensemble sampler.')
    progbar_width = 30
    logging.info('Running MCMC with {} steps.'.format(mcmc_steps))
    try:
        for i, result in enumerate(sampler.sample(walkers, iterations=mcmc_steps)):
            # progress bar
            if (i + 1) % save_interval == 0:
                save_sampler_state(sampler)
                logging.debug('Acceptance ratios for parameters: {}'.format(sampler.acceptance_fraction))
            n = int((progbar_width + 1) * float(i) / mcmc_steps)
            sys.stdout.write("\r[{0}{1}] ({2} of {3})\n".format('#'*n, ' '*(progbar_width - n), i + 1, mcmc_steps))
        n = int((progbar_width + 1) * float(i + 1) / mcmc_steps)
        sys.stdout.write("\r[{0}{1}] ({2} of {3})\n".format('#'*n, ' '*(progbar_width - n), i + 1, mcmc_steps))
    except KeyboardInterrupt:
        pass
    # final processing
    save_sampler_state(sampler)
    optimal_params = optimal_parameters(sampler.chain, sampler.lnprobability)
    logging.debug('Initial parameters: {}'.format(initial_parameters))
    logging.debug('Optimal parameters: {}'.format(optimal_params))
    logging.debug('Change in parameters: {}'.format(np.abs(initial_parameters - optimal_params) / initial_parameters))
    for param_name, value in zip(symbols_to_fit, optimal_params):
        dbf.symbols[param_name] = value
    logging.info('MCMC complete.')
    return dbf, sampler
Example #12
File: dbf_calc.py  Project: npaulson/pduq
def eq_calc_samples(dbf,
                    conds,
                    params,
                    client=None,
                    comps=None,
                    phases=None,
                    savef=None):
    """
    Perform equilibrium calculations for the parameter sets
    in params

    Parameters
    ----------
    dbf : Database
        Thermodynamic database containing the relevant parameters
    conds : dict or list of dict
        StateVariables and their corresponding value
    params : numpy array
        Array where the rows contain the parameter sets
        for the pycalphad equilibrium calculation
    client : Client, optional
        interface to dask.distributed compute cluster
    comps : list
        Names of components to consider in the calculation
    phases : list or dict
        Names of phases to consider in the calculation
    savef : str
        Save file for the equilibrium calculations

    Returns
    -------
    structured equilibrium calculation
        structured equilibrium calculations for parameter sets in
        params

    Examples
    --------
    >>> # let's do a multicore example
    >>> # first import modules and functions
    >>> import numpy as np
    >>> from pycalphad import Database, variables as v
    >>> from distributed.deploy.local import LocalCluster
    >>> from pduq.dbf_calc import eq_calc_samples
    >>> # start the distributed client to parallelize the calculation
    >>> c = LocalCluster(n_workers=2, threads_per_worker=1)
    >>> client = Client(c)
    >>> # load the pycalphad database
    >>> dbf = Database('CU-MG_param_gen.tdb')
    >>> # load the parameter file
    >>> params = np.loadtxt('trace.csv', delimeter=',')
    >>> # define the equilibrium conditions
    >>> conds = {v.P: 101325, v.T: 1003, v.X('MG'): 0.214}
    >>> # perform the parallel equilibrium calculations for the last two
    >>> # parameter sets in param
    >>> eqC = eq_calc_samples(dbf, conds, params[-2:, :], client=client)
    >>> # let's look at the phases in equilibrium for the two parameter
    >>> # sets
    >>> print(np.squeeze(eqC.Phase.values))
    [['FCC_A1' 'LAVES_C15' '']
     ['LIQUID' '' '']]
    """

    if comps is None:
        comps = list(dbf.elements)

    if phases is None:
        phases = list(dbf.phases.keys())

    symbols_to_fit = database_symbols_to_fit(dbf)

    # eq_callables = get_eq_callables_(dbf, comps, phases, symbols_to_fit)
    eq_callables = None

    kwargs = {
        'dbf': dbf,
        'comps': comps,
        'phases': phases,
        'conds': conds,
        'params': params,
        'symbols_to_fit': symbols_to_fit,
        'eq_callables': eq_callables
    }

    neq = params.shape[0]

    if neq < 20:
        nch = neq
    else:
        nch = 20
    chunks = [list(range(neq))[ii::nch] for ii in range(nch)]

    if client is None:
        eqL = []
        for chunk in chunks:
            eqL += eq_calc_chunk_(chunk, **kwargs)
    else:
        A = client.map(eq_calc_chunk_, chunks, **kwargs)
        eqL = client.gather(A)
        eqL = list(chain.from_iterable(eqL))
        client.close()

    eqC = xr.concat(eqL, 'sample')
    eqC.coords['sample'] = np.arange(neq)

    logging.info(str(eqC))

    if savef is not None:
        with open(savef, 'wb') as buff:
            pickle.dump(eqC, buff)

    return eqC
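
The chunking above interleaves the parameter-set indices across at most 20 chunks so each worker gets a similar share; a quick illustration of the slicing:

neq, nch = 7, 3
chunks = [list(range(neq))[ii::nch] for ii in range(nch)]
print(chunks)  # [[0, 3, 6], [1, 4], [2, 5]]
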
Example #13
File: mcmc.py  Project: npaulson/ESPEI
def mcmc_fit(dbf,
             datasets,
             iterations=1000,
             save_interval=1,
             chains_per_parameter=2,
             chain_std_deviation=0.1,
             scheduler=None,
             tracefile=None,
             probfile=None,
             restart_trace=None,
             deterministic=True,
             prior=None,
             mcmc_data_weights=None):
    """
    Run MCMC via the EmceeOptimizer class

    Parameters
    ----------
    dbf : Database
        A pycalphad Database to fit with symbols to fit prefixed with `VV`
        followed by a number, e.g. `VV0001`
    datasets : PickleableTinyDB
        A database of single- and multi-phase data to fit
    iterations : int
        Number of trace iterations to calculate in MCMC. Default is 1000 iterations.
    save_interval : int
        interval of iterations to save the tracefile and probfile
    chains_per_parameter : int
        number of chains for each parameter. Must be an even integer greater or
        equal to 2. Defaults to 2.
    chain_std_deviation : float
        standard deviation of normal for parameter initialization as a fraction
        of each parameter. Must be greater than 0. Default is 0.1, which is 10%.
    scheduler : callable
        Scheduler to use with emcee. Must implement a map method.
    tracefile : str
        filename to store the trace with NumPy.save. Array has shape
        (chains, iterations, parameters)
    probfile : str
        filename to store the log probability with NumPy.save. Has shape (chains, iterations)
    restart_trace : np.ndarray
        ndarray of the previous trace. Should have shape (chains, iterations, parameters)
    deterministic : bool
        If True, the emcee sampler will be seeded to give deterministic sampling
        draws. This will ensure that the runs with the exact same database,
        chains_per_parameter, and chain_std_deviation (or restart_trace) will
        produce exactly the same results.
    prior : str
        Prior to use to generate priors. Defaults to 'zero', which keeps
        backwards compatibility. Can currently choose 'normal', 'uniform',
        'triangular', or 'zero'.
    mcmc_data_weights : dict
        Dictionary of weights for each data type, e.g. {'ZPF': 20, 'HM': 2}

    """
    warnings.warn("The mcmc convenience function will be removed in ESPEI 0.8")
    all_symbols = database_symbols_to_fit(dbf)

    optimizer = EmceeOptimizer(dbf, scheduler=scheduler)
    optimizer.save_interval = save_interval
    optimizer.fit(all_symbols,
                  datasets,
                  prior=prior,
                  iterations=iterations,
                  chains_per_parameter=chains_per_parameter,
                  chain_std_deviation=chain_std_deviation,
                  deterministic=deterministic,
                  restart_trace=restart_trace,
                  tracefile=tracefile,
                  probfile=probfile,
                  mcmc_data_weights=mcmc_data_weights)
    optimizer.commit()
    return optimizer.dbf, optimizer.sampler
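
Since this wrapper is slated for removal, the equivalent direct use of EmceeOptimizer (mirroring the body above) is the forward-compatible form:

optimizer = EmceeOptimizer(dbf, scheduler=None)  # or a dask client
optimizer.save_interval = 100
optimizer.fit(database_symbols_to_fit(dbf), datasets,
              prior='zero',
              iterations=1000,
              chains_per_parameter=2,
              chain_std_deviation=0.1,
              deterministic=True)
optimizer.commit()
dbf_opt, sampler = optimizer.dbf, optimizer.sampler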