Example 1
0
    def _fit(self,
             symbols,
             ds,
             method='Powell',
             maxiter=50,
             verbose=False,
             **kwargs):
        """Fit ``symbols`` against the datasets ``ds`` with scipy.optimize.minimize.

        The log-probability objective (``self.predict``) is minimized, by
        default using the Powell method, and the fitted values are returned
        wrapped in an OptNode. Extra ``kwargs`` are forwarded to ``minimize``.
        """
        # Build the fitting context (callables, data, symbol ordering).
        ctx = setup_context(self.dbf, ds, symbols)
        fit_symbols = ctx['symbols_to_fit']
        # Start from the values currently stored in the database.
        guess = np.array(
            [unpack_piecewise(self.dbf.symbols[name]) for name in fit_symbols])
        if verbose:
            print('Fitting', fit_symbols)
            print('Initial guess', guess)
        start = time.time()
        result = minimize(self.predict,
                          guess,
                          args=(ctx, ),
                          method=method,
                          options={'maxiter': maxiter},
                          **kwargs)
        fitted_values = np.atleast_1d(result.x).tolist()
        if verbose:
            print('Found', fitted_values, 'in', int(time.time() - start), 's')
        return OptNode(dict(zip(fit_symbols, fitted_values)), ds)
Example 2
0
 def __init__(self, dbf):
     """Initialize the optimizer graph from a starting Database.

     Keeps a pristine deep copy of ``dbf`` alongside a working copy, and
     seeds the root OptNode with the database's current fitable symbol
     values and an empty dataset store.
     """
     self.orig_dbf = copy.deepcopy(dbf)
     self.dbf = copy.deepcopy(dbf)
     # Snapshot the current value of every symbol marked for fitting.
     root_parameters = {
         name: unpack_piecewise(dbf.symbols[name])
         for name in database_symbols_to_fit(dbf)
     }
     empty_ds = load_datasets([])  # empty TinyDB
     root = OptNode(root_parameters, empty_ds)
     self.current_node = root
     self.staged_nodes = []
     self.graph = OptGraph(root)
Example 3
0
def test_equilibrium_thermochemical_context_is_pickleable(datasets_db):
    """Test that the context for equilibrium thermochemical data is pickleable"""
    datasets_db.insert(CU_MG_EQ_HMR_LIQUID)
    dbf = Database(CU_MG_TDB)

    fit_syms = database_symbols_to_fit(dbf)
    guess = np.array([unpack_piecewise(dbf.symbols[name]) for name in fit_syms])
    ctx = setup_context(dbf, datasets_db)
    # get_priors is called unbound, so pass None for self.
    ctx.update(EmceeOptimizer.get_priors(None, fit_syms, guess))

    # Round-trip the context through pickle; predictions must agree.
    restored_ctx = pickle.loads(pickle.dumps(ctx))
    before = EmceeOptimizer.predict(guess, **ctx)
    after = EmceeOptimizer.predict(guess, **restored_ctx)
    assert np.isclose(before, after)
Example 4
0
def test_zpf_context_is_pickleable(datasets_db):
    """Test that the context for ZPF data is pickleable"""
    datasets_db.insert(CU_MG_DATASET_ZPF_ZERO_ERROR)
    dbf = Database(CU_MG_TDB)

    fit_syms = database_symbols_to_fit(dbf)
    guess = np.array([unpack_piecewise(dbf.symbols[name]) for name in fit_syms])
    ctx = setup_context(dbf, datasets_db)
    # get_priors is called unbound, so pass None for self.
    ctx.update(EmceeOptimizer.get_priors(None, fit_syms, guess))

    # Round-trip the context through pickle; predictions must agree.
    restored_ctx = pickle.loads(pickle.dumps(ctx))
    before = EmceeOptimizer.predict(guess, **ctx)
    after = EmceeOptimizer.predict(guess, **restored_ctx)
    assert np.isclose(before, after)
Example 5
0
    def _fit(
        self,
        symbols,
        ds,
        prior=None,
        iterations=1000,
        chains_per_parameter=2,
        chain_std_deviation=0.1,
        deterministic=True,
        restart_trace=None,
        tracefile=None,
        probfile=None,
        mcmc_data_weights=None,
        approximate_equilibrium=False,
    ):
        """Fit ``symbols`` to the datasets ``ds`` by MCMC sampling with emcee.

        Parameters
        ----------
        symbols : list of str
        ds : PickleableTinyDB
        prior : str
            Prior to use to generate priors. Defaults to 'zero', which keeps
            backwards compatibility. Can currently choose 'normal', 'uniform',
            'triangular', or 'zero'.
        iterations : int
            Number of iterations to calculate in MCMC. Default is 1000.
        chains_per_parameter : int
            number of chains for each parameter. Must be an even integer greater
            or equal to 2. Defaults to 2.
        chain_std_deviation : float
            Standard deviation of normal for parameter initialization as a
            fraction of each parameter. Must be greater than 0. Defaults to 0.1.
        deterministic : bool
            If True, the emcee sampler will be seeded to give deterministic sampling
            draws. This will ensure that the runs with the exact same database,
            chains_per_parameter, and chain_std_deviation (or restart_trace) will
            produce exactly the same results.
        restart_trace : np.ndarray
            ndarray of the previous trace. Should have shape (chains, iterations, parameters)
        tracefile : str
            filename to store the trace with NumPy.save. Array has shape
            (chains, iterations, parameters)
        probfile : str
            filename to store the log probability with NumPy.save. Has shape (chains, iterations)
        mcmc_data_weights : dict
            Dictionary of weights for each data type, e.g. {'ZPF': 20, 'HM': 2}
        approximate_equilibrium : bool
            If True, passed through to the ZPF and equilibrium thermochemical
            likelihood contexts to use approximate equilibrium. Defaults to False.

        Returns
        -------
        Dict[str, float]

        """
        # Set NumPy print options so logged arrays print on one line.
        # Using the printoptions context manager restores the caller's
        # settings even if sampling raises (the previous implementation
        # reset to a hard-coded linewidth of 75 and skipped the reset
        # entirely on error).
        with np.printoptions(linewidth=sys.maxsize):
            # Only build fast callables when no distributed scheduler is used.
            cbs = self.scheduler is None
            ctx = setup_context(self.dbf,
                                ds,
                                symbols,
                                data_weights=mcmc_data_weights,
                                phase_models=self.phase_models,
                                make_callables=cbs)
            symbols_to_fit = ctx['symbols_to_fit']
            initial_guess = np.array(
                [unpack_piecewise(self.dbf.symbols[s]) for s in symbols_to_fit])

            prior_dict = self.get_priors(prior, symbols_to_fit, initial_guess)
            ctx.update(prior_dict)
            # Thread the equilibrium-approximation flag into whichever
            # likelihood contexts are present.
            if 'zpf_kwargs' in ctx:
                ctx['zpf_kwargs'][
                    'approximate_equilibrium'] = approximate_equilibrium
            if 'equilibrium_thermochemical_kwargs' in ctx:
                ctx['equilibrium_thermochemical_kwargs'][
                    'approximate_equilibrium'] = approximate_equilibrium
            # Run the initial parameters for guessing purposes:
            _log.trace("Probability for initial parameters")
            self.predict(initial_guess, **ctx)
            if restart_trace is not None:
                chains = self.initialize_chains_from_trace(restart_trace)
                # TODO: check that the shape is valid with the existing parameters
            else:
                chains = self.initialize_new_chains(initial_guess,
                                                    chains_per_parameter,
                                                    chain_std_deviation,
                                                    deterministic)
            sampler = emcee.EnsembleSampler(chains.shape[0],
                                            initial_guess.size,
                                            self.predict,
                                            kwargs=ctx,
                                            pool=self.scheduler)
            if deterministic:
                from espei.rstate import numpy_rstate
                sampler.random_state = numpy_rstate
                _log.info('Using a deterministic ensemble sampler.')
            self.sampler = sampler
            self.tracefile = tracefile
            self.probfile = probfile
            # Run the MCMC simulation
            self.do_sampling(chains, iterations)

            # Post process: pick the best parameter set seen by the sampler.
            optimal_params = optimal_parameters(sampler.chain,
                                                sampler.lnprobability)
            _log.trace('Initial parameters: %s', initial_guess)
            _log.trace('Optimal parameters: %s', optimal_params)
            _log.trace('Change in parameters: %s',
                       np.abs(initial_guess - optimal_params) / initial_guess)
            parameters = dict(zip(symbols_to_fit, optimal_params))
            return parameters