def _fit(self, symbols, ds, method='Powell', maxiter=50, verbose=False, **kwargs):
    """Fit the given symbols to the datasets with scipy.optimize.minimize.

    Parameters
    ----------
    symbols : list of str
        Database symbols to fit.
    ds : PickleableTinyDB
        Datasets to fit against.
    method : str
        Any scipy.optimize.minimize method. Defaults to 'Powell'.
    maxiter : int
        Maximum number of optimizer iterations. Defaults to 50.
    verbose : bool
        If True, print progress information to stdout.
    **kwargs :
        Extra keyword arguments forwarded to scipy.optimize.minimize.

    Returns
    -------
    OptNode
        Node holding the fitted parameter dict and the datasets.
    """
    ctx = setup_context(self.dbf, ds, symbols)
    symbols_to_fit = ctx['symbols_to_fit']
    # Starting point: the current database values for each symbol.
    initial_guess = np.array([unpack_piecewise(self.dbf.symbols[s]) for s in symbols_to_fit])
    if verbose:
        print('Fitting', symbols_to_fit)
        print('Initial guess', initial_guess)
    started_at = time.time()
    result = minimize(self.predict, initial_guess, args=(ctx, ),
                      method=method, options={'maxiter': maxiter}, **kwargs)
    # atleast_1d handles the single-parameter case, where out.x is 0-d.
    fitted = np.atleast_1d(result.x).tolist()
    if verbose:
        print('Found', fitted, 'in', int(time.time() - started_at), 's')
    return OptNode(dict(zip(symbols_to_fit, fitted)), ds)
def __init__(self, dbf):
    """Initialize the optimizer from a pycalphad Database.

    Keeps an untouched copy of the original database, builds the root
    node of the optimization graph from the database's fittable symbols,
    and starts with an empty dataset store.
    """
    # Preserve the caller's database unmodified; work on an independent copy.
    self.orig_dbf = copy.deepcopy(dbf)
    self.dbf = copy.deepcopy(dbf)
    parameters = {
        symbol: unpack_piecewise(dbf.symbols[symbol])
        for symbol in database_symbols_to_fit(dbf)
    }
    ds = load_datasets([])  # empty TinyDB
    root = OptNode(parameters, ds)
    self.current_node = root
    self.staged_nodes = []
    self.graph = OptGraph(root)
def test_equilibrium_thermochemical_context_is_pickleable(datasets_db):
    """Test that the context for equilibrium thermochemical data is pickleable"""
    datasets_db.insert(CU_MG_EQ_HMR_LIQUID)
    dbf = Database(CU_MG_TDB)

    symbols_to_fit = database_symbols_to_fit(dbf)
    initial_guess = np.array([unpack_piecewise(dbf.symbols[sym]) for sym in symbols_to_fit])
    ctx = setup_context(dbf, datasets_db)
    ctx.update(EmceeOptimizer.get_priors(None, symbols_to_fit, initial_guess))

    # Round-trip the context through pickle.
    ctx_unpickled = pickle.loads(pickle.dumps(ctx))

    # The probability must be unchanged by the pickle round trip.
    regular_predict = EmceeOptimizer.predict(initial_guess, **ctx)
    unpickle_predict = EmceeOptimizer.predict(initial_guess, **ctx_unpickled)
    assert np.isclose(regular_predict, unpickle_predict)
def test_zpf_context_is_pickleable(datasets_db):
    """Test that the context for ZPF data is pickleable"""
    datasets_db.insert(CU_MG_DATASET_ZPF_ZERO_ERROR)
    dbf = Database(CU_MG_TDB)

    symbols_to_fit = database_symbols_to_fit(dbf)
    initial_guess = np.array([unpack_piecewise(dbf.symbols[sym]) for sym in symbols_to_fit])
    ctx = setup_context(dbf, datasets_db)
    ctx.update(EmceeOptimizer.get_priors(None, symbols_to_fit, initial_guess))

    # Round-trip the context through pickle.
    ctx_unpickled = pickle.loads(pickle.dumps(ctx))

    # The probability must be unchanged by the pickle round trip.
    regular_predict = EmceeOptimizer.predict(initial_guess, **ctx)
    unpickle_predict = EmceeOptimizer.predict(initial_guess, **ctx_unpickled)
    assert np.isclose(regular_predict, unpickle_predict)
def _fit(
    self,
    symbols,
    ds,
    prior=None,
    iterations=1000,
    chains_per_parameter=2,
    chain_std_deviation=0.1,
    deterministic=True,
    restart_trace=None,
    tracefile=None,
    probfile=None,
    mcmc_data_weights=None,
    approximate_equilibrium=False,
):
    """Fit the symbols to the datasets by MCMC sampling with emcee.

    Parameters
    ----------
    symbols : list of str
    ds : PickleableTinyDB
    prior : str
        Prior to use to generate priors. Defaults to 'zero', which
        keeps backwards compatibility. Can currently choose 'normal',
        'uniform', 'triangular', or 'zero'.
    iterations : int
        Number of iterations to calculate in MCMC. Default is 1000.
    chains_per_parameter : int
        number of chains for each parameter. Must be an even integer
        greater or equal to 2. Defaults to 2.
    chain_std_deviation : float
        Standard deviation of normal for parameter initialization as a
        fraction of each parameter. Must be greater than 0. Defaults to 0.1.
    deterministic : bool
        If True, the emcee sampler will be seeded to give deterministic
        sampling draws. This will ensure that the runs with the exact same
        database, chains_per_parameter, and chain_std_deviation (or
        restart_trace) will produce exactly the same results.
    restart_trace : np.ndarray
        ndarray of the previous trace. Should have shape
        (chains, iterations, parameters)
    tracefile : str
        filename to store the trace with NumPy.save. Array has shape
        (chains, iterations, parameters)
    probfile : str
        filename to store the log probability with NumPy.save.
        Has shape (chains, iterations)
    mcmc_data_weights : dict
        Dictionary of weights for each data type, e.g. {'ZPF': 20, 'HM': 2}
    approximate_equilibrium : bool
        Passed through to the ZPF / equilibrium thermochemical error
        calculations via the context. Defaults to False.

    Returns
    -------
    Dict[str, float]
        Mapping of symbol name to its optimal sampled value.
    """
    # Set NumPy print options so logged arrays print on one line. Reset at the end.
    np.set_printoptions(linewidth=sys.maxsize)
    # Only build callables when there is no scheduler (serial execution);
    # a scheduler handles building them on the workers.
    cbs = self.scheduler is None
    ctx = setup_context(self.dbf, ds, symbols, data_weights=mcmc_data_weights, phase_models=self.phase_models, make_callables=cbs)
    symbols_to_fit = ctx['symbols_to_fit']
    # Starting point: the current database values for each fitted symbol.
    initial_guess = np.array([unpack_piecewise(self.dbf.symbols[s]) for s in symbols_to_fit])
    prior_dict = self.get_priors(prior, symbols_to_fit, initial_guess)
    ctx.update(prior_dict)
    # Propagate the approximate-equilibrium flag into the per-data-type
    # keyword dicts, when those data types are present in the context.
    if 'zpf_kwargs' in ctx:
        ctx['zpf_kwargs']['approximate_equilibrium'] = approximate_equilibrium
    if 'equilibrium_thermochemical_kwargs' in ctx:
        ctx['equilibrium_thermochemical_kwargs']['approximate_equilibrium'] = approximate_equilibrium

    # Run the initial parameters for guessing purposes:
    _log.trace("Probability for initial parameters")
    self.predict(initial_guess, **ctx)

    if restart_trace is not None:
        # Resume sampling from the end of a previous trace.
        chains = self.initialize_chains_from_trace(restart_trace)
        # TODO: check that the shape is valid with the existing parameters
    else:
        chains = self.initialize_new_chains(initial_guess, chains_per_parameter, chain_std_deviation, deterministic)

    sampler = emcee.EnsembleSampler(chains.shape[0], initial_guess.size, self.predict, kwargs=ctx, pool=self.scheduler)
    if deterministic:
        # Seed the sampler with a fixed random state for reproducible runs.
        from espei.rstate import numpy_rstate
        sampler.random_state = numpy_rstate
        _log.info('Using a deterministic ensemble sampler.')
    self.sampler = sampler
    self.tracefile = tracefile
    self.probfile = probfile
    # Run the MCMC simulation
    self.do_sampling(chains, iterations)

    # Post process: pick the parameter set with the best log probability.
    optimal_params = optimal_parameters(sampler.chain, sampler.lnprobability)
    _log.trace('Initial parameters: %s', initial_guess)
    _log.trace('Optimal parameters: %s', optimal_params)
    _log.trace('Change in parameters: %s', np.abs(initial_guess - optimal_params) / initial_guess)
    parameters = dict(zip(symbols_to_fit, optimal_params))
    # Restore NumPy's default print linewidth.
    np.set_printoptions(linewidth=75)
    return parameters