def asdict(self, index=None):
    """Convert the stored data into a dictionary.

    Parameters
    ----------
    index : slice, optional
        When given, only the elements selected by ``self[index]`` are
        converted. When omitted (``None``), all of ``self.data`` is used.

    Returns
    -------
    dict :
        Whatever ``array2dict`` produces for the selected data.
    """
    selected = self.data if index is None else self[index]
    return array2dict(selected)
def _concatenate_dicts(self, attr):
    """Gather a dictionary attribute from every chain into one dictionary.

    Helper for properties (e.g. ``current_positions``) that need to collect
    the same dictionary-valued attribute from all of the chains.

    Parameters
    ----------
    attr : str
        Name of the attribute to read from each chain. The attribute is
        expected to evaluate to a dictionary.

    Returns
    -------
    dict :
        Dictionary mapping parameters to arrays. The arrays have shape
        ``ntemps x nchains``.
    """
    # the first chain's value serves as the template for names and dtypes
    template = getattr(self.chains[0], attr)
    names = list(template.keys())
    dtypes = detect_dtypes(template)
    # a ChainData instance is used as scratch space to stack the dictionaries
    stacked = ChainData(names, dtypes=dtypes, ntemps=self.ntemps)
    stacked.extend(self.nchains)
    for idx, chain in enumerate(self.chains):
        stacked[idx] = getattr(chain, attr)
    # transpose the stacked data before converting it to a dictionary
    return array2dict(stacked.data.T)
def _check_array(array, expected_params, expected_shape):
    """Assert that a sampler array has the expected fields and shape.

    Also verifies that the array converts to a dictionary (via
    ``epsie.array2dict``) with the same keys, and that every value in that
    dictionary has the expected shape.
    """
    # the array's field names must match the expected parameters
    assert sorted(array.dtype.names) == sorted(expected_params)
    # the array itself must have the expected shape
    assert array.shape == expected_shape
    # the dictionary form must expose the same parameters...
    as_dict = epsie.array2dict(array)
    assert sorted(as_dict.keys()) == sorted(expected_params)
    # ...with each entry keeping the expected shape
    for values in as_dict.values():
        assert values.shape == expected_shape
def samples(self):
    """A dict mapping ``variable_params`` to arrays of samples currently
    in memory.

    The arrays have shape ``ntemps x nchains x niterations``.

    The dictionary also contains sampling parameters.
    """
    positions = epsie.array2dict(self._sampler.positions)
    # apply the prior's boundary conditions to the raw positions
    prior = self.model.prior_distribution
    bounded = prior.apply_boundary_conditions(**positions)
    transforms = self.model.sampling_transforms
    if transforms is None:
        # no sampling transforms: the bounded samples are the result
        return bounded
    # invert the sampling transforms to get back to the model's
    # variable params space
    return transforms.apply(bounded, inverse=True)
def detect_dtypes(data):
    """Convenience function to detect the dtype of some data.

    Parameters
    ----------
    data : dict or numpy.ndarray
        Either a numpy structured array/void or a dictionary mapping
        parameter names to some (arbitrary) values. The values may be either
        arrays or atomic data. For array values the dtype is taken from the
        array's ``dtype``; for anything else ``type(value)`` is used.

    Returns
    -------
    dict :
        Dictionary mapping the parameter names to types.
    """
    if not isinstance(data, dict):
        # anything that is not a dict is assumed to be a numpy.void or
        # numpy.ndarray, and is converted to a dictionary first
        data = array2dict(data)
    dtypes = {}
    for name, value in data.items():
        if isinstance(value, numpy.ndarray):
            dtypes[name] = value.dtype
        else:
            dtypes[name] = type(value)
    return dtypes
def _check_chains_are_different(chain, other, test_blobs, test_state=True):
    """Assert that two chains differ in their positions and stats, and
    optionally in their random states and blobs.

    Parameters
    ----------
    chain, other :
        The two chains to compare.
    test_blobs : bool
        Whether to also require the chains' blobs to differ.
    test_state : bool, optional
        Whether to require the proposal distributions' random states to
        differ. Default is True.
    """
    if test_state:
        rstate = chain.state['proposal_dist']['random_state']
        ostate = other.state['proposal_dist']['random_state']
        assert rstate != ostate
    attrs = ['positions', 'stats']
    if test_blobs:
        # note: we're checking that the blobs aren't the same, but
        # it might happen for a model that they would be the same
        # across chains, depending on the data. The testing models
        # in utils.py return the value of the log likelihood in
        # each parameter for the blobs, so we expect them to be
        # different in this case
        attrs.append('blobs')
    for attr in attrs:
        _anticompare_dict_array(epsie.array2dict(getattr(chain, attr)),
                                epsie.array2dict(getattr(other, attr)))
def test_chains(model_cls, nprocs, swap_interval, proposals=None):
    """Sets up and runs a sampler for a few iterations, then performs the
    following checks:

    * That the positions, stats, acceptance ratios, and (if the model
      returns blobs) blobs all have the expected parameters, shape
      ntemps x nchains x niterations, and can be converted to dictionaries
      of arrays.
    * That the ``current_(positions|stats|blobs)`` (if the model returns
      blobs) are the same as the last item in the positions/stats/blobs.
    * If the model does not return blobs, that the ``blobs`` and
      ``current_blobs`` are all None.
    * That the chains all have different random states after the
      iterations, and different positions/stats/blobs.

    Parameters
    ----------
    model_cls : callable
        Called with no arguments to create the model to sample.
    nprocs :
        Forwarded to ``_create_sampler`` (presumably the number of processes
        to use).
    swap_interval : int
        Forwarded to ``_create_sampler``; also used to compute the expected
        number of recorded temperature swaps (``ITERINT // swap_interval``).
    proposals : list, optional
        Proposals forwarded to ``_create_sampler``. If provided, the
        sampler's proposals are checked against them.
    """
    model = model_cls()
    sampler = _create_sampler(model, nprocs, nchains=NCHAINS, seed=SEED,
                              swap_interval=swap_interval,
                              proposals=proposals)
    # check that the number of parameters that we have proposals for
    # matches the number of model parameters
    joint_dist = sampler.chains[0].chains[0].proposal_dist
    prop_params = set.union(*[set(p.parameters)
                              for p in joint_dist.proposals])
    assert set(joint_dist.parameters) == prop_params
    assert prop_params == set(model.params)
    if proposals is not None:
        # check that the proposals used by the sampler match what we gave it
        pdict = {frozenset(p.parameters): p for p in joint_dist.proposals}
        for prop in proposals:
            prop_params = frozenset(prop.parameters)
            assert prop_params in pdict
            assert prop.name == pdict[prop_params].name
    sampler.run(ITERINT)
    # check that the number of recorded iterations matches how long we
    # actually ran for
    assert sampler.niterations == ITERINT
    # check that we get the positions back in the expected format
    positions = sampler.positions
    expected_shape = (NTEMPS, NCHAINS, ITERINT)
    _check_array(positions, model.params, expected_shape)
    # check that the start/current values have the right shape
    for arr in sampler.start_position.values():
        assert arr.shape == (NTEMPS, NCHAINS)
    for arr in sampler.current_positions.values():
        assert arr.shape == (NTEMPS, NCHAINS)
    for arr in sampler.current_stats.values():
        assert arr.shape == (NTEMPS, NCHAINS)
    if model.blob_params:
        for arr in sampler.current_blobs.values():
            assert arr.shape == (NTEMPS, NCHAINS)
    # check that the current position is the same as the last in the array
    _compare_dict_array(epsie.array2dict(positions[..., -1]),
                        sampler.current_positions)
    # check that the stats have the expected fields and shape
    stats = sampler.stats
    _check_array(stats, ['logl', 'logp'], expected_shape)
    # check that the current stats are the same as the last in the array
    _compare_dict_array(epsie.array2dict(stats[..., -1]),
                        sampler.current_stats)
    # check that the acceptance ratios have the expected fields and shape
    acceptance = sampler.acceptance
    _check_array(acceptance, ['acceptance_ratio', 'accepted'],
                 expected_shape)
    # check that the temperature swaps have the expected shape
    temperature_swaps = sampler.temperature_swaps
    assert temperature_swaps.shape == (NTEMPS, NCHAINS,
                                       ITERINT // swap_interval)
    # ditto for the temperature acceptance
    temperature_acceptance = sampler.temperature_acceptance
    assert temperature_acceptance.shape == (NTEMPS - 1, NCHAINS,
                                            ITERINT // swap_interval)
    # check the individual chains
    for chain in sampler.chains:
        # check that the length matches the number of iterations
        assert len(chain) == ITERINT
        # check that hasblobs matches whether the model returns blobs
        assert chain.hasblobs == bool(model.blob_params)
        # do the same for every temperature
        for subchain in chain.chains:
            # check that the length matches the number of iterations
            assert len(subchain) == ITERINT
            # check that hasblobs matches whether the model returns blobs
            assert subchain.hasblobs == bool(model.blob_params)
    # check the blobs
    blobs = sampler.blobs
    current_blobs = sampler.current_blobs
    if model.blob_params:
        _check_array(blobs, model.blob_params, expected_shape)
        _compare_dict_array(epsie.array2dict(blobs[..., -1]),
                            current_blobs)
    else:
        # check that blobs are None since this model doesn't have blobs
        assert blobs is None
        assert current_blobs is None
    # check that every temperature in every chain has a different random
    # state and different current values than all others
    combos = list(itertools.combinations(range(len(sampler.chains)), 2))
    # itertools.combinations returns a one-shot iterator; it has to be
    # materialized because it is re-used for every chain in the loop below
    # (otherwise only the first chain would actually be checked)
    temp_combos = list(itertools.combinations(range(NTEMPS), 2))
    # check that all temps have different current positions/stats/blobs
    # within each chain, but that they all have the same random state
    for chain in sampler.chains:
        for kk, ll in temp_combos:
            _check_chains_are_different(chain.chains[kk], chain.chains[ll],
                                        test_blobs=bool(model.blob_params),
                                        test_state=False)
            rstate = chain.chains[kk].state['proposal_dist']['random_state']
            ostate = chain.chains[ll].state['proposal_dist']['random_state']
            assert rstate == ostate
    # now check that all temps in different chains are different
    for ii, jj in combos:
        chain = sampler.chains[ii]
        other = sampler.chains[jj]
        for kk in range(NTEMPS):
            for ll in range(NTEMPS):
                _check_chains_are_different(
                    chain.chains[kk], other.chains[ll],
                    test_blobs=bool(model.blob_params))
    # terminate the multiprocessing pool, if one was created
    if sampler.pool is not None:
        sampler.pool.close()
def model_stats(self):
    """A dict mapping the model's ``default_stats`` to arrays of values.

    The values are obtained by converting the sampler's ``blobs`` to a
    dictionary. The arrays have shape ``ntemps x nchains x niterations``.
    """
    blobs = self._sampler.blobs
    return epsie.array2dict(blobs)
def test_chains(model_cls, nprocs):
    """Run a sampler for a few iterations and validate what it recorded.

    The following is checked:

    * The positions, stats, acceptance ratios, and (if the model returns
      blobs) blobs all have the expected parameters, shape
      nchains x niterations, and can be converted to dictionaries of
      arrays.
    * The ``current_(positions|stats|blobs)`` (if the model returns blobs)
      equal the last item in the positions/stats/blobs.
    * If the model does not return blobs, ``blobs`` and ``current_blobs``
      are both None.
    * After the iterations, every pair of chains has different random
      states and different positions/stats/blobs.
    """
    model = model_cls()
    sampler = _create_sampler(model, nprocs, nchains=NCHAINS, seed=SEED)
    # the parameters covered by the proposals must be exactly the model's
    # parameters
    joint_dist = sampler.chains[0].proposal_dist
    prop_params = set.union(*[set(p.parameters)
                              for p in joint_dist.proposals])
    assert set(joint_dist.parameters) == prop_params
    assert prop_params == set(model.params)
    # run for some iterations
    sampler.run(ITERINT)
    # the recorded number of iterations must match how long we ran for
    assert sampler.niterations == ITERINT
    expected_shape = (NCHAINS, ITERINT)
    # positions: expected format, and last entry equals current positions
    positions = sampler.positions
    _check_array(positions, model.params, expected_shape)
    last_positions = epsie.array2dict(positions[..., -1])
    _compare_dict_array(last_positions, sampler.current_positions)
    # stats: expected fields and shape, and last entry equals current stats
    stats = sampler.stats
    _check_array(stats, ['logl', 'logp'], expected_shape)
    last_stats = epsie.array2dict(stats[..., -1])
    _compare_dict_array(last_stats, sampler.current_stats)
    # acceptance ratios: expected fields and shape
    acceptance = sampler.acceptance
    _check_array(acceptance, ['acceptance_ratio', 'accepted'],
                 expected_shape)
    # individual chains
    for chain in sampler.chains:
        # the length must match the number of iterations
        assert len(chain) == ITERINT
        # hasblobs must reflect whether the model returns blobs
        assert chain.hasblobs == bool(model.blob_params)
    # blobs
    blobs = sampler.blobs
    current_blobs = sampler.current_blobs
    if not model.blob_params:
        # this model doesn't have blobs, so both should be None
        assert blobs is None
        assert current_blobs is None
    else:
        _check_array(blobs, model.blob_params, expected_shape)
        _compare_dict_array(epsie.array2dict(blobs[..., -1]),
                            current_blobs)
    # every pair of chains must differ in random state and current values
    chain_indices = range(len(sampler.chains))
    for ii, jj in itertools.combinations(chain_indices, 2):
        _check_chains_are_different(sampler.chains[ii], sampler.chains[jj],
                                    test_blobs=bool(model.blob_params))
    # terminate the multiprocessing pool so we don't end up with too many
    # open processes
    if sampler.pool is not None:
        sampler.pool.close()