Example #1
def test_seed(model_cls, nprocs, proposals=None):
    """Tests that running with the same seed yields the same results,
    while running with a different seed yields different results.
    """
    model = model_cls()
    sampler = _create_sampler(model,
                              nprocs,
                              nchains=NCHAINS,
                              seed=SEED,
                              proposals=proposals)
    # now create another sampler with the same seed and starting position
    same_seed = _create_sampler(model,
                                nprocs,
                                nchains=NCHAINS,
                                seed=SEED,
                                proposals=proposals,
                                set_start=False)
    same_seed.start_position = sampler.start_position
    assert sampler.seed == same_seed.seed
    _compare_dict_array(sampler.start_position, same_seed.start_position)
    # we'll start the different seed from the same start position; this
    # should still yield different positions after several iterations
    diff_seed = _create_sampler(model,
                                nprocs,
                                nchains=NCHAINS,
                                seed=None,
                                proposals=proposals,
                                set_start=False)
    diff_seed.start_position = sampler.start_position
    # not passing a seed should result in a different seed; check that
    assert sampler.seed != diff_seed.seed
    sampler.run(ITERINT)
    same_seed.run(ITERINT)
    diff_seed.run(ITERINT)
    # check that the same seed gives the same result
    _compare_dict_array(sampler.current_positions, same_seed.current_positions)
    _compare_dict_array(sampler.current_stats, same_seed.current_stats)
    if model.blob_params:
        _compare_dict_array(sampler.current_blobs, same_seed.current_blobs)
    # check that different seeds give different results
    _anticompare_dict_array(sampler.current_positions,
                            diff_seed.current_positions)
    _anticompare_dict_array(sampler.current_stats, diff_seed.current_stats)
    if model.blob_params:
        _anticompare_dict_array(sampler.current_blobs, diff_seed.current_blobs)
    if sampler.pool is not None:
        sampler.pool.close()
        same_seed.pool.close()
        diff_seed.pool.close()
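
The _compare_dict_array and _anticompare_dict_array helpers used above are defined elsewhere in the test module and are not shown here. A minimal sketch of what they might do, assuming the values are numpy arrays (the exact behaviour is an assumption based on how they are called), is:

import numpy

def _compare_dict_array(a, b):
    # assumed helper: both dicts have the same keys and element-wise
    # identical arrays
    assert set(a) == set(b)
    for key, arr in a.items():
        assert numpy.array_equal(arr, b[key])

def _anticompare_dict_array(a, b):
    # assumed helper: same keys, but the stored values must not all match
    assert set(a) == set(b)
    assert not all(numpy.array_equal(arr, b[key]) for key, arr in a.items())
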
Example #2
def test_clear_memory(model_cls, nprocs):
    """Tests that clearing memory and running yields the same result as if
    the memory had not been cleared.
    """
    model = model_cls()
    sampler = _create_sampler(model, nprocs, nchains=NCHAINS, seed=SEED)
    sampler2 = _create_sampler(model,
                               nprocs,
                               nchains=NCHAINS,
                               seed=SEED,
                               set_start=False)
    sampler2.start_position = sampler.start_position
    # run both for a few iterations
    sampler.run(ITERINT)
    sampler2.run(ITERINT)
    # clear one, but don't clear the other
    sampler.clear()
    # now run both for a few more iterations
    sampler.run(ITERINT)
    sampler2.run(ITERINT)
    # check that the number of recorded iterations matches how long we
    # actually ran for
    assert sampler.niterations == 2 * ITERINT
    assert sampler2.niterations == 2 * ITERINT
    # but that the lengths of the stored arrays differ
    expected_shape = (NCHAINS, ITERINT)
    expected_shape2 = (NCHAINS, 2 * ITERINT)
    _check_array(sampler.positions, model.params, expected_shape)
    _check_array(sampler2.positions, model.params, expected_shape2)
    _check_array(sampler.stats, ['logl', 'logp'], expected_shape)
    _check_array(sampler2.stats, ['logl', 'logp'], expected_shape2)
    _check_array(sampler.acceptance, ['acceptance_ratio', 'accepted'],
                 expected_shape)
    _check_array(sampler2.acceptance, ['acceptance_ratio', 'accepted'],
                 expected_shape2)
    if model.blob_params:
        _check_array(sampler.blobs, model.blob_params, expected_shape)
        _check_array(sampler2.blobs, model.blob_params, expected_shape2)
    # both samplers should now be at the same current position
    _compare_dict_array(sampler.current_positions, sampler2.current_positions)
    _compare_dict_array(sampler.current_stats, sampler2.current_stats)
    if model.blob_params:
        _compare_dict_array(sampler.current_blobs, sampler2.current_blobs)
    if sampler.pool is not None:
        sampler.pool.close()
        sampler2.pool.close()
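
The _check_array helper is likewise defined outside of this snippet. Based on how it is called, a plausible sketch (the use of epsie.array2dict for the conversion check is an assumption) is:

import epsie

def _check_array(array, expected_params, expected_shape):
    # assumed helper: the structured array has exactly the expected fields
    # and shape, and can be converted to a dict of plain arrays
    assert set(array.dtype.names) == set(expected_params)
    assert array.shape == expected_shape
    asdict = epsie.array2dict(array)
    assert set(asdict) == set(expected_params)
    for arr in asdict.values():
        assert arr.shape == expected_shape
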
Example #3
def test_checkpointing(model_cls, nprocs, proposals=None):
    """Tests that resuming from checkpoint yields the same result as if
    no checkpoint happened.

    This test requires h5py to be installed.
    """
    try:
        import h5py
    except ImportError:
        raise ImportError("h5py must be installed to run this test")
    model = model_cls()
    sampler = _create_sampler(model,
                              nprocs,
                              nchains=NCHAINS,
                              seed=SEED,
                              proposals=proposals)
    # create a second sampler for comparison; we won't bother setting
    # a seed or start position, since that shouldn't matter when loading
    # from a checkpoint
    sampler2 = _create_sampler(model,
                               nprocs,
                               nchains=NCHAINS,
                               seed=None,
                               proposals=proposals,
                               set_start=False)
    sampler.run(ITERINT)
    # checkpoint to an h5py file in memory
    fp = h5py.File('test.hdf', 'w', driver='core', backing_store=False)
    sampler.checkpoint(fp)
    # run for another set of iterations
    sampler.run(ITERINT)
    # set the other sampler's state using the checkpoint
    sampler2.set_state_from_checkpoint(fp)
    fp.close()
    # run again
    sampler2.run(ITERINT)
    # compare the two
    _compare_dict_array(sampler.current_positions, sampler2.current_positions)
    _compare_dict_array(sampler.current_stats, sampler2.current_stats)
    if model.blob_params:
        _compare_dict_array(sampler.current_blobs, sampler2.current_blobs)
    if sampler.pool is not None:
        sampler.pool.close()
        sampler2.pool.close()
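
Note that the checkpoint file above uses h5py's 'core' driver with backing_store=False, so it lives entirely in memory and nothing is written to disk; the filename is only a label. A standalone illustration of that pattern:

import numpy
import h5py

# in-memory HDF5 file: data is kept in RAM and discarded on close
fp = h5py.File('scratch.hdf', 'w', driver='core', backing_store=False)
fp['x'] = numpy.arange(10)
assert numpy.array_equal(fp['x'][()], numpy.arange(10))
fp.close()
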
Example #4
def test_chains(model_cls, nprocs, swap_interval, proposals=None):
    """Sets up and runs a sampler for a few iterations, then performs
    the following checks:

    * That the positions, stats, acceptance ratios, and (if the model
      returns blobs) blobs all have the expected parameters, shape
      ntemps x nchains x niterations, and can be converted to dictionaries of
      arrays.
    * That the ``current_(positions|stats|blobs)`` (if the model returns
      blobs) are the same as the last item in the positions/stats/blobs.
    * If the model does not return blobs, that the ``blobs`` and
      ``current_blobs`` are all None.
    * That the chains all have different random states after the
      iterations, and different positions/stats/blobs.
    """
    model = model_cls()
    sampler = _create_sampler(model,
                              nprocs,
                              nchains=NCHAINS,
                              seed=SEED,
                              swap_interval=swap_interval,
                              proposals=proposals)
    # check that the set of parameters we have proposals for matches the
    # model's parameters
    joint_dist = sampler.chains[0].chains[0].proposal_dist
    prop_params = set.union(*[set(p.parameters) for p in joint_dist.proposals])
    assert set(joint_dist.parameters) == prop_params
    assert prop_params == set(model.params)
    if proposals is not None:
        # check that the proposals used by the sampler match what we gave it
        pdict = {frozenset(p.parameters): p for p in joint_dist.proposals}
        for prop in proposals:
            prop_params = frozenset(prop.parameters)
            assert prop_params in pdict
            assert prop.name == pdict[prop_params].name
    sampler.run(ITERINT)
    # check that the number of recorded iterations matches how long we
    # actually ran for
    assert sampler.niterations == ITERINT
    # check that we get the positions back in the expected format
    positions = sampler.positions
    expected_shape = (NTEMPS, NCHAINS, ITERINT)
    _check_array(positions, model.params, expected_shape)
    # check that the current positions have the right shape
    for arr in sampler.start_position.values():
        assert arr.shape == (NTEMPS, NCHAINS)
    for arr in sampler.current_positions.values():
        assert arr.shape == (NTEMPS, NCHAINS)
    for arr in sampler.current_stats.values():
        assert arr.shape == (NTEMPS, NCHAINS)
    if model.blob_params:
        for arr in sampler.current_blobs.values():
            assert arr.shape == (NTEMPS, NCHAINS)
    # check that the current position is the same as the last in the array
    _compare_dict_array(epsie.array2dict(positions[..., -1]),
                        sampler.current_positions)
    # check that the stats have the expected fields and shape
    stats = sampler.stats
    _check_array(stats, ['logl', 'logp'], expected_shape)
    # check that the current stats are the same as the last in the array
    _compare_dict_array(epsie.array2dict(stats[..., -1]),
                        sampler.current_stats)
    # check that the acceptance ratios have the expected fields and shape
    acceptance = sampler.acceptance
    _check_array(acceptance, ['acceptance_ratio', 'accepted'], expected_shape)
    # check that the temperature swaps have the expected shape
    temperature_swaps = sampler.temperature_swaps
    assert temperature_swaps.shape == (NTEMPS, NCHAINS,
                                       ITERINT // swap_interval)
    # ditto for the temperature acceptance
    temperature_acceptance = sampler.temperature_acceptance
    assert temperature_acceptance.shape == (NTEMPS - 1, NCHAINS,
                                            ITERINT // swap_interval)
    # check the individual chains
    for ii, chain in enumerate(sampler.chains):
        # check that the length matches the number of iterations
        assert len(chain) == ITERINT
        # check that hasblobs reflects whether the model returns blobs
        assert chain.hasblobs == bool(model.blob_params)
        # do the same for every temperature
        for kk, subchain in enumerate(chain.chains):
            # check that the length matches the number of iterations
            assert len(subchain) == ITERINT
            # check that hasblobs reflects whether the model returns blobs
            assert subchain.hasblobs == bool(model.blob_params)
    # check the blobs
    blobs = sampler.blobs
    current_blobs = sampler.current_blobs
    if model.blob_params:
        _check_array(blobs, model.blob_params, expected_shape)
        _compare_dict_array(epsie.array2dict(blobs[..., -1]), current_blobs)
    else:
        # check that blobs are None since this model doesn't have blobs
        assert blobs is None
        assert current_blobs is None
    # check that every temperature in every chain has a different random state
    # and different current values than all others
    combos = list(itertools.combinations(range(len(sampler.chains)), 2))
    # materialize the temperature pairs as a list, since they are iterated
    # over once for every chain below
    temp_combos = list(itertools.combinations(range(NTEMPS), 2))
    # check that all temps have different current positions/stats/blobs within
    # each chain, but that they all have the same random state
    for chain in sampler.chains:
        for kk, ll in temp_combos:
            _check_chains_are_different(chain.chains[kk],
                                        chain.chains[ll],
                                        test_blobs=bool(model.blob_params),
                                        test_state=False)
            rstate = chain.chains[kk].state['proposal_dist']['random_state']
            ostate = chain.chains[ll].state['proposal_dist']['random_state']
            assert rstate == ostate
    # now check that all temps in different chains are different
    for ii, jj in combos:
        chain = sampler.chains[ii]
        other = sampler.chains[jj]
        for kk in range(NTEMPS):
            for ll in range(NTEMPS):
                _check_chains_are_different(chain.chains[kk],
                                            other.chains[ll],
                                            test_blobs=bool(model.blob_params))
    if sampler.pool is not None:
        sampler.pool.close()
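
The _check_chains_are_different helper is not shown on this page. A rough sketch of what it might check, given how it is called above (the per-chain attribute names are assumptions; only the state['proposal_dist']['random_state'] access is taken from the code), is:

import numpy

def _check_chains_are_different(chain, other, test_blobs, test_state=True):
    # assumed helper: after running, the two chains should have diverged
    if test_state:
        # compare the random states of the two proposal distributions
        rstate = chain.state['proposal_dist']['random_state']
        ostate = other.state['proposal_dist']['random_state']
        assert rstate != ostate
    # the current positions should differ (attribute name assumed)
    for param, arr in chain.current_position.items():
        assert not numpy.array_equal(arr, other.current_position[param])
    if test_blobs:
        # and so should the blobs, if the model returns any (name assumed)
        for blob, arr in chain.current_blob.items():
            assert not numpy.array_equal(arr, other.current_blob[blob])
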
Example #5
def test_chains(model_cls, nprocs):
    """Sets up and runs a sampler for a few iterations, then performs
    the following checks:

    * That the positions, stats, acceptance ratios, and (if the model
      returns blobs) blobs all have the expected parameters, shape
      nchains x niterations, and can be converted to dictionaries of
      arrays.
    * That the ``current_(positions|stats|blobs)`` (if the model returns
      blobs) are the same as the last item in the positions/stats/blobs.
    * If the model does not return blobs, that the ``blobs`` and
      ``current_blobs`` are all None.
    * That the chains all have different random states after the
      iterations, and different positions/stats/blobs.
    """
    model = model_cls()
    sampler = _create_sampler(model, nprocs, nchains=NCHAINS, seed=SEED)
    # check that the set of parameters we have proposals for matches the
    # model's parameters
    joint_dist = sampler.chains[0].proposal_dist
    prop_params = set.union(*[set(p.parameters) for p in joint_dist.proposals])
    assert set(joint_dist.parameters) == prop_params
    assert prop_params == set(model.params)
    # run for some iterations
    sampler.run(ITERINT)
    # check that the number of recorded iterations matches how long we
    # actually ran for
    assert sampler.niterations == ITERINT
    # check that we get the positions back in the expected format
    positions = sampler.positions
    expected_shape = (NCHAINS, ITERINT)
    _check_array(positions, model.params, expected_shape)
    # check that the current position is the same as the last in the array
    _compare_dict_array(epsie.array2dict(positions[..., -1]),
                        sampler.current_positions)
    # check that the stats have the expected fields and shape
    stats = sampler.stats
    _check_array(stats, ['logl', 'logp'], expected_shape)
    # check that the current stats are the same as the last in the array
    _compare_dict_array(epsie.array2dict(stats[..., -1]),
                        sampler.current_stats)
    # check that the acceptance ratios have the expected fields and shape
    acceptance = sampler.acceptance
    _check_array(acceptance, ['acceptance_ratio', 'accepted'], expected_shape)
    # check the individual chains
    for ii, chain in enumerate(sampler.chains):
        # check that the length matches the number of iterations
        assert len(chain) == ITERINT
        # check that hasblobs reflects whether the model returns blobs
        assert chain.hasblobs == bool(model.blob_params)
    # check the blobs
    blobs = sampler.blobs
    current_blobs = sampler.current_blobs
    if model.blob_params:
        _check_array(blobs, model.blob_params, expected_shape)
        _compare_dict_array(epsie.array2dict(blobs[..., -1]), current_blobs)
    else:
        # check that blobs are None since this model doesn't have blobs
        assert blobs is None
        assert current_blobs is None
    # check that each chain's random state and current values are different
    combos = itertools.combinations(range(len(sampler.chains)), 2)
    for ii, jj in combos:
        _check_chains_are_different(sampler.chains[ii],
                                    sampler.chains[jj],
                                    test_blobs=bool(model.blob_params))
    # terminate the multiprocessing pool so we don't end up with too many
    # open processes
    if sampler.pool is not None:
        sampler.pool.close()
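
Finally, _create_sampler is the test module's factory for building samplers. A rough sketch of the single-temperature case (the constructor arguments and the model's prior_rvs method are assumptions; check the epsie documentation for the actual signatures) might look like:

import multiprocessing
from epsie.samplers import MetropolisHastingsSampler

def _create_sampler(model, nprocs, nchains, seed=None, proposals=None,
                    set_start=True):
    # assumed sketch: optionally create a worker pool, build the sampler,
    # and optionally draw a starting position from the model's prior
    pool = multiprocessing.Pool(nprocs) if nprocs > 1 else None
    sampler = MetropolisHastingsSampler(model.params, model, nchains,
                                        proposals=proposals, seed=seed,
                                        pool=pool)
    if set_start:
        sampler.start_position = model.prior_rvs(size=nchains)
    return sampler
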