Example #1
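These snippets are excerpts from the pypesto sampling and storage test suite and omit their module-level imports. A plausible shared preamble, assuming the usual pypesto package layout (the test-local fixtures such as gaussian_problem, create_petab_problem, negative_log_posterior and negative_log_prior are defined elsewhere in the suite), would be:

import os

import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as so
from scipy.stats import kstest, ks_2samp, norm

import pypesto
import pypesto.optimize as optimize
import pypesto.profile as profile
import pypesto.sample as sample
import pypesto.startpoint
import pypesto.visualize as visualize
from pypesto.store import (
    SamplingResultHDF5Reader,
    SamplingResultHDF5Writer,
    read_result,
    write_result,
)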
def test_empty_prior():
    """Check that priors are zero when none are defined."""
    # define negative log posterior
    posterior_fun = pypesto.Objective(fun=negative_log_posterior)

    # define pypesto problem without prior object
    test_problem = pypesto.Problem(objective=posterior_fun,
                                   lb=-10,
                                   ub=10,
                                   x_names=['x'])

    sampler = sample.AdaptiveMetropolisSampler()

    result = sample.sample(
        test_problem,
        n_samples=50,
        sampler=sampler,
        x0=np.array([0.0]),
        filename=None,
    )

    # get log prior values of first chain
    logprior_trace = -result.sample_result.trace_neglogprior[0, :]

    # check that all entries are zero
    assert (logprior_trace == 0.0).all()
Example #2
def test_autocorrelation_pipeline():
    """Check that the autocorrelation test works."""
    problem = gaussian_problem()

    sampler = sample.MetropolisSampler()

    # optimization
    result = optimize.minimize(problem, n_starts=3)

    # sample
    result = sample.sample(
        problem, sampler=sampler, n_samples=1000, result=result)

    # run auto-correlation with previous geweke
    sample.geweke_test(result)

    ac1 = sample.auto_correlation(result)

    # run auto-correlation without previous geweke
    result.sample_result.burn_in = None
    ac2 = sample.auto_correlation(result)

    assert ac1 == ac2

    # run effective sample size with previous geweke
    # and autocorrelation
    ess1 = sample.effective_sample_size(result)

    # run effective sample size without previous geweke
    # and autocorrelation
    result.sample_result.burn_in = None
    result.sample_result.auto_correlation = None
    ess2 = sample.effective_sample_size(result)

    assert ess1 == ess2
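The gaussian_problem() fixture used here and in later examples is not reproduced above. A minimal sketch of such a fixture, assuming a one-dimensional standard-normal posterior (consistent with the kstest(samples, 'norm') check in Example #4 below), could look like this:

def gaussian_problem():
    """Hypothetical fixture: 1-D problem with a standard-normal posterior."""
    def neglogpost(x):
        # negative log density of N(0, 1), up to an additive constant
        return 0.5 * float(np.sum(np.asarray(x) ** 2))

    objective = pypesto.Objective(fun=neglogpost)
    return pypesto.Problem(objective=objective, lb=[-10], ub=[10])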
Example #3
def test_autocorrelation_short_chain():
    """Check that the autocorrelation
    reacts nicely to small sample numbers."""
    problem = gaussian_problem()

    sampler = sample.MetropolisSampler()

    # optimization
    result = optimize.minimize(problem, n_starts=3, filename=None)

    # sample
    result = sample.sample(problem,
                           sampler=sampler,
                           n_samples=10,
                           result=result,
                           filename=None)

    # manually set burn in to chain length (only for testing!!)
    chain_length = result.sample_result.trace_x.shape[1]
    result.sample_result.burn_in = chain_length

    # run auto-correlation
    ac = sample.auto_correlation(result)

    assert ac is None

    # run effective sample size
    ess = sample.effective_sample_size(result)

    assert ess is None
Example #4
def test_ground_truth():
    """Test whether we actually retrieve correct distributions."""
    # use the best self-implemented sampler, which has a chance of
    # correctly sampling from the distribution
    sampler = sample.AdaptiveParallelTemperingSampler(
        internal_sampler=sample.AdaptiveMetropolisSampler(), n_chains=5)

    problem = gaussian_problem()

    result = optimize.minimize(problem, filename=None)

    result = sample.sample(problem,
                           n_samples=5000,
                           result=result,
                           sampler=sampler,
                           filename=None)

    # get samples of first chain
    samples = result.sample_result.trace_x[0].flatten()

    # test against different distributions

    statistic, pval = kstest(samples, 'norm')
    print(statistic, pval)
    assert statistic < 0.1

    statistic, pval = kstest(samples, 'uniform')
    print(statistic, pval)
    assert statistic > 0.1
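For reference, scipy's kstest with the 'norm' argument compares the empirical distribution of the samples against the standard normal CDF, so the 0.1 threshold above is a loose sanity bound rather than a calibrated test. A quick standalone illustration of the same idea:

# samples drawn from N(0, 1) yield a KS statistic close to zero
x = norm.rvs(size=5000, random_state=0)
statistic, pval = kstest(x, 'norm')
print(statistic, pval)
assert statistic < 0.1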
Example #5
def test_multiple_startpoints():
    """Check that multiple start points are propagated to the chains."""
    problem = gaussian_problem()
    x0s = [np.array([0]), np.array([1])]
    sampler = sample.ParallelTemperingSampler(
        internal_sampler=sample.MetropolisSampler(), n_chains=2)
    result = sample.sample(problem, n_samples=10, x0=x0s, sampler=sampler)

    assert result.sample_result.trace_neglogpost.shape[0] == 2
    assert [
        result.sample_result.trace_x[0][0], result.sample_result.trace_x[1][0]
    ] == x0s
Example #6
def sample_petab_problem():
    """Sample from a PEtab-based problem and return the result."""
    # create problem
    problem = create_petab_problem()

    sampler = sample.AdaptiveMetropolisSampler()
    result = sample.sample(
        problem,
        n_samples=1000,
        sampler=sampler,
        x0=np.array([3, -4]),
        filename=None,
    )
    return result
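sample_petab_problem() above is a helper rather than a test (and its create_petab_problem() fixture presumably requires a PEtab/AMICI setup). A typical, hypothetical use of its return value might be:

result = sample_petab_problem()

# burn-in detection and a quick diagnostic plot
sample.geweke_test(result)
visualize.sampling_fval_traces(result)
plt.close()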
Example #7
def test_pipeline(sampler, problem):
    """Check that a typical pipeline runs through."""
    # optimization
    optimizer = optimize.ScipyOptimizer(options={'maxiter': 10})
    result = optimize.minimize(
        problem, n_starts=3, optimizer=optimizer)

    # sample
    result = sample.sample(
        problem, sampler=sampler, n_samples=100, result=result)

    # some plot
    visualize.sampling_1d_marginals(result)
    plt.close()
Example #8
def test_geweke_test_unconverged():
    """Check that the geweke test reacts nicely to small sample numbers."""
    problem = gaussian_problem()

    sampler = sample.MetropolisSampler()

    # optimization
    result = optimize.minimize(problem, n_starts=3)

    # sample
    result = sample.sample(
        problem, sampler=sampler, n_samples=100, result=result)

    # run geweke test (should not fail!)
    sample.geweke_test(result)
Example #9
def test_prior():
    """Check that priors are defined for sampling."""
    # define negative log posterior
    posterior_fun = pypesto.Objective(fun=negative_log_posterior)

    # define negative log prior
    prior_fun = pypesto.Objective(fun=negative_log_prior)

    # define pypesto prior object
    prior_object = pypesto.NegLogPriors(objectives=[prior_fun])

    # define pypesto problem using prior object
    test_problem = pypesto.Problem(
        objective=posterior_fun,
        x_priors_defs=prior_object,
        lb=-10,
        ub=10,
        x_names=['x'],
    )

    sampler = sample.AdaptiveMetropolisSampler()

    result = sample.sample(
        test_problem,
        n_samples=1e4,
        sampler=sampler,
        x0=np.array([0.0]),
        filename=None,
    )

    # get log prior values of first chain
    logprior_trace = -result.sample_result.trace_neglogprior[0, :]

    # check that not all entries are zero
    assert (logprior_trace != 0.0).any()

    # get samples of first chain
    samples = result.sample_result.trace_x[0, :, 0]

    # generate ground-truth samples
    rvs = norm.rvs(size=5000, loc=-1.0, scale=np.sqrt(0.7))

    # check sample distribution agreement with the ground-truth
    statistic, pval = ks_2samp(rvs, samples)
    print(statistic, pval)

    assert statistic < 0.1
Example #10
def test_samples_cis():
    """
    Test whether :py:func:`pypesto.sample.calculate_ci_mcmc_sample` produces
    percentile-based credibility intervals correctly.
    """
    # load problem
    problem = gaussian_problem()

    # set a sampler
    sampler = sample.MetropolisSampler()

    # optimization
    result = optimize.minimize(problem, n_starts=3, filename=None)

    # sample
    result = sample.sample(problem,
                           sampler=sampler,
                           n_samples=2000,
                           result=result,
                           filename=None)

    # run geweke test
    sample.geweke_test(result)

    # get converged chain
    converged_chain = np.asarray(
        result.sample_result.trace_x[0, result.sample_result.burn_in:, :])

    # set confidence levels
    alpha_values = [0.99, 0.95, 0.68]

    # loop over confidence levels
    for alpha in alpha_values:
        # calculate parameter samples confidence intervals
        lb, ub = sample.calculate_ci_mcmc_sample(result, ci_level=alpha)
        # get corresponding percentiles to alpha
        percentiles = 100 * np.array([(1 - alpha) / 2, 1 - (1 - alpha) / 2])
        # check result agreement
        diff = np.percentile(converged_chain, percentiles, axis=0) - [lb, ub]

        assert (diff == 0).all()
        # check if lower bound is smaller than upper bound
        assert (lb < ub).all()
        # check if dimensions agree
        assert lb.shape == ub.shape
Example #11
def test_pipeline(sampler, problem):
    """Check that a typical pipeline runs through."""
    # optimization
    optimizer = optimize.ScipyOptimizer(options={'maxiter': 10})
    result = optimize.minimize(problem,
                               n_starts=3,
                               optimizer=optimizer,
                               filename=None)

    # sample
    result = sample.sample(problem,
                           sampler=sampler,
                           n_samples=100,
                           result=result,
                           filename=None)
    # run the geweke test first, so that burn-in is available and the
    # plotting call below does not emit a warning
    # (see pypesto/visualize/sampling.py:1104)
    sample.geweke_test(result=result)

    # some plot
    visualize.sampling_1d_marginals(result)
    plt.close()
Example #12
def test_storage_all():
    """Test `read_result` and `write_result`.

    It currently does not test read/write of the problem, as this is
    known to not work completely. It also excludes testing the history
    key of an optimization result.
    """
    objective = pypesto.Objective(fun=so.rosen,
                                  grad=so.rosen_der,
                                  hess=so.rosen_hess)
    dim_full = 10
    lb = -5 * np.ones((dim_full, 1))
    ub = 5 * np.ones((dim_full, 1))
    n_starts = 5
    problem = pypesto.Problem(objective=objective, lb=lb, ub=ub)

    optimizer = optimize.ScipyOptimizer()
    # Optimization
    result = optimize.minimize(
        problem=problem,
        optimizer=optimizer,
        n_starts=n_starts,
        filename=None,
    )
    # Profiling
    result = profile.parameter_profile(
        problem=problem,
        result=result,
        profile_index=[0],
        optimizer=optimizer,
        filename=None,
    )
    # Sampling
    sampler = sample.AdaptiveMetropolisSampler()
    result = sample.sample(
        problem=problem,
        sampler=sampler,
        n_samples=100,
        result=result,
        filename=None,
    )
    # Read and write
    filename = 'test_file.hdf5'
    try:
        write_result(result=result, filename=filename)
        result_read = read_result(filename=filename)

        # test optimize
        for i, opt_res in enumerate(result.optimize_result.list):
            for key in opt_res:
                if key == 'history':
                    continue
                if isinstance(opt_res[key], np.ndarray):
                    np.testing.assert_array_equal(
                        opt_res[key], result_read.optimize_result.list[i][key])
                else:
                    assert (opt_res[key]
                            == result_read.optimize_result.list[i][key])

        # test profile
        for key in result.profile_result.list[0][0].keys():
            if (result.profile_result.list[0][0][key] is None
                    or key == 'time_path'):
                continue
            elif isinstance(result.profile_result.list[0][0][key], np.ndarray):
                np.testing.assert_array_equal(
                    result.profile_result.list[0][0][key],
                    result_read.profile_result.list[0][0][key],
                )
            elif isinstance(result.profile_result.list[0][0][key], int):
                assert (result.profile_result.list[0][0][key] ==
                        result_read.profile_result.list[0][0][key])

        # test sample
        for key in result.sample_result.keys():
            if result.sample_result[key] is None or key == 'time':
                continue
            elif isinstance(result.sample_result[key], np.ndarray):
                np.testing.assert_array_equal(
                    result.sample_result[key],
                    result_read.sample_result[key],
                )
            elif isinstance(result.sample_result[key], (float, int)):
                np.testing.assert_almost_equal(
                    result.sample_result[key],
                    result_read.sample_result[key],
                )
    finally:
        if os.path.exists(filename):
            os.remove(filename)
Example #13
def test_storage_sampling():
    """
    Test the saving and loading of samples to HDF5 via
    pypesto.store.SamplingResultHDF5Writer and
    pypesto.store.SamplingResultHDF5Reader. All entries aside from
    time and message are tested.
    """
    objective = pypesto.Objective(fun=so.rosen,
                                  grad=so.rosen_der,
                                  hess=so.rosen_hess)
    dim_full = 10
    lb = -5 * np.ones((dim_full, 1))
    ub = 5 * np.ones((dim_full, 1))
    n_starts = 5
    startpoints = pypesto.startpoint.latin_hypercube(n_starts=n_starts,
                                                     lb=lb,
                                                     ub=ub)
    problem = pypesto.Problem(objective=objective,
                              lb=lb,
                              ub=ub,
                              x_guesses=startpoints)

    optimizer = optimize.ScipyOptimizer()

    result_optimization = optimize.minimize(
        problem=problem,
        optimizer=optimizer,
        n_starts=n_starts,
        filename=None,
    )
    x_0 = result_optimization.optimize_result.list[0]['x']
    sampler = sample.AdaptiveParallelTemperingSampler(
        internal_sampler=sample.AdaptiveMetropolisSampler(), n_chains=1)
    sample_original = sample.sample(
        problem=problem,
        sampler=sampler,
        n_samples=100,
        x0=[x_0],
        filename=None,
    )

    fn = 'test_file.hdf5'
    try:
        pypesto_sample_writer = SamplingResultHDF5Writer(fn)
        pypesto_sample_writer.write(sample_original)
        pypesto_sample_reader = SamplingResultHDF5Reader(fn)
        sample_read = pypesto_sample_reader.read()

        for key in sample_original.sample_result.keys():
            if sample_original.sample_result[key] is None or key == 'time':
                continue
            elif isinstance(sample_original.sample_result[key], np.ndarray):
                np.testing.assert_array_equal(
                    sample_original.sample_result[key],
                    sample_read.sample_result[key],
                )
            elif isinstance(sample_original.sample_result[key], (float, int)):
                np.testing.assert_almost_equal(
                    sample_original.sample_result[key],
                    sample_read.sample_result[key],
                )
    finally:
        if os.path.exists(fn):
            os.remove(fn)
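For comparison with the sampling round trip above, pypesto.store also provides analogous writer/reader pairs for optimization results; a minimal, hypothetical round trip for the optimization result from this test could look like:

from pypesto.store import (
    OptimizationResultHDF5Reader,
    OptimizationResultHDF5Writer,
)

fn_opt = 'test_file_opt.hdf5'
try:
    OptimizationResultHDF5Writer(fn_opt).write(result_optimization)
    result_opt_read = OptimizationResultHDF5Reader(fn_opt).read()
finally:
    if os.path.exists(fn_opt):
        os.remove(fn_opt)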
Example #14
def test_ground_truth_separated_modes():
    """Test whether we actually retrieve correct distributions."""
    # use the best self-implemented sampler, which has a chance to
    # correctly sample from the distribution

    # First use parallel tempering with 3 chains
    sampler = sample.AdaptiveParallelTemperingSampler(
        internal_sampler=sample.AdaptiveMetropolisSampler(), n_chains=3)

    problem = gaussian_mixture_separated_modes_problem()

    result = sample.sample(
        problem,
        n_samples=1e4,
        sampler=sampler,
        x0=np.array([0.0]),
        filename=None,
    )

    # get samples of first chain
    samples = result.sample_result.trace_x[0, :, 0]

    # generate bimodal ground-truth samples
    # "first" mode centered at -1
    rvs1 = norm.rvs(size=5000, loc=-1.0, scale=np.sqrt(0.7))
    # "second" mode centered at 100
    rvs2 = norm.rvs(size=5001, loc=100.0, scale=np.sqrt(0.8))

    # test for distribution similarity
    statistic, pval = ks_2samp(np.concatenate([rvs1, rvs2]), samples)

    # only parallel tempering finds both modes
    print(statistic, pval)
    assert statistic < 0.2

    # sample using adaptive metropolis (single-chain)
    # initiated around the "first" mode of the distribution
    sampler = sample.AdaptiveMetropolisSampler()
    result = sample.sample(
        problem,
        n_samples=1e4,
        sampler=sampler,
        x0=np.array([-2.0]),
        filename=None,
    )

    # get samples of first chain
    samples = result.sample_result.trace_x[0, :, 0]

    # test for distribution similarity
    statistic, pval = ks_2samp(np.concatenate([rvs1, rvs2]), samples)

    # single-chain adaptive metropolis does not find both modes
    print(statistic, pval)
    assert statistic > 0.1

    # actually centered at the "first" mode
    statistic, pval = ks_2samp(rvs1, samples)

    print(statistic, pval)
    assert statistic < 0.1

    # sample using adaptive metropolis (single-chain)
    # initiated around the "second" mode of the distribution
    sampler = sample.AdaptiveMetropolisSampler()
    result = sample.sample(
        problem,
        n_samples=1e4,
        sampler=sampler,
        x0=np.array([120.0]),
        filename=None,
    )

    # get samples of first chain
    samples = result.sample_result.trace_x[0, :, 0]

    # test for distribution similarity
    statistic, pval = ks_2samp(np.concatenate([rvs1, rvs2]), samples)

    # single-chain adaptive metropolis does not find both modes
    print(statistic, pval)
    assert statistic > 0.1

    # actually centered at the "second" mode
    statistic, pval = ks_2samp(rvs2, samples)

    print(statistic, pval)
    assert statistic < 0.1
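The gaussian_mixture_separated_modes_problem() fixture is not shown above. Judging from the ground-truth samples in this test, a minimal sketch, assuming an equal-weight mixture of N(-1, 0.7) and N(100, 0.8) with wide, assumed bounds, could be:

from scipy.stats import multivariate_normal

def gaussian_mixture_separated_modes_problem():
    """Hypothetical fixture: two well-separated Gaussian modes in 1-D."""
    def neglogpost(x):
        # naive mixture density; may underflow far away from both modes
        return -np.log(
            0.5 * multivariate_normal.pdf(x, mean=-1.0, cov=0.7)
            + 0.5 * multivariate_normal.pdf(x, mean=100.0, cov=0.8)
        )

    objective = pypesto.Objective(fun=neglogpost)
    # bounds chosen just wide enough to contain both modes (assumed)
    return pypesto.Problem(objective=objective, lb=[-100], ub=[200])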