def run_sims_from_prior():
    """
    Simulates from the prior in batches and writes one results file per batch.

    Each simulation draws parameters with sim_prior, runs sim_likelihood on them, computes summary statistics of the
    fourth value returned by the simulator, and measures their distance from the observed summary statistics. Every
    batch is stored as a (parameters, statistics, distances) tuple in datadir under 'sims_from_prior_<batch>.pkl'.
    The saved data is intended for rejection abc and for training mdns.
    """

    n_files = 10
    n_sims_per_file = 10**6

    # only the second item stored in the observed data file is used here
    _, obs_stats = helper.load(datadir + 'observed_data.pkl')

    for batch in xrange(n_files):

        # fresh buffers for this batch
        batch_ps = np.empty([n_sims_per_file, 3])
        batch_stats = np.empty([n_sims_per_file, n_percentiles])
        batch_dist = np.empty(n_sims_per_file)

        # global index of the first simulation in this batch, used for progress output only
        first_sim = batch * n_sims_per_file

        for k in xrange(n_sims_per_file):
            batch_ps[k] = sim_prior()
            _, _, _, idts, _ = sim_likelihood(*batch_ps[k])
            batch_stats[k] = calc_summary_stats(idts)
            batch_dist[k] = calc_dist(batch_stats[k], obs_stats)

            print('simulation {0}, distance = {1}'.format(first_sim + k, batch_dist[k]))

        # write this batch to its own file
        helper.save((batch_ps, batch_stats, batch_dist), datadir + 'sims_from_prior_{0}.pkl'.format(batch))
def run_sims_from_prior():
    """
    Generates a large set of datasets from parameters drawn from the prior and stores them in a single file.

    For each simulation a parameter vector is drawn from the prior, data is generated from it with gen_y_data, and
    the distance between the generated and the observed data is computed. The parameter vectors, generated data and
    distances are saved together as a tuple in 'sims_from_prior.pkl' under datadir, for later use in rejection abc
    and for training mdns.
    """

    n_sims = 10 ** 7

    # observed inputs and outputs; the first stored item is not needed here
    _, x, obs_data = helper.load(datadir + 'observed_data.pkl')
    prior = get_prior()

    # preallocated storage for all simulations
    all_ws = np.empty([n_sims, n_dim])
    all_data = np.empty([n_sims, n_data])
    all_dist = np.empty(n_sims)

    for k in xrange(n_sims):

        # one draw from the prior and the data it generates
        w_k = prior.gen()[0]
        data_k = gen_y_data(w_k, x)

        all_ws[k] = w_k
        all_data[k] = data_k
        all_dist[k] = calc_dist(data_k, obs_data)

        print('simulation {0}, distance = {1}'.format(k, all_dist[k]))

    helper.save((all_ws, all_data, all_dist), datadir + 'sims_from_prior.pkl')
Example #3
0
def run_sims_from_prior():
    """
    Simulates the Lotka-Volterra model at parameters drawn from the prior and saves the outcomes.

    Summary statistics are normalized with the means and standard deviations stored in 'pilot_run_results.pkl'; the
    observed statistics are normalized the same way. Simulations that raise mjp.SimTooLongException are discarded, so
    fewer results than attempted simulations may be saved. The parameters, normalized statistics and distances to the
    observed statistics are saved as a tuple in a file whose name contains the current timestamp. The data is
    intended for rejection abc and for training mdns.
    """

    num_sims = 100000

    pilot_means, pilot_stds = helper.load(datadir + 'pilot_run_results.pkl')

    # normalize the observed statistics in place
    obs_stats = helper.load(datadir + 'obs_stats.pkl')
    obs_stats -= pilot_means
    obs_stats /= pilot_stds

    kept_params, kept_stats, kept_dist = [], [], []

    for attempt in xrange(num_sims):

        candidate = sim_prior_params()
        model = mjp.LotkaVolterra(init, candidate)

        try:
            states = model.sim_time(dt, duration, max_n_steps=max_n_steps)
        except mjp.SimTooLongException:
            # simulation took too many steps; drop this draw and move on
            continue

        # normalize the simulated statistics in place, same as the observed ones
        normalized = calc_summary_stats(states)
        normalized -= pilot_means
        normalized /= pilot_stds

        distance = calc_dist(normalized, obs_stats)

        kept_params.append(candidate)
        kept_stats.append(normalized)
        kept_dist.append(distance)

        print('simulation {0}, distance = {1}'.format(attempt, distance))

    # timestamped name, so repeated runs write to different files
    out_file = datadir + 'sims_from_prior_{0}.pkl'.format(time.time())
    helper.save((np.array(kept_params), np.array(kept_stats), np.array(kept_dist)), out_file)
def show_true_posterior():
    """
    Plots the marginals of the prior and of the posterior computed by calc_posterior.

    The observed data file provides the parameter values marked on the plots, together with the inputs and outputs
    from which the posterior is calculated.
    """

    true_w, x, y = helper.load(datadir + 'observed_data.pkl')

    prior = get_prior()
    posterior = calc_posterior(prior, x, y)

    # prior first, then posterior, both on the same axis limits
    for pdf in (prior, posterior):
        helper.plot_pdf_marginals(pdf=pdf, lims=[-3.0, 3.0], gt=true_w)
def calc_summary_stats(data, whiten=True):
    """
    Computes percentile-based summary statistics of the given observations.

    The statistics are n_percentiles percentiles of data, evenly spaced between the 0th and the 100th. If whiten is
    true, they are then shifted, projected and rescaled using the quantities stored in 'pilot_run_results.pkl'.

    :param data: observations to summarize
    :param whiten: whether to whiten the percentiles with the pilot run results
    :return: array of summary statistics
    """

    levels = np.linspace(0.0, 100.0, n_percentiles)
    stats = np.percentile(data, levels)

    if not whiten:
        return stats

    # whitening: subtract the means, project with U, scale by the inverse stds
    means, U, istds = helper.load(datadir + 'pilot_run_results.pkl')
    stats -= means
    stats = np.dot(stats, U)
    stats *= istds

    return stats
def load_sims_from_prior(n_files=10):
    """
    Loads the huge file(s) that store the results from simulations from the prior.

    Files 'sims_from_prior_0.pkl' up to 'sims_from_prior_<n_files - 1>.pkl' in datadir are read, and their contents
    are stacked along the first axis.

    :param n_files: number of files to load
    :return: tuple (ws, data, dist) with the concatenated contents of all files; with n_files equal to 0 the arrays
        are empty, with shapes (0, n_dim), (0, n_data) and (0,)
    """

    # Collect the pieces and concatenate once at the end. Concatenating inside the loop would copy everything
    # accumulated so far on every iteration. The empty leading arrays keep the result well defined for n_files == 0.
    ws_parts = [np.empty([0, n_dim])]
    data_parts = [np.empty([0, n_data])]
    dist_parts = [np.empty([0])]

    for i in xrange(n_files):

        ws_i, data_i, dist_i = helper.load(datadir + 'sims_from_prior_{0}.pkl'.format(i))
        ws_parts.append(ws_i)
        data_parts.append(data_i)
        dist_parts.append(dist_i)

    ws = np.concatenate(ws_parts, axis=0)
    data = np.concatenate(data_parts, axis=0)
    dist = np.concatenate(dist_parts, axis=0)

    # sanity check: all three arrays must describe the same number of simulations
    n_sims = ws.shape[0]
    assert n_sims == data.shape[0]
    assert n_sims == dist.shape[0]

    return ws, data, dist
Example #7
0
def load_sims_from_prior(n_files=12):
    """
    Loads the huge file(s) that store the results from simulations from the prior.

    Files 'sims_from_prior_0.pkl' up to 'sims_from_prior_<n_files - 1>.pkl' in datadir are read, and their contents
    are stacked along the first axis.

    :param n_files: number of files to load
    :return: tuple (params, stats, dist) with the concatenated contents of all files; with n_files equal to 0 the
        arrays are empty, with shapes (0, 4), (0, 9) and (0,)
    """

    # Collect the pieces and concatenate once at the end. Concatenating inside the loop would copy everything
    # accumulated so far on every iteration. The empty leading arrays keep the result well defined for n_files == 0.
    params_parts = [np.empty([0, 4])]
    stats_parts = [np.empty([0, 9])]
    dist_parts = [np.empty([0])]

    for i in xrange(n_files):

        params_i, stats_i, dist_i = helper.load(
            datadir + 'sims_from_prior_{0}.pkl'.format(i))
        params_parts.append(params_i)
        stats_parts.append(stats_i)
        dist_parts.append(dist_i)

    params = np.concatenate(params_parts, axis=0)
    stats = np.concatenate(stats_parts, axis=0)
    dist = np.concatenate(dist_parts, axis=0)

    # sanity check: all three arrays must describe the same number of simulations
    n_sims = params.shape[0]
    assert n_sims == stats.shape[0]
    assert n_sims == dist.shape[0]

    return params, stats, dist
def show_histograms(n_samples=1000):
    """
    Draws samples of parameters and summary statistics and plots their marginal histograms.

    Parameters are drawn with sim_prior and pushed through sim_likelihood; summary statistics are computed from the
    fourth value the simulator returns. Two figures are shown without blocking: one for the parameters, with the
    true parameters marked, and one for the statistics, with the observed statistics marked.

    :param n_samples: number of simulations to run
    """

    true_ps, obs_stats = helper.load(datadir + 'observed_data.pkl')

    sampled_ps = np.empty([n_samples, 3])
    sampled_stats = np.empty([n_samples, n_percentiles])

    for k in xrange(n_samples):
        sampled_ps[k] = sim_prior()
        _, _, _, idts, _ = sim_likelihood(*sampled_ps[k])
        sampled_stats[k] = calc_summary_stats(idts)

    # histograms of the sampled parameters
    helper.plot_hist_marginals(sampled_ps, lims=disp_lims, gt=true_ps)
    plt.gcf().suptitle('p(thetas)')

    # histograms of the resulting summary statistics
    helper.plot_hist_marginals(sampled_stats, gt=obs_stats)
    plt.gcf().suptitle('p(stats)')

    plt.show(block=False)
Example #9
0
def sum_stats_hist():
    """
    Simulates the Lotka-Volterra model repeatedly at true_params and plots histograms of the summary statistics.

    Simulations that raise mjp.SimTooLongException are retried until n_sims successful runs have been collected. The
    statistics are normalized with the means and standard deviations stored in 'pilot_run_results.pkl' and shown on
    a 3 x 3 grid of axes, one histogram per axis.
    """

    n_sims = 1000
    collected = []

    pilot_means, pilot_stds = helper.load(datadir + 'pilot_run_results.pkl')

    # keep simulating until enough runs have succeeded; failed runs do not count
    while len(collected) < n_sims:

        model = mjp.LotkaVolterra(init, true_params)

        try:
            states = model.sim_time(dt, duration, max_n_steps=max_n_steps)
        except mjp.SimTooLongException:
            continue

        collected.append(calc_summary_stats(states))

        print('simulation {0}'.format(len(collected)))

    # normalize all statistics at once
    sum_stats = np.array(collected)
    sum_stats -= pilot_means
    sum_stats /= pilot_stds

    # square-root rule for the number of bins
    nbins = int(np.sqrt(n_sims))

    _, axs = plt.subplots(3, 3)
    for k, ax in enumerate(axs.flatten()):
        # NOTE(review): `normed` is deprecated in newer matplotlib releases in favour of `density`; confirm the
        # targeted matplotlib version before changing it.
        ax.hist(sum_stats[:, k], nbins, normed=True)
        ax.set_title('stat ' + str(k + 1))

    plt.show()