Example 1
def do_pilot_run():
    """
    Runs a number of simulations, then calculates and saves the mean and standard deviation of the summary statistics
    across simulations. The intention is to use these to normalize the summary statistics when doing distance-based
    inference, such as rejection ABC or MCMC-ABC. Because each summary statistic has a different scale, the Euclidean
    distance is not meaningful on the original summary statistics. Normalization also helps when using MDNs, since it
    standardizes the neural net input.
    """

    n_sims = 1000
    stats = []
    i = 1

    while i <= n_sims:

        params = sim_prior_params()
        lv = mjp.LotkaVolterra(init, params)

        # if the simulation takes too long, discard it and retry with freshly sampled parameters
        try:
            states = lv.sim_time(dt, duration, max_n_steps=max_n_steps)
        except mjp.SimTooLongException:
            continue

        stats.append(calc_summary_stats(states))

        print('pilot simulation {0}'.format(i))
        i += 1

    stats = np.array(stats)
    means = np.mean(stats, axis=0)
    stds = np.std(stats, axis=0, ddof=1)

    helper.save((means, stds), datadir + 'pilot_run_results.pkl')
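
A minimal sketch of how the saved pilot statistics might be used downstream: standardize both the simulated and the observed summary statistics with the pilot means and stds before taking a Euclidean distance. The names normalize_stats and calc_dist_normalized are hypothetical, and helper.load is assumed to return the (means, stds) tuple saved above.

def normalize_stats(stats, means, stds):
    # hypothetical helper: standardize summary statistics with the pilot-run means and stds
    return (stats - means) / stds

def calc_dist_normalized(stats, obs_stats):
    # hypothetical helper: Euclidean distance between normalized simulated and observed statistics
    means, stds = helper.load(datadir + 'pilot_run_results.pkl')
    u = normalize_stats(stats, means, stds)
    v = normalize_stats(obs_stats, means, stds)
    return np.sqrt(np.sum((u - v) ** 2))
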
Example 2
def run_sims_from_prior():
    """
    Runs several simulations with parameters sampled from the prior. Saves the parameters, the summary statistics and
    the distances to the observed summary statistics. The intention is to use the data for rejection ABC and to train MDNs.
    """

    n_sims = 10 ** 7

    # load observed data and prior
    _, x, obs_data = helper.load(datadir + 'observed_data.pkl')
    prior = get_prior()

    # generate new data
    ws = np.empty([n_sims, n_dim])
    data = np.empty([n_sims, n_data])
    dist = np.empty(n_sims)

    for i in range(n_sims):

        w = prior.gen()[0]
        this_data = gen_y_data(w, x)

        ws[i] = w
        data[i] = this_data
        dist[i] = calc_dist(this_data, obs_data)

        print('simulation {0}, distance = {1}'.format(i, dist[i]))

    helper.save((ws, data, dist), datadir + 'sims_from_prior.pkl')
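
For context, a minimal sketch of a rejection ABC step on top of the saved file: keep the prior samples whose simulated data lies closest to the observations. The function name rejection_abc_posterior and the acceptance quantile are illustrative assumptions, not part of the original code.

def rejection_abc_posterior(quantile=0.001):
    # hypothetical helper: accept the prior samples with the smallest distances
    ws, data, dist = helper.load(datadir + 'sims_from_prior.pkl')
    eps = np.percentile(dist, 100.0 * quantile)  # acceptance threshold
    accepted = dist <= eps
    return ws[accepted], data[accepted]
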
Example 3
def run_sims_from_prior():
    """
    Runs several simulations with parameters sampled from the prior. Saves the parameters, the summary statistics and
    the distances to the observed summary statistics. The intention is to use the data for rejection ABC and to train MDNs.
    """

    n_files = 10
    n_sims_per_file = 10**6

    _, obs_stats = helper.load(datadir + 'observed_data.pkl')

    for j in range(n_files):

        ps = np.empty([n_sims_per_file, 3])
        stats = np.empty([n_sims_per_file, n_percentiles])
        dist = np.empty(n_sims_per_file)

        for i in range(n_sims_per_file):
            ps[i] = sim_prior()
            _, _, _, idts, _ = sim_likelihood(*ps[i])
            stats[i] = calc_summary_stats(idts)
            dist[i] = calc_dist(stats[i], obs_stats)

            print('simulation {0}, distance = {1}'.format(
                j * n_sims_per_file + i, dist[i]))

        # save data
        filename = datadir + 'sims_from_prior_{0}.pkl'.format(j)
        helper.save((ps, stats, dist), filename)
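
Since the results are spread over several files, downstream code presumably concatenates them again; a sketch of such a loader, assuming helper.load returns the (ps, stats, dist) tuples saved above (the function name is made up).

def load_sims_from_prior(n_files=10):
    # hypothetical helper: stack the per-file results into single arrays
    all_ps, all_stats, all_dist = [], [], []
    for j in range(n_files):
        ps, stats, dist = helper.load(datadir + 'sims_from_prior_{0}.pkl'.format(j))
        all_ps.append(ps)
        all_stats.append(stats)
        all_dist.append(dist)
    return np.concatenate(all_ps), np.concatenate(all_stats), np.concatenate(all_dist)
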
Example 4
def do_pilot_run():
    """
    Runs a number of simulations, then calculates and saves the mean and standard deviation of the summary statistics
    across simulations. The intention is to use these to normalize the summary statistics when doing distance-based
    inference, such as rejection ABC or MCMC-ABC. Because each summary statistic has a different scale, the Euclidean
    distance is not meaningful on the original summary statistics. Normalization also helps when using MDNs, since it
    standardizes the neural net input.
    """

    n_sims = 10**5
    stats = np.empty([n_sims, n_percentiles])

    for i in range(n_sims):

        ps = sim_prior()
        _, _, _, idts, _ = sim_likelihood(*ps)
        stats[i] = calc_summary_stats(idts, whiten=False)

        print('pilot simulation {0}'.format(i))

    # whiten the summary statistics: centre them, then eigendecompose the
    # empirical covariance to get the principal directions and their scales
    means = np.mean(stats, axis=0)
    stats -= means

    cov = np.dot(stats.T, stats) / n_sims
    variances, U = np.linalg.eig(cov)
    istds = np.sqrt(1.0 / variances)

    helper.save((means, U, istds), datadir + 'pilot_run_results.pkl')
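
Here the pilot run saves a whitening transform rather than per-statistic standard deviations: U holds the eigenvectors of the empirical covariance and istds the inverse square roots of its eigenvalues. A sketch of how new summary statistics might be whitened with it (whiten_stats is a hypothetical name):

def whiten_stats(stats, means, U, istds):
    # hypothetical helper: centre, rotate onto the principal axes, rescale to unit variance
    return np.dot(stats - means, U) * istds

Applied to the pilot simulations themselves, this yields statistics with identity covariance, so a plain Euclidean distance in the whitened space accounts for correlations between statistics as well as their scales.
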
Example 5
def gen_observed_data():
    """Generates ground truth parameters and an observed dataset to be used later on for inference."""

    prior = get_prior()
    w = prior.gen()[0]
    x, y = gen_xy_data(w)

    helper.save((w, x, y), datadir + 'observed_data.pkl')
Example 6
def gen_observed_data():
    """Generates an observed dataset to be used later on for inference."""

    ps = true_ps
    _, _, _, idts, _ = sim_likelihood(*ps)
    stats = calc_summary_stats(idts)

    helper.save((ps, stats), datadir + 'observed_data.pkl')
Example 7
def run_sims_from_prior():
    """
    Runs several simulations with parameters sampled from the prior. Saves the parameters, the summary statistics and
    the distances to the observed summary statistics. The intention is to use the data for rejection ABC and to train MDNs.
    """

    n_sims = 10**7

    # generate new data
    ms, xs = sim_joint(n_sims)
    dist = calc_dist(xs, x_obs)

    # save data
    helper.save((ms, xs, dist), datadir + 'sims_from_prior.pkl')
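
This variant is fully vectorized, so calc_dist presumably operates on the whole batch at once; a sketch of what such a batched Euclidean distance could look like (an assumption, since the actual implementation is not shown here):

def calc_dist(xs, x_obs):
    # assumed batched implementation: Euclidean distance of each row of xs to x_obs
    return np.sqrt(np.sum((xs - x_obs) ** 2, axis=-1))
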
Example 8
def run_sims_from_prior():
    """
    Runs several simulations with parameters sampled from the prior. Saves the parameters, the normalized summary
    statistics and the distances to the observed summary statistics. The intention is to use the data for rejection ABC
    and to train MDNs.
    """

    num_sims = 100000

    pilot_means, pilot_stds = helper.load(datadir + 'pilot_run_results.pkl')

    obs_stats = helper.load(datadir + 'obs_stats.pkl')
    obs_stats -= pilot_means
    obs_stats /= pilot_stds

    params = []
    stats = []
    dist = []

    for i in range(num_sims):

        prop_params = sim_prior_params()
        lv = mjp.LotkaVolterra(init, prop_params)

        # skip proposals whose simulation exceeds the step budget
        try:
            states = lv.sim_time(dt, duration, max_n_steps=max_n_steps)
        except mjp.SimTooLongException:
            continue

        sum_stats = calc_summary_stats(states)
        sum_stats -= pilot_means
        sum_stats /= pilot_stds

        params.append(prop_params)
        stats.append(sum_stats)
        dist.append(calc_dist(sum_stats, obs_stats))

        print('simulation {0}, distance = {1}'.format(i, dist[-1]))

    params = np.array(params)
    stats = np.array(stats)
    dist = np.array(dist)

    filename = datadir + 'sims_from_prior_{0}.pkl'.format(time.time())
    helper.save((params, stats, dist), filename)
Example 9
def get_obs_stats():
    """
    Runs the Lotka-Volterra simulation once with the true parameters and saves the observed summary statistics.
    The intention is to use the observed summary statistics to perform inference on the parameters.
    """

    lv = mjp.LotkaVolterra(init, true_params)
    states = lv.sim_time(dt, duration)
    stats = calc_summary_stats(states)

    helper.save(stats, datadir + 'obs_stats.pkl')

    plt.figure()
    times = np.linspace(0.0, duration, int(duration / dt) + 1)
    plt.plot(times, states[:, 0], label='predators')
    plt.plot(times, states[:, 1], label='prey')
    plt.xlabel('time')
    plt.ylabel('counts')
    plt.title('params = {0}'.format(true_params))
    plt.legend(loc='upper right')
    plt.show()