def run_sims_from_prior():
    """
    Simulates from the prior in batches and stores every batch in its own file.

    For each simulation a parameter vector is drawn with sim_prior, pushed through
    sim_likelihood, reduced with calc_summary_stats and compared against the
    observed summary statistics with calc_dist. Each batch is saved under datadir
    as 'sims_from_prior_<batch index>.pkl', holding the tuple
    (parameters, summary statistics, distances).

    The stored data is intended for rejection abc and for training mdns.
    """

    n_files = 10
    n_sims_per_file = 10**6

    # only the second entry of the observed data file is used here
    _, obs_stats = helper.load(datadir + 'observed_data.pkl')

    for file_idx in xrange(n_files):

        # fresh buffers for this batch
        params = np.empty([n_sims_per_file, 3])
        sum_stats = np.empty([n_sims_per_file, n_percentiles])
        dists = np.empty(n_sims_per_file)

        # global index of the first simulation in this batch, used for logging only
        first_sim = file_idx * n_sims_per_file

        for k in xrange(n_sims_per_file):
            params[k] = sim_prior()
            # of the five values returned by the simulator only the fourth is needed
            _, _, _, idts, _ = sim_likelihood(*params[k])
            sum_stats[k] = calc_summary_stats(idts)
            dists[k] = calc_dist(sum_stats[k], obs_stats)

            print('simulation {0}, distance = {1}'.format(first_sim + k, dists[k]))

        # save this batch before starting the next one
        out_file = datadir + 'sims_from_prior_{0}.pkl'.format(file_idx)
        helper.save((params, sum_stats, dists), out_file)
def run_sims_from_prior():
    """
    Draws parameters from the prior, generates data for each draw and records the
    distance between the generated data and the observed data.

    Everything is written to a single file, 'sims_from_prior.pkl' under datadir,
    as the tuple (parameters, data, distances). The stored data is intended for
    rejection abc and for training mdns.
    """

    n_sims = 10 ** 7

    # observed data file holds three entries; the first one is not needed here
    _, x, obs_data = helper.load(datadir + 'observed_data.pkl')
    prior = get_prior()

    # output buffers, one row (or entry) per simulation
    ws = np.empty([n_sims, n_dim])
    data = np.empty([n_sims, n_data])
    dist = np.empty(n_sims)

    for sim_idx in xrange(n_sims):

        # prior.gen() returns a batch; take its first element as this draw
        sample = prior.gen()[0]
        generated = gen_y_data(sample, x)

        ws[sim_idx] = sample
        data[sim_idx] = generated
        dist[sim_idx] = calc_dist(generated, obs_data)

        print('simulation {0}, distance = {1}'.format(sim_idx, dist[sim_idx]))

    out_file = datadir + 'sims_from_prior.pkl'
    helper.save((ws, data, dist), out_file)
def run_sims_from_prior():
    """
    Simulates the mjp.LotkaVolterra model for parameters sampled from the prior and
    saves parameters, normalized summary statistics and distances to the normalized
    observed summary statistics.

    Summary statistics are normalized with the means and stds stored in
    'pilot_run_results.pkl'. Simulations that raise mjp.SimTooLongException are
    skipped and not retried, so fewer than num_sims results may be saved. The
    output file name carries the current time stamp.

    Intention is to use the data for rejection abc and to train mdns.
    """

    num_sims = 100000

    pilot_means, pilot_stds = helper.load(datadir + 'pilot_run_results.pkl')

    # normalize the observed statistics in place
    obs_stats = helper.load(datadir + 'obs_stats.pkl')
    obs_stats -= pilot_means
    obs_stats /= pilot_stds

    kept_params = []
    kept_stats = []
    kept_dists = []

    for sim_idx in xrange(num_sims):

        candidate = sim_prior_params()
        model = mjp.LotkaVolterra(init, candidate)

        try:
            states = model.sim_time(dt, duration, max_n_steps=max_n_steps)
        except mjp.SimTooLongException:
            # discard this draw and move on to the next one
            continue

        # normalize the simulated statistics in place, same as the observed ones
        normalized = calc_summary_stats(states)
        normalized -= pilot_means
        normalized /= pilot_stds

        kept_params.append(candidate)
        kept_stats.append(normalized)
        this_dist = calc_dist(normalized, obs_stats)
        kept_dists.append(this_dist)

        print('simulation {0}, distance = {1}'.format(sim_idx, this_dist))

    params = np.array(kept_params)
    stats = np.array(kept_stats)
    dist = np.array(kept_dists)

    filename = datadir + 'sims_from_prior_{0}.pkl'.format(time.time())
    helper.save((params, stats, dist), filename)
def show_true_posterior():
    """
    Calculates the true posterior analytically and plots the marginals of the
    prior and then of the posterior, marking the stored w in both plots.
    """

    true_w, x_obs, y_obs = helper.load(datadir + 'observed_data.pkl')

    prior = get_prior()
    posterior = calc_posterior(prior, x_obs, y_obs)

    # prior first, posterior second; same limits and ground truth for both
    for pdf in (prior, posterior):
        helper.plot_pdf_marginals(pdf=pdf, lims=[-3.0, 3.0], gt=true_w)
def calc_summary_stats(data, whiten=True):
    """
    Given observations, calculate summary statistics.

    The statistics are n_percentiles percentiles of data, evenly spaced between
    the 0th and the 100th. If whiten is true, they are additionally transformed
    with the (means, U, istds) triple stored in 'pilot_run_results.pkl':
    the means are subtracted, the result is multiplied by U and then scaled
    elementwise by istds.
    """

    levels = np.linspace(0.0, 100.0, n_percentiles)
    stats = np.percentile(data, levels)

    if not whiten:
        return stats

    # note: the pilot run results are read from disk on every whitening call
    means, U, istds = helper.load(datadir + 'pilot_run_results.pkl')

    stats -= means
    stats = np.dot(stats, U)
    stats *= istds

    return stats
def load_sims_from_prior(n_files=10):
    """
    Loads the huge file(s) that store the results from simulations from the prior.

    Reads 'sims_from_prior_<i>.pkl' for i = 0, ..., n_files - 1 from datadir. Each
    file must hold a (ws, data, dist) triple; the triples are stacked along the
    first axis in file order.

    :param n_files: number of consecutively numbered files to read
    :return: tuple (ws, data, dist) of stacked arrays; with n_files == 0 these are
        empty arrays of shape [0, n_dim], [0, n_data] and [0]
    """

    # Collect the pieces and concatenate once at the end. Concatenating inside the
    # loop re-copies everything loaded so far for every file, which is wasteful for
    # files this large. Each list is seeded with an empty array of the expected
    # width so that the zero-file case, the column-count check done by
    # np.concatenate and the resulting dtype are the same as when growing an
    # initially empty array.
    ws_parts = [np.empty([0, n_dim])]
    data_parts = [np.empty([0, n_data])]
    dist_parts = [np.empty([0])]

    # n_files is small, so range is fine on both python 2 and 3
    for i in range(n_files):
        ws_i, data_i, dist_i = helper.load(datadir + 'sims_from_prior_{0}.pkl'.format(i))
        ws_parts.append(ws_i)
        data_parts.append(data_i)
        dist_parts.append(dist_i)

    ws = np.concatenate(ws_parts, axis=0)
    data = np.concatenate(data_parts, axis=0)
    dist = np.concatenate(dist_parts, axis=0)

    # sanity check: all three arrays must describe the same simulations
    n_sims = ws.shape[0]
    assert n_sims == data.shape[0]
    assert n_sims == dist.shape[0]

    return ws, data, dist
def load_sims_from_prior(n_files=12):
    """
    Loads the huge file(s) that store the results from simulations from the prior.

    Reads 'sims_from_prior_<i>.pkl' for i = 0, ..., n_files - 1 from datadir. Each
    file must hold a (params, stats, dist) triple with 4 parameter columns and
    9 statistic columns; the triples are stacked along the first axis in file order.

    :param n_files: number of consecutively numbered files to read
    :return: tuple (params, stats, dist) of stacked arrays; with n_files == 0 these
        are empty arrays of shape [0, 4], [0, 9] and [0]
    """

    # Collect the pieces and concatenate once at the end. Concatenating inside the
    # loop re-copies everything loaded so far for every file, which is wasteful for
    # files this large. Each list is seeded with an empty array of the expected
    # width so that the zero-file case, the column-count check done by
    # np.concatenate and the resulting dtype are the same as when growing an
    # initially empty array.
    params_parts = [np.empty([0, 4])]
    stats_parts = [np.empty([0, 9])]
    dist_parts = [np.empty([0])]

    # n_files is small, so range is fine on both python 2 and 3
    for i in range(n_files):
        params_i, stats_i, dist_i = helper.load(
            datadir + 'sims_from_prior_{0}.pkl'.format(i))
        params_parts.append(params_i)
        stats_parts.append(stats_i)
        dist_parts.append(dist_i)

    params = np.concatenate(params_parts, axis=0)
    stats = np.concatenate(stats_parts, axis=0)
    dist = np.concatenate(dist_parts, axis=0)

    # sanity check: all three arrays must describe the same simulations
    n_sims = params.shape[0]
    assert n_sims == stats.shape[0]
    assert n_sims == dist.shape[0]

    return params, stats, dist
def show_histograms(n_samples=1000):
    """
    Simulates n_samples (parameters, summary statistics) pairs from the joint and
    shows two histogram figures: one for the sampled parameters, one for the
    simulated summary statistics. The values stored in 'observed_data.pkl' are
    passed to the plots as ground truth.
    """

    true_ps, obs_stats = helper.load(datadir + 'observed_data.pkl')

    sampled_ps = np.empty([n_samples, 3])
    sampled_stats = np.empty([n_samples, n_percentiles])

    for k in xrange(n_samples):
        sampled_ps[k] = sim_prior()
        # of the five values returned by the simulator only the fourth is needed
        _, _, _, idts, _ = sim_likelihood(*sampled_ps[k])
        sampled_stats[k] = calc_summary_stats(idts)

    # histograms of the parameters sampled from the prior
    helper.plot_hist_marginals(sampled_ps, lims=disp_lims, gt=true_ps)
    plt.gcf().suptitle('p(thetas)')

    # histograms of the simulated summary statistics
    helper.plot_hist_marginals(sampled_stats, gt=obs_stats)
    plt.gcf().suptitle('p(stats)')

    # do not block, so the caller keeps control while the figures are open
    plt.show(block=False)
def sum_stats_hist():
    """
    Simulates mjp.LotkaVolterra with true_params until 1000 runs have completed
    and plots, on a 3 x 3 grid, a histogram of each normalized summary statistic.

    Runs that raise mjp.SimTooLongException are repeated and do not count towards
    the total. Normalization uses the means and stds in 'pilot_run_results.pkl'.
    """

    n_sims = 1000
    collected = []

    pilot_means, pilot_stds = helper.load(datadir + 'pilot_run_results.pkl')

    # keep simulating until enough successful runs have been gathered
    while len(collected) < n_sims:

        model = mjp.LotkaVolterra(init, true_params)

        try:
            states = model.sim_time(dt, duration, max_n_steps=max_n_steps)
        except mjp.SimTooLongException:
            # failed run: try again without counting it
            continue

        collected.append(calc_summary_stats(states))
        print('simulation {0}'.format(len(collected)))

    # normalize all statistics at once, in place
    sum_stats = np.array(collected)
    sum_stats -= pilot_means
    sum_stats /= pilot_stds

    _, axs = plt.subplots(3, 3)
    nbins = int(np.sqrt(n_sims))

    # NOTE(review): `normed` was removed from recent matplotlib releases in favour
    # of `density`; confirm the matplotlib version this is run with
    for col, ax in enumerate(axs.flatten()):
        ax.hist(sum_stats[:, col], nbins, normed=True)
        ax.set_title('stat ' + str(col + 1))

    plt.show()