def do_pilot_run():
    """
    Collects summary statistics from a batch of pilot simulations and saves
    their per-statistic mean and standard deviation.

    Parameters are drawn with sim_prior_params() and fed to the Lotka-Volterra
    simulator. A simulation that raises mjp.SimTooLongException is thrown away
    and a fresh parameter draw is made, so the statistics are always computed
    over exactly n_sims completed simulations.

    The saved (means, stds) pair is meant for normalizing summary statistics
    before distance-based inference (rejection or mcmc abc): the statistics
    live on different scales, so a euclidean distance on the raw values is not
    meaningful. Normalized inputs also help when training mdns.
    """
    n_sims = 1000
    collected = []

    # The number of collected results doubles as the progress counter: it only
    # advances when a simulation actually completes.
    while len(collected) < n_sims:
        lv = mjp.LotkaVolterra(init, sim_prior_params())

        try:
            states = lv.sim_time(dt, duration, max_n_steps=max_n_steps)
        except mjp.SimTooLongException:
            # Aborted run: redraw parameters without counting this attempt.
            continue

        collected.append(calc_summary_stats(states))
        print('pilot simulation {0}'.format(len(collected)))

    stat_matrix = np.array(collected)
    means = np.mean(stat_matrix, axis=0)
    # ddof=1 gives the sample (unbiased-variance) standard deviation.
    stds = np.std(stat_matrix, axis=0, ddof=1)

    helper.save((means, stds), datadir + 'pilot_run_results.pkl')
def run_sims_from_prior():
    """
    Simulates a large number of datasets from parameters drawn from the prior.

    For every draw the sampled parameters, the generated data and the distance
    between the generated and the observed data are recorded. The three arrays
    are saved together as (ws, data, dist) in 'sims_from_prior.pkl', to be used
    for rejection abc and for training mdns.
    """
    n_sims = 10 ** 7

    # Of the stored observed-data tuple only the inputs x and the observed
    # data are needed here; the first element is ignored.
    _, x, obs_data = helper.load(datadir + 'observed_data.pkl')
    prior = get_prior()

    # Preallocate the outputs; every row is filled in the loop below.
    ws = np.empty([n_sims, n_dim])
    data = np.empty([n_sims, n_data])
    dist = np.empty(n_sims)

    for i in xrange(n_sims):
        w_sample = prior.gen()[0]
        y_sim = gen_y_data(w_sample, x)

        ws[i] = w_sample
        data[i] = y_sim
        dist[i] = calc_dist(y_sim, obs_data)

        print('simulation {0}, distance = {1}'.format(i, dist[i]))

    helper.save((ws, data, dist), datadir + 'sims_from_prior.pkl')
def run_sims_from_prior():
    """
    Simulates from the prior in batches and writes one file per batch.

    Each batch holds n_sims_per_file simulations. For every simulation the
    sampled parameters, the summary statistics and the distance to the
    observed summary statistics are stored, and the batch is saved as
    (ps, stats, dist) in 'sims_from_prior_<batch index>.pkl'. The data is
    intended for rejection abc and for training mdns.
    """
    n_files = 10
    n_sims_per_file = 10**6

    # Only the observed summary statistics are needed from the stored pair.
    _, obs_stats = helper.load(datadir + 'observed_data.pkl')

    for file_idx in xrange(n_files):

        # Fresh buffers per batch, so each file contains only its own rows.
        ps = np.empty([n_sims_per_file, 3])
        stats = np.empty([n_sims_per_file, n_percentiles])
        dist = np.empty(n_sims_per_file)

        # Global index of the first simulation in this batch, for progress
        # reporting across files.
        first_sim = file_idx * n_sims_per_file

        for i in xrange(n_sims_per_file):
            ps[i] = sim_prior()
            # Only the fourth output of the simulator is summarized.
            _, _, _, idts, _ = sim_likelihood(*ps[i])
            stats[i] = calc_summary_stats(idts)
            dist[i] = calc_dist(stats[i], obs_stats)

            print('simulation {0}, distance = {1}'.format(first_sim + i, dist[i]))

        batch_file = datadir + 'sims_from_prior_{0}.pkl'.format(file_idx)
        helper.save((ps, stats, dist), batch_file)
def do_pilot_run():
    """
    Runs a number of simulations and saves the quantities needed to whiten the
    summary statistics: their mean, the eigenvectors of their covariance
    matrix and the inverse square roots of the corresponding eigenvalues.

    The intention is to use these to normalize the summary statistics when
    doing distance-based inference, like rejection or mcmc abc. Due to the
    different scales of each summary statistic, the euclidean distance is not
    meaningful on the original summary statistics. Note that normalization
    also helps when using mdns, since it normalizes the neural net input.

    The result is saved as (means, U, istds) in 'pilot_run_results.pkl', where
    the columns of U are the eigenvectors and istds[k] belongs to column k.
    Eigenpairs are ordered by ascending eigenvalue.
    NOTE(review): presumably consumed as ((stats - means) . U) * istds by the
    whitening code -- confirm against calc_summary_stats.
    """
    n_sims = 10**5
    stats = np.empty([n_sims, n_percentiles])

    for i in xrange(n_sims):
        ps = sim_prior()
        # Only the fourth output of the simulator is summarized.
        _, _, _, idts, _ = sim_likelihood(*ps)
        # Raw statistics are required here: the whitening constants are what
        # this function is computing.
        stats[i] = calc_summary_stats(idts, whiten=False)
        print('pilot simulation {0}'.format(i))

    means = np.mean(stats, axis=0)
    stats -= means  # center in place; stats is not needed uncentered again
    cov = np.dot(stats.T, stats) / n_sims

    # cov is symmetric by construction, so use the symmetric eigensolver: it
    # guarantees real eigenvalues and orthonormal eigenvectors, which the
    # general-purpose np.linalg.eig does not.
    eigvals, U = np.linalg.eigh(cov)
    istds = np.sqrt(1.0 / eigvals)

    helper.save((means, U, istds), datadir + 'pilot_run_results.pkl')
def gen_observed_data():
    """
    Draws ground truth parameters from the prior, generates a dataset from
    them and saves both.

    The tuple (w, x, y) is written to 'observed_data.pkl' so that later
    inference runs can load the observed data together with the parameters
    that produced it.
    """
    # gen() returns a batch of samples; the first one is the ground truth.
    true_w = get_prior().gen()[0]
    x, y = gen_xy_data(true_w)

    helper.save((true_w, x, y), datadir + 'observed_data.pkl')
def gen_observed_data():
    """
    Simulates once with the true parameters and saves the resulting summary
    statistics as the observed dataset.

    The pair (true parameters, summary statistics) is written to
    'observed_data.pkl' for use in later inference.
    """
    # Only the fourth output of the simulator is summarized.
    _, _, _, idts, _ = sim_likelihood(*true_ps)
    obs_stats = calc_summary_stats(idts)

    helper.save((true_ps, obs_stats), datadir + 'observed_data.pkl')
def run_sims_from_prior():
    """
    Draws a large joint sample of parameters and data, computes the distance
    of each simulated data point to the observed one, and saves everything.

    The saved tuple is (parameters, data, distances), written to
    'sims_from_prior.pkl'. It is intended for rejection abc and for training
    mdns.
    """
    n_sims = 10**7

    # sim_joint produces all parameter/data pairs in one call, so the
    # distances are computed on the whole batch at once as well.
    params, sims = sim_joint(n_sims)
    dists_to_obs = calc_dist(sims, x_obs)

    out_file = datadir + 'sims_from_prior.pkl'
    helper.save((params, sims, dists_to_obs), out_file)
def run_sims_from_prior():
    """
    Simulates the Lotka-Volterra model for parameters drawn from the prior and
    saves the results of every simulation that completes.

    Summary statistics are normalized with the pilot run means and standard
    deviations, and the same normalization is applied to the observed summary
    statistics before distances are computed. Simulations that raise
    mjp.SimTooLongException are skipped, so the saved arrays may contain fewer
    than num_sims rows.

    The tuple (params, stats, dist) is written to a file whose name includes
    the current timestamp, so repeated calls do not overwrite each other. The
    data is intended for rejection abc and for training mdns.
    """
    num_sims = 100000

    pilot_means, pilot_stds = helper.load(datadir + 'pilot_run_results.pkl')

    # Put the observed statistics on the same normalized scale as the
    # simulated ones (done in place on the freshly loaded array).
    obs_stats = helper.load(datadir + 'obs_stats.pkl')
    obs_stats -= pilot_means
    obs_stats /= pilot_stds

    kept_params = []
    kept_stats = []
    kept_dists = []

    for i in xrange(num_sims):
        candidate = sim_prior_params()
        lv = mjp.LotkaVolterra(init, candidate)

        try:
            states = lv.sim_time(dt, duration, max_n_steps=max_n_steps)
        except mjp.SimTooLongException:
            # Aborted run: nothing is recorded for this draw.
            continue

        sum_stats = calc_summary_stats(states)
        sum_stats -= pilot_means
        sum_stats /= pilot_stds

        this_dist = calc_dist(sum_stats, obs_stats)

        kept_params.append(candidate)
        kept_stats.append(sum_stats)
        kept_dists.append(this_dist)

        print('simulation {0}, distance = {1}'.format(i, this_dist))

    results = (np.array(kept_params), np.array(kept_stats), np.array(kept_dists))
    filename = datadir + 'sims_from_prior_{0}.pkl'.format(time.time())
    helper.save(results, filename)
def get_obs_stats():
    """
    Simulates the Lotka-Volterra model once with the true parameters, saves
    the summary statistics of that run as the observed statistics, and plots
    the simulated trajectories.

    The statistics are written to 'obs_stats.pkl' and are meant to be used as
    the observation when performing inference on the parameters. The plot
    shows both state columns against time and blocks until it is closed.
    """
    lv = mjp.LotkaVolterra(init, true_params)
    states = lv.sim_time(dt, duration)

    helper.save(calc_summary_stats(states), datadir + 'obs_stats.pkl')

    # One time point per simulation step, including both endpoints.
    n_points = int(duration / dt) + 1
    times = np.linspace(0.0, duration, n_points)

    plt.figure()
    # Column 0 of the state array is plotted as predators, column 1 as prey.
    for column, species in enumerate(('predators', 'prey')):
        plt.plot(times, states[:, column], label=species)

    plt.xlabel('time')
    plt.ylabel('counts')
    plt.title('params = {0}'.format(true_params))
    plt.legend(loc='upper right')
    plt.show()