def test_sum_stats_save_load(history: History):
    """Check that summary statistics of mixed types survive a store/load cycle.

    Two particles are stored whose summary statistics hold a float, a random
    array, the ``example_df()`` frame and an R data set. The values read back
    via ``history.get_sum_stats(0, 0)`` are compared entry by entry with what
    was stored.
    """
    vector = sp.random.rand(10)
    matrix = sp.random.rand(10, 2)

    particles = []

    parameter = Parameter({"a": 23, "b": 12})
    stats = {
        "ss1": .1,
        "ss2": matrix,
        "ss3": example_df(),
        "rdf0": r["iris"],
    }
    particles.append(Particle(0, parameter, .2, [.1], [stats], [], True))

    parameter = Parameter({"a": 23, "b": 12})
    stats = {
        "ss12": .11,
        "ss22": vector,
        "ss33": example_df(),
        "rdf": r["mtcars"],
    }
    particles.append(Particle(0, parameter, .2, [.1], [stats], [], True))

    history.append_population(
        0, 42, Population(particles), 2, ["m1", "m2"])

    weights, loaded = history.get_sum_stats(0, 0)
    # both particles were stored with weight 0.2; each is expected back as 0.5
    assert (weights == 0.5).all()

    first = loaded[0]
    assert first["ss1"] == .1
    assert (first["ss2"] == matrix).all()
    assert (first["ss3"] == example_df()).all().all()
    assert (first["rdf0"] == pandas2ri.ri2py(r["iris"])).all().all()

    second = loaded[1]
    assert second["ss12"] == .11
    assert (second["ss22"] == vector).all()
    assert (second["ss33"] == example_df()).all().all()
    assert (second["rdf"] == pandas2ri.ri2py(r["mtcars"])).all().all()
def test_sum_stats_save_load(history: History):
    """Check that summary statistics of mixed types survive a store/load cycle.

    Two particles of model 0 are stored, each with one accepted summary
    statistics dict containing a float, a random array, the ``example_df()``
    frame and an R data set. The result of
    ``history.get_weighted_sum_stats_for_model(0, 0)`` is then compared with
    the stored values.
    """
    vector = sp.random.rand(10)
    matrix = sp.random.rand(10, 2)

    particles = []

    parameter = Parameter({"a": 23, "b": 12})
    stats = {
        "ss1": .1,
        "ss2": matrix,
        "ss3": example_df(),
        "rdf0": r["faithful"],
    }  # TODO: check why iris fails
    particles.append(
        Particle(m=0,
                 parameter=parameter,
                 weight=.2,
                 accepted_sum_stats=[stats],
                 accepted_distances=[.1]))

    parameter = Parameter({"a": 23, "b": 12})
    stats = {
        "ss12": .11,
        "ss22": vector,
        "ss33": example_df(),
        "rdf": r["mtcars"],
    }
    particles.append(
        Particle(m=0,
                 parameter=parameter,
                 weight=.2,
                 accepted_sum_stats=[stats],
                 accepted_distances=[.1]))

    history.append_population(
        0, 42, Population(particles), 2, ["m1", "m2"])

    weights, loaded = history.get_weighted_sum_stats_for_model(0, 0)
    # both particles were stored with weight 0.2; each is expected back as 0.5
    assert (weights == 0.5).all()

    first = loaded[0]
    assert first["ss1"] == .1
    assert (first["ss2"] == matrix).all()
    assert (first["ss3"] == example_df()).all().all()
    assert (first["rdf0"] == pandas2ri.ri2py(r["faithful"])).all().all()

    second = loaded[1]
    assert second["ss12"] == .11
    assert (second["ss22"] == vector).all()
    assert (second["ss33"] == example_df()).all().all()
    assert (second["rdf"] == pandas2ri.ri2py(r["mtcars"])).all().all()
def simulate_one():
    """Draw one particle from the prior and simulate its summary statistics.

    Relies on ``model_prior``, ``parameter_priors``, ``models``, ``t`` and
    ``summary_statistics`` from the enclosing scope.

    Returns
    -------
    A particle that is marked as accepted, with weight 1.0, the simulated
    summary statistics as the single accepted entry and no rejected entries.
    """
    # draw the model index first, then a parameter from that model's prior
    m = int(model_prior.rvs())
    theta = parameter_priors[m].rvs()

    # simulate summary statistics for the drawn parameter
    result = models[m].summary_statistics(
        t, theta, summary_statistics)

    return Particle(
        m=m,
        parameter=theta,
        # sampled from prior, so all have uniform weight
        weight=1.0,
        # remember sum stat as accepted
        accepted_sum_stats=[result.sum_stats],
        # placeholder: the distance will be computed after initialization
        # of the distance function
        accepted_distances=[np.inf],
        rejected_sum_stats=[],
        rejected_distances=[],
        # all are happy and accepted
        accepted=True)
def simulate_one():
    """Return a fixed, empty dummy particle that is marked as accepted."""
    dummy = Particle(
        m=0,
        parameter={},
        weight=0,
        accepted_sum_stats=[],
        accepted_distances=[],
        accepted=True,
    )
    return dummy
def rand_pop_list(m: int):
    """
    Build a list of random particles for model m.

    The list length is drawn at random and is at least 3.

    Parameters
    ----------
    m: int
        the model number

    Returns
    -------
    list
        The generated particles, each with random parameters "a" and "b",
        a random weight, a fixed set of summary statistic keys and a
        random distance.
    """
    # draw the size before any per-particle random numbers
    size = np.random.randint(10) + 3

    particles = []
    for _ in range(size):
        parameter = Parameter({
            "a": np.random.randint(10),
            "b": np.random.randn(),
        })
        weight = np.random.rand() * 42
        sum_stat = {
            "ss_float": 0.1,
            "ss_int": 42,
            "ss_str": "foo bar string",
            "ss_np": np.random.rand(13, 42),
            "ss_df": example_df(),
        }
        distance = np.random.rand()
        particles.append(
            Particle(m=m,
                     parameter=parameter,
                     weight=weight,
                     sum_stat=sum_stat,
                     distance=distance))
    return particles
def test_single_particle_save_load_np_int64(history: History):
    """Make sure ``np.int64`` works as model and time index, just like ``int``.

    This is an important test: the single stored particle must be retrievable
    for every combination of built-in and numpy integer indices.
    """
    model_indices = [0, np.int64(0)]
    time_indices = [0, np.int64(0)]

    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(m=0,
                 parameter=parameter,
                 weight=.2,
                 accepted_sum_stats=[{"ss": .1}],
                 accepted_distances=[.1]),
    ]
    history.append_population(0, 42, Population(particles), 2, [""])

    # same visiting order as two nested loops over models, then times
    index_pairs = [(m, t) for m in model_indices for t in time_indices]
    for m, t in index_pairs:
        df, w = history.get_distribution(m, t)
        assert w[0] == 1
        assert df.a.iloc[0] == 23
        assert df.b.iloc[0] == 12
def rand_pop(m: int):
    """
    Build a list of random particles, of random length >= 3.

    Parameters
    ----------
    m: int
        the model number

    Returns
    -------
    list
        The generated particles.
    """
    # draw the size before any per-particle random numbers
    size = np.random.randint(10) + 3

    particles = []
    for _ in range(size):
        parameter = Parameter({
            "a": np.random.randint(10),
            "b": np.random.randn(),
        })
        third_arg = sp.rand() * 42
        fourth_arg = [sp.rand()]
        stats = {
            "ss_float": 0.1,
            "ss_int": 42,
            "ss_str": "foo bar string",
            "ss_np": sp.rand(13, 42),
            "ss_df": example_df(),
        }
        particles.append(
            Particle(m, parameter, third_arg, fourth_arg, [stats], [], True))
    return particles
def simulate_one():
    """Draw one particle from the prior and simulate its summary statistics.

    Relies on ``model_prior``, ``parameter_priors``, ``models`` and
    ``summary_statistics`` from the enclosing scope; the simulation is run
    with time index 0.

    Returns
    -------
    A non-preliminary, accepted particle with weight 1.0, proposal id 0 and
    an infinite placeholder distance.
    """
    # draw the model index first, then a parameter from that model's prior
    m = int(model_prior.rvs())
    theta = parameter_priors[m].rvs()

    # simulate summary statistics for the drawn parameter
    result = models[m].summary_statistics(0, theta, summary_statistics)

    return Particle(
        m=m,
        parameter=theta,
        # sampled from prior, so all have uniform weight
        weight=1.0,
        sum_stat=result.sum_stat,
        # placeholder: the distance will be computed after initialization
        # of the distance function
        distance=np.inf,
        # all are happy and accepted
        accepted=True,
        proposal_id=0,
        preliminary=False,
    )
def test_sum_stats_save_load(history: History):
    """Check that summary statistics of mixed types survive a store/load cycle.

    Two particles of model 0 are stored, each with a summary statistics dict
    containing a float, a random array, the ``example_df()`` frame and an R
    data set. The result of ``history.get_weighted_sum_stats_for_model(0, 0)``
    is then compared with the stored values; R data sets are compared inside
    a ``pandas2ri`` local converter context.
    """
    vector = np.random.rand(10)
    matrix = np.random.rand(10, 2)

    particles = []

    parameter = Parameter({"a": 23, "b": 12})
    stats = {
        "ss1": .1,
        "ss2": matrix,
        "ss3": example_df(),
        "rdf0": r["iris"],
    }
    particles.append(
        Particle(m=0,
                 parameter=parameter,
                 weight=.2,
                 sum_stat=stats,
                 distance=.1))

    parameter = Parameter({"a": 23, "b": 12})
    stats = {
        "ss12": .11,
        "ss22": vector,
        "ss33": example_df(),
        "rdf": r["mtcars"],
    }
    particles.append(
        Particle(m=0,
                 parameter=parameter,
                 weight=.2,
                 sum_stat=stats,
                 distance=.1))

    history.append_population(
        0, 42, Population(particles), 2, ["m1", "m2"])

    weights, loaded = history.get_weighted_sum_stats_for_model(0, 0)
    # both particles were stored with weight 0.2; each is expected back as 0.5
    assert (weights == 0.5).all()

    first = loaded[0]
    assert first["ss1"] == .1
    assert (first["ss2"] == matrix).all()
    assert (first["ss3"] == example_df()).all().all()
    with localconverter(pandas2ri.converter):
        assert (first["rdf0"] == r["iris"]).all().all()

    second = loaded[1]
    assert second["ss12"] == .11
    assert (second["ss22"] == vector).all()
    assert (second["ss33"] == example_df()).all().all()
    with localconverter(pandas2ri.converter):
        assert (second["rdf"] == r["mtcars"]).all().all()
def evaluate_preliminary_particle(
        particle: Particle, t, ana_vars: AnalysisVars) -> Particle:
    """Turn a preliminary particle into an evaluated one.

    Every summary statistic stored on the particle is handed to the acceptor,
    which yields a distance, an acceptance decision and an acceptance weight.

    Parameters
    ----------
    particle:
        The preliminary particle whose ``accepted_sum_stats`` are evaluated.
    t:
        Time index passed on to the acceptor.
    ana_vars:
        Supplies the acceptor, distance function, epsilon, observed data
        ``x_0`` and ``nr_samples_per_parameter``.

    Returns
    -------
    evaluated_particle:
        The evaluated particle, no longer marked as preliminary.

    Raises
    ------
    AssertionError
        If the particle is not marked as preliminary.
    """
    if not particle.preliminary:
        raise AssertionError("Particle is not preliminary")

    # containers for the outcome of the individual acceptance checks
    kept_stats, kept_distances, kept_weights = [], [], []
    dropped_stats, dropped_distances = [], []

    for sum_stat in particle.accepted_sum_stats:
        outcome = ana_vars.acceptor(
            distance_function=ana_vars.distance_function,
            eps=ana_vars.eps,
            x=sum_stat,
            x_0=ana_vars.x_0,
            t=t,
            par=particle.parameter)
        if not outcome.accept:
            dropped_stats.append(sum_stat)
            dropped_distances.append(outcome.distance)
            continue
        kept_stats.append(sum_stat)
        kept_distances.append(outcome.distance)
        # the acceptance weight
        kept_weights.append(outcome.weight)

    # reconstruct weighting function from `weight_function`:
    # the weight is the sampling weight times the acceptance weight(s),
    # scaled by the fraction of accepted simulations for this parameter
    accepted_fraction = len(kept_stats) / ana_vars.nr_samples_per_parameter
    weight = particle.weight * np.prod(kept_weights) * accepted_fraction

    return Particle(
        m=particle.m,
        parameter=particle.parameter,
        weight=weight,
        accepted_sum_stats=kept_stats,
        accepted_distances=kept_distances,
        rejected_sum_stats=dropped_stats,
        rejected_distances=dropped_distances,
        accepted=bool(kept_distances),
        preliminary=False,
        proposal_id=particle.proposal_id,
    )
def evaluate_proposal(
    m_ss: int,
    theta_ss: Parameter,
    t: int,
    models: List[Model],
    summary_statistics: Callable,
    distance_function: Distance,
    eps: Epsilon,
    acceptor: Acceptor,
    x_0: dict,
    weight_function: Callable,
    proposal_id: int,
) -> Particle:
    """Simulate and evaluate one proposed (model, parameter) pair.

    Parameters
    ----------
    m_ss, theta_ss:
        The proposed (model, parameter) sample.
    t:
        The current time.
    models:
        List of all models.
    summary_statistics:
        Function to compute summary statistics from model output.
    distance_function:
        The distance function.
    eps:
        The epsilon threshold.
    acceptor:
        The acceptor.
    x_0:
        The observed summary statistics.
    weight_function:
        Function by which to reweight the sample.
    proposal_id:
        Id of the transition kernel.

    Returns
    -------
    particle:
        A particle containing all information. Data for the given
        parameters theta_ss are simulated, summary statistics computed and
        evaluated. Rejected proposals get weight 0.
    """
    # simulate, compute distance, check acceptance
    result = models[m_ss].accept(
        t, theta_ss, summary_statistics, distance_function, eps, acceptor,
        x_0)

    # only accepted proposals are reweighted
    weight = (
        weight_function(m_ss, theta_ss, result.weight)
        if result.accepted
        else 0
    )

    return Particle(
        m=m_ss,
        parameter=theta_ss,
        weight=weight,
        sum_stat=result.sum_stat,
        distance=result.distance,
        accepted=result.accepted,
        preliminary=False,
        proposal_id=proposal_id,
    )
def test_t_count(history: History):
    """``history.max_t`` must equal the index of the latest stored population."""
    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(m=0,
                 parameter=parameter,
                 weight=.2,
                 accepted_sum_stats=[{"ss": .1}],
                 accepted_distances=[.1]),
    ]
    for t in range(1, 10):
        population = Population(particles)
        history.append_population(t, 42, population, 2, ["m1"])
        assert t == history.max_t
def test_total_nr_samples(history: History):
    """Simulation counts of all appended populations must add up."""
    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(m=0,
                 parameter=parameter,
                 weight=.2,
                 accepted_sum_stats=[{"ss": .1}],
                 accepted_distances=[.1]),
    ]
    population = Population(particles)

    # the same population is appended twice, with 4234 and 3 simulations
    history.append_population(0, 42, population, 4234, ["m1"])
    history.append_population(0, 42, population, 3, ["m1"])

    assert 4237 == history.total_nr_simulations
def test_t_count(history: History):
    """``history.max_t`` must equal the index of the latest stored population."""
    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(0, parameter, .2, [.1], [{"ss": .1}], [], True),
    ]
    for t in range(1, 10):
        population = Population(particles)
        history.append_population(t, 42, population, 2, ["m1"])
        assert t == history.max_t
def test_save_no_sum_stats(history: History):
    """
    Check that stored data can still be retrieved correctly when the history
    is told not to save summary statistics.
    """
    particles = [
        Particle(
            m=0,
            parameter=Parameter({"th0": np.random.random()}),
            weight=1.0 / 6,
            sum_stat={
                "ss0": np.random.random(),
                "ss1": np.random.random(),
            },
            distance=np.random.random(),
        )
        for _ in range(6)
    ]
    population = Population(particles)

    # do not save sum stats
    # read the attribute first to make sure it exists under this name
    print(history.stores_sum_stats)
    history.stores_sum_stats = False

    # test some basic routines
    history.append_population(
        t=0,
        current_epsilon=42.97,
        population=population,
        nr_simulations=10,
        model_names=[""],
    )

    # just call
    history.get_distribution(0, 0)

    # weights and distances must match those of the stored population
    from_history = history.get_weighted_distances()
    from_population = population.get_weighted_distances()
    assert np.allclose(
        from_history[['distance', 'w']],
        from_population[['distance', 'w']],
    )

    weights, sum_stats = history.get_weighted_sum_stats(t=0)
    # all particles should be contained nonetheless
    assert len(weights) == len(particles)
    # ... but every summary statistic should be empty
    for sum_stat in sum_stats:
        assert not sum_stat

    history.get_population_extended()
def test_t_count(history: History):
    """``history.max_t`` must equal the index of the latest stored population."""
    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(m=0, parameter=parameter, weight=1.0,
                 sum_stat={"ss": 0.1}, distance=0.1),
    ]
    for t in range(1, 10):
        population = Population(particles)
        history.append_population(t, 42, population, 2, ["m1"])
        assert t == history.max_t
def test_single_particle_save_load(history: History):
    """Store a single particle and read its parameters and weight back."""
    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(m=0,
                 parameter=parameter,
                 weight=.2,
                 accepted_sum_stats=[{"ss": .1}],
                 accepted_distances=[.1]),
    ]
    history.append_population(0, 42, Population(particles), 2, [""])

    df, w = history.get_distribution(0, 0)
    # stored with weight 0.2, but expected back with weight 1
    assert w[0] == 1
    assert df.a.iloc[0] == 23
    assert df.b.iloc[0] == 12
def test_total_nr_samples(history: History):
    """Simulation counts of all appended populations must add up."""
    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(0, parameter, .2, [.1], [{"ss": .1}], [], True),
    ]
    population = Population(particles)

    # the same population is appended twice, with 4234 and 3 simulations
    history.append_population(0, 42, population, 4234, ["m1"])
    history.append_population(0, 42, population, 3, ["m1"])

    assert 4237 == history.total_nr_simulations
def test_single_particle_save_load(history: History):
    """Store a single particle and read its parameters and weight back."""
    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(0, parameter, .2, [.1], [{"ss": .1}], [], True),
    ]
    history.append_population(0, 42, Population(particles), 2, [""])

    df, w = history.get_distribution(0, 0)
    # stored with weight 0.2, but expected back with weight 1
    assert w[0] == 1
    assert df.a.iloc[0] == 23
    assert df.b.iloc[0] == 12
def test_model_name_load_single_with_pop(history_uninitialized: History):
    """Model names must be recoverable from a freshly opened history.

    Initial data and one population are stored, then a second ``History`` is
    created from the first one's ``db_identifier`` and queried for the names.
    """
    h = history_uninitialized
    model_names = ["m1"]
    h.store_initial_data(0, {}, {}, {}, model_names, "", "", "")

    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(m=0,
                 parameter=parameter,
                 weight=.2,
                 accepted_sum_stats=[{"ss": .1}],
                 accepted_distances=[.1]),
    ]
    h.append_population(0, 42, Population(particles), 2, model_names)

    reopened = History(h.db_identifier)
    loaded_names = reopened.model_names()
    assert model_names == loaded_names
def test_total_nr_samples(history: History):
    """Simulation counts of all appended populations must add up."""
    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(m=0, parameter=parameter, weight=1.0,
                 sum_stat={"ss": 0.1}, distance=0.1),
    ]
    population = Population(particles)

    # the same population is appended twice, with 4234 and 3 simulations
    history.append_population(0, 42, population, 4234, ["m1"])
    history.append_population(0, 42, population, 3, ["m1"])

    assert 4237 == history.total_nr_simulations
def only_simulate_data_for_proposal(
    m_ss: int,
    theta_ss: Parameter,
    t: int,
    nr_samples_per_parameter: int,
    models: List[Model],
    summary_statistics: Callable,
    weight_function: Callable,
    proposal_id: int,
) -> Particle:
    """Simulate data for parameters.

    Counterpart of `evaluate_proposal` that only simulates data for the
    passed parameters: no distances are calculated and acceptance is not
    checked here. That needs to be done post-hoc.

    Returns
    -------
    A particle marked as preliminary and accepted, holding one simulated
    summary statistic per requested sample.
    """
    # perform nr_samples_per_parameter simulations
    simulated = [
        models[m_ss].summary_statistics(
            t, theta_ss, summary_statistics).sum_stats
        for _ in range(nr_samples_per_parameter)
    ]

    # distance and weight are just dummies here, one per simulation;
    # they need to be recomputed later again
    dummy_distances = [np.inf for _ in simulated]
    dummy_weights = [1. for _ in simulated]

    # compute acceptance weight
    # TODO later replacement only works with nr_samples_per_parameter == 1
    weight = weight_function(
        dummy_distances, m_ss, theta_ss, dummy_weights)

    return Particle(
        m=m_ss,
        parameter=theta_ss,
        weight=weight,
        accepted_sum_stats=simulated,
        accepted_distances=dummy_distances,
        # needs to be accepted in order to be forwarded by the sampler,
        # and so as a single particle
        accepted=True,
        preliminary=True,
        proposal_id=proposal_id,
    )
def test_single_particle_save_load(history: History):
    """Store a single particle and read its parameters and weight back."""
    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(m=0, parameter=parameter, weight=1.0,
                 sum_stat={"ss": 0.1}, distance=0.1),
    ]
    history.append_population(0, 42, Population(particles), 2, [""])

    df, w = history.get_distribution(0, 0)
    assert w[0] == 1
    assert df.a.iloc[0] == 23
    assert df.b.iloc[0] == 12
def sample_from_prior(self) -> Sample:
    """Build a sample with one random particle per stored summary statistic.

    ``self.sumstats`` and ``self.accepted_list`` are walked in lockstep; each
    pair yields a particle of model 0 with random parameters 'p1' and 'p2',
    a random weight and a random distance.

    Returns
    -------
    The filled sample, created with ``record_rejected=True``.
    """
    sample = Sample(record_rejected=True)
    for sumstat, accepted in zip(self.sumstats, self.accepted_list):
        parameter = Parameter({
            'p1': np.random.randint(10),
            'p2': np.random.randn(),
        })
        weight = np.random.uniform()
        distance = np.random.uniform()
        particle = Particle(
            m=0,
            parameter=parameter,
            weight=weight,
            sum_stat=sumstat,
            distance=distance,
            accepted=accepted,
        )
        sample.append(particle)
    return sample
def test_model_name_load_single_with_pop(history_uninitialized: History):
    """Model names must be recoverable from a freshly opened history.

    Initial data and one population are stored, then a second ``History`` is
    created from the first one's ``db_identifier`` and queried for the names.
    """
    h = history_uninitialized
    model_names = ["m1"]
    h.store_initial_data(0, {}, {}, {}, model_names, "", "", "")

    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(0, parameter, .2, [.1], [{"ss": .1}], [], True),
    ]
    h.append_population(0, 42, Population(particles), 2, model_names)

    reopened = History(h.db_identifier)
    loaded_names = reopened.model_names()
    assert model_names == loaded_names
def test_model_name_load_single_with_pop(history_uninitialized: History):
    """Model names must be recoverable from a freshly opened history.

    Initial data and one population are stored, then a second ``History`` is
    created from the first one's ``db`` attribute and queried for the names.
    """
    h = history_uninitialized
    model_names = ["m1"]
    h.store_initial_data(0, {}, {}, {}, model_names, "", "", "")

    parameter = Parameter({"a": 23, "b": 12})
    particles = [
        Particle(m=0, parameter=parameter, weight=1.0,
                 sum_stat={"ss": 0.1}, distance=0.1),
    ]
    h.append_population(0, 42, Population(particles), 2, model_names)

    reopened = History(h.db)
    loaded_names = reopened.model_names()
    assert model_names == loaded_names
def rand_pop_list(m: int = 0, normalized: bool = True, n_sample: int = None):
    """
    Build a list of random, accepted particles for model m.

    Parameters
    ----------
    m:
        The model index
    normalized:
        Whether to rescale the particle weights so that they sum to 1.
    n_sample:
        Number of particles. If None, a random number >= 3 is drawn.

    Returns
    -------
    List[Particle]:
        A list of particles
    """
    size = np.random.randint(10) + 3 if n_sample is None else n_sample

    particles = []
    for _ in range(size):
        parameter = Parameter(
            {"a": np.random.randint(10), "b": np.random.randn()}
        )
        weight = np.random.rand() * 42
        sum_stat = {
            "ss_float": 0.1,
            "ss_int": 42,
            "ss_str": "foo bar string",
            "ss_np": np.random.rand(13, 42),
            "ss_df": example_df(),
        }
        particles.append(
            Particle(
                m=m,
                parameter=parameter,
                weight=weight,
                sum_stat=sum_stat,
                accepted=True,
                distance=np.random.rand(),
            )
        )

    if not normalized:
        return particles

    # rescale in place so that the weights add up to 1
    total_weight = sum(particle.weight for particle in particles)
    for particle in particles:
        particle.weight /= total_weight
    return particles
def evaluate_preliminary_particle(particle: Particle, t,
                                  ana_vars: AnalysisVars) -> Particle:
    """Turn a preliminary particle into an evaluated one.

    The particle's summary statistic is handed to the acceptor, which yields
    the distance, the acceptance decision and the acceptance weight.

    Parameters
    ----------
    particle:
        The preliminary particle to evaluate.
    t:
        Time index passed on to the acceptor.
    ana_vars:
        Supplies the acceptor, distance function, epsilon and observed data
        ``x_0``.

    Returns
    -------
    evaluated_particle:
        The evaluated particle, no longer marked as preliminary.

    Raises
    ------
    AssertionError
        If the particle is not marked as preliminary.
    """
    if not particle.preliminary:
        raise AssertionError("Particle is not preliminary")

    outcome = ana_vars.acceptor(
        distance_function=ana_vars.distance_function,
        eps=ana_vars.eps,
        x=particle.sum_stat,
        x_0=ana_vars.x_0,
        t=t,
        par=particle.parameter,
    )

    # reconstruct weighting function from `weight_function`: the weight is
    # the sampling weight times the acceptance weight; rejected particles
    # get weight 0
    weight = particle.weight * outcome.weight if outcome.accept else 0

    return Particle(
        m=particle.m,
        parameter=particle.parameter,
        weight=weight,
        sum_stat=particle.sum_stat,
        distance=outcome.distance,
        accepted=outcome.accept,
        preliminary=False,
        proposal_id=particle.proposal_id,
    )
def only_simulate_data_for_proposal(
    m_ss: int,
    theta_ss: Parameter,
    t: int,
    models: List[Model],
    summary_statistics: Callable,
    weight_function: Callable,
    proposal_id: int,
) -> Particle:
    """Simulate data for parameters.

    Counterpart of `evaluate_proposal` that only simulates data for the
    passed parameters: no distances are calculated and acceptance is not
    checked here. That needs to be done post-hoc.

    Returns
    -------
    A particle marked as preliminary and accepted, with an infinite
    placeholder distance.
    """
    # simulate
    result = models[m_ss].summary_statistics(
        t, theta_ss, summary_statistics)

    # compute weight; the acceptance weight 1. is a dummy that needs to be
    # recomputed later
    weight = weight_function(m_ss, theta_ss, 1.)

    return Particle(
        m=m_ss,
        parameter=theta_ss,
        weight=weight,
        sum_stat=result.sum_stat,
        # dummy distance, needs to be recomputed later
        distance=np.inf,
        # needs to be accepted in order to be forwarded by the sampler,
        # and so as a single particle
        accepted=True,
        preliminary=True,
        proposal_id=proposal_id,
    )
def __call__(self, *args, **kwargs):
    """Ignore all arguments and return a fixed dummy particle."""
    dummy = Particle(
        m=0,
        parameter={},
        weight=1,
        sum_stat={},
        distance=1,
    )
    return dummy