def test_simple_inference_can_be_run():
    """Smoke-test the full BOLFI workflow (sampling, ML, posterior, MAP,
    simulation and discrepancy computation) for every supported sampling type
    on the MA2 toy model."""
    n_samples = 4
    true_params = [0.5, 0.5]  # t1, t2
    for sampling_type in ["grid", "uniform", "BO"]:
        params = BolfiParams(bounds=((0, 1), (0, 1)),
                             n_samples=n_samples,
                             n_initial_evidence=2,
                             sampling_type=sampling_type,
                             grid_tics=[[0.25, 0.75], [0.33, 0.66]],
                             seed=1,
                             simulator_node_name="MA2",
                             discrepancy_node_name="d")
        model = get_model(n_obs=1000, true_params=true_params, seed_obs=1)
        # fix: removed unused local `results = list()` (leftover from the
        # consistency test, never read here)
        exp = BolfiFactory(model, params).get()
        exp.do_sampling()
        exp.compute_samples_and_ML()
        assert len(exp.samples) == n_samples
        assert exp.ML_val is not None
        exp.compute_posterior()
        assert exp.post is not None
        exp.compute_MAP()
        assert exp.MAP_val is not None
        ML_sim = exp.simulate_data(exp.ML)
        MAP_sim = exp.simulate_data(exp.MAP)
        # fix: discrepancies were computed but never checked; assert they
        # are actually produced so the last pipeline step is exercised
        assert exp.compute_discrepancy_with_data(exp.ML, ML_sim) is not None
        assert exp.compute_discrepancy_with_data(exp.MAP, MAP_sim) is not None
def modified_experiment(grid_params, elfi_params, rl_params, bolfi_params, obs_data, test_data, plot_data, types, replicates, region_size, ground_truth, n_cores, path_max_len, obs_set_size, seed, pdf, figsize):
    """Random-baseline experiment: draw one uniformly random parameter
    location ("MD") within the model bounds, then run the point-estimate
    simulation, plotting and error phases at that location.

    Returns the result dict accumulated by the phase pipeline.

    NOTE(review): `p` inside get_model(...) is a free variable resolved from
    an enclosing scope (not the `elfi_params` argument), and `types` is
    accepted but never used here -- confirm both are intentional.
    """
    elfi.new_model()
    # Build the "approx" model used for point-estimate simulation.
    model = get_model("approx", grid_params, p.get_elfi_params(), rl_params, obs_data, path_max_len, obs_set_size)
    inference_task = BolfiFactory(model, bolfi_params).get()
    bounds = elfi_params.get_bounds()
    ret = dict()
    ret["n_cores"] = n_cores
    ret["MD"] = dict()
    # Draw the baseline location reproducibly for the given seed.
    random_state = np.random.RandomState(seed)
    for k, v in bounds.items():
        ret["MD"][k] = random_state.uniform(v[0], v[1])
    print("Random location: {}".format(ret["MD"]))
    # No BOLFI sampling happens for the random baseline; fill in the fields
    # the downstream phases expect to find.
    ret["sampling_duration"] = 0
    ret["samples"] = dict()
    ret["n_samples"] = 0
    ret = PointEstimateSimulationPhase(replicates=replicates, region_size=region_size).run(inference_task, ret)
    ret = PlottingPhase(pdf=pdf, figsize=figsize, obs_data=obs_data, test_data=test_data, plot_data=plot_data).run(inference_task, ret)
    ret = GroundTruthErrorPhase(ground_truth=ground_truth).run(inference_task, ret)
    ret = PredictionErrorPhase(test_data=test_data).run(inference_task, ret)
    return ret
def test_simple_inference_can_be_run_consistently():
    """Two runs with the same fixed seed must yield matching ML estimates,
    for each supported sampling type."""
    for strategy in ["grid", "uniform", "BO"]:
        # Seed is fixed so each pair of runs below is fully reproducible.
        cfg = BolfiParams(bounds=((0, 1), (0, 1)),
                          n_samples=4,
                          n_initial_evidence=2,
                          sampling_type=strategy,
                          grid_tics=[[0.25, 0.75], [0.33, 0.66]],
                          seed=1,
                          discrepancy_node_name="d")
        factory = BolfiFactory(get_model(), cfg)
        ml_values = [factory.get().run().ML[0] for _ in range(2)]
        np.testing.assert_array_almost_equal(ml_values[0], ml_values[1])
def modified_experiment(grid_params, elfi_params, rl_params, bolfi_params, obs_data, test_data, plot_data, types, replicates, region_size, ground_truth, n_cores, path_max_len, pdf, figsize):
    """Minimal experiment: build the model, run BOLFI sampling and posterior
    analysis, and return the accumulated result dict.

    NOTE(review): `method` and `p` are free variables resolved from the
    enclosing scope, and several arguments (elfi_params, test_data, plot_data,
    replicates, region_size, ground_truth, pdf, figsize) are unused here --
    the wide signature appears kept for call-site compatibility; confirm.
    """
    elfi.new_model()
    model = get_model(method, grid_params, p.get_elfi_params(), rl_params, obs_data, path_max_len)
    inference_task = BolfiFactory(model, bolfi_params).get()
    ret = dict()
    ret["n_cores"] = n_cores
    ret = SamplingPhase().run(inference_task, ret)
    ret = PosteriorAnalysisPhase(types=types).run(inference_task, ret)
    return ret
def test_simple_inference_experiment_can_be_run():
    """Smoke-test the end-to-end inference_experiment pipeline on MA2."""
    true_params = [0.5, 0.5]  # t1, t2
    n_samples = 4
    cfg = BolfiParams(bounds=((0, 1), (0, 1)),
                      n_samples=n_samples,
                      n_initial_evidence=2,
                      sampling_type="uniform",
                      seed=1,
                      simulator_node_name="MA2",
                      discrepancy_node_name="d")
    model = get_model(n_obs=1000, true_params=true_params, seed_obs=1)
    factory = BolfiFactory(model, cfg)
    observed = model.generate(1)["MA2"][0]
    inference_experiment(factory,
                         ground_truth={"t1": 0.5, "t2": 0.5},
                         test_data=observed)
def modified_experiment(grid_params, elfi_params, rl_params, bolfi_params, obs_data, test_data, plot_data, types, replicates, region_size, ground_truth, n_cores, path_max_len, obs_set_size, pdf, figsize):
    """Two-stage experiment: (1) run BOLFI sampling and posterior analysis on
    the model built for `method`, then (2) rebuild an "approx" model on a fresh
    ELFI graph and run the simulation, plotting and error phases against it.

    Returns the result dict accumulated across all phases.

    NOTE(review): `method` and `p` are free variables from the enclosing
    scope; `elfi_params` is accepted but `p.get_elfi_params()` is used
    instead -- confirm intentional.
    """
    elfi.new_model()
    model = get_model(method, grid_params, p.get_elfi_params(), rl_params, obs_data, path_max_len, obs_set_size)
    inference_task = BolfiFactory(model, bolfi_params).get()
    ret = dict()
    ret["n_cores"] = n_cores
    ret = SamplingPhase().run(inference_task, ret)
    ret = PosteriorAnalysisPhase(types=types).run(inference_task, ret)
    ret["plots_logl"] = inference_task.plot_post(pdf, figsize)
    # Stage 2: fresh graph with the "approx" model for simulation phases.
    elfi.new_model()
    model = get_model("approx", grid_params, p.get_elfi_params(), rl_params, obs_data, path_max_len, obs_set_size)
    inference_task = BolfiFactory(model, bolfi_params).get()
    ret = PointEstimateSimulationPhase(replicates=replicates, region_size=region_size).run(inference_task, ret)
    ret = LikelihoodSamplesSimulationPhase(replicates=replicates).run(inference_task, ret)
    ret = PlottingPhase(pdf=pdf, figsize=figsize, obs_data=obs_data, test_data=test_data, plot_data=plot_data).run(inference_task, ret)
    ret = GroundTruthErrorPhase(ground_truth=ground_truth).run(inference_task, ret)
    ret = PredictionErrorPhase(test_data=test_data).run(inference_task, ret)
    return ret
def run_experiment(seed, method, scale, cores, samples):
    """Configure and launch a menu-search model inference experiment.

    Builds the model parameter priors, splits the Bailly dataset into
    training/test user groups, assembles RL/menu/BOLFI configuration for the
    chosen `method` ("bo", "grid", or other), and hands the experiment to
    run_and_report.
    """
    p = ModelParams([
        {"name": "focus_duration_100ms",
         "distr": "truncnorm",
         "minv": 0.0,
         "maxv": 5.0,
         "mean": 3.0,
         "std": 1.0,
         "acq_noise": 0.0,
         "kernel_scale": 1.0,
         "L": 2.0,
         "ntics": scale,
         },
        {"name": "selection_delay_s",
         "distr": "truncnorm",
         "minv": 0.0,
         "maxv": 1.0,
         "mean": 0.3,
         "std": 0.3,
         "acq_noise": 0.0,
         "kernel_scale": 0.2,
         "L": 10.0,
         "ntics": scale,
         },
        {"name": "menu_recall_probability",
         "distr": "beta",
         "minv": 0.0,
         "maxv": 1.0,
         "a": 3.0,
         "b": 1.35,
         "acq_noise": 0.0,
         "kernel_scale": 0.2,
         "L": 10.0,
         "ntics": scale,
         },
        {"name": "p_obs_adjacent",
         "distr": "constant",
         "val": 0.93
         },
    ])
    if method == "bo":
        # NOTE(review): samples/2 is a float under Python 3 -- confirm the
        # update interval is allowed to be non-integer, else use samples//2.
        gp_params_update_interval = min(samples/2, 3*(cores-1))
        types = ["MED", "MAP", "POST"]
    else:
        # Effectively never re-optimize GP hyperparameters.
        gp_params_update_interval = 9999
        types = ["MD"]
    grid_tics = None
    if method == "grid":
        # Grid sampling evaluates the whole grid as one parallel batch.
        parallel_batches = samples
        grid_tics = p.get_grid_tics(seed)
    else:
        parallel_batches = cores-1
    # Training set: all users except the held-out test users.
    training_data = BaillyData(
        menu_type="Semantic",
        # allowed_users=["S40"],
        # excluded_users=[],
        allowed_users=[],
        excluded_users=["S22", "S6", "S41", "S7", "S5", "S8", "S20", "S36", "S24"],
        trials_per_user_present=9999,  # all
        trials_per_user_absent=9999).get()  # all
    # Test set: exactly the users excluded from training.
    test_data = BaillyData(
        menu_type="Semantic",
        # allowed_users=[],
        allowed_users=["S22", "S6", "S41", "S7", "S5", "S8", "S20", "S36", "S24"],
        excluded_users=[],
        trials_per_user_present=9999,  # all
        trials_per_user_absent=9999).get()  # all
    rl_params = RLParams(
        n_training_episodes=5000000,
        n_episodes_per_epoch=1000,
        n_simulation_episodes=10000,
        q_alpha=0.05,
        q_w=0.3,
        q_gamma=0.99,
        q_iters=1,
        exp_epsilon=0.2,
        exp_decay=1.0)
    menu_params = MenuParams(
        menu_type="semantic",
        menu_groups=2,
        menu_items_per_group=4,
        semantic_levels=3,
        gap_between_items=0.75,
        prop_target_absent=0.1,
        length_observations=True,
        p_obs_len_cur=0.95,
        p_obs_len_adj=0.89,
        n_training_menus=50000,
        max_number_of_actions_per_session=15)
    bolfi_params = BolfiParams(
        bounds=p.get_bounds(),
        grid_tics=grid_tics,
        acq_noise_cov=p.get_acq_noises(),
        noise_var=0.5,
        kernel_var=4.0,
        kernel_scale=p.get_lengthscales(),
        kernel_prior={"scale_E": 2.0, "scale_V": 2.0, "var_E": 2.0, "var_V": 2.0, "noise_E": 2.0, "noise_V": 2.0},
        ARD=True,
        n_samples=samples,
        n_initial_evidence=0,
        parallel_batches=parallel_batches,
        gp_params_optimizer="simplex",
        gp_params_max_opt_iters=1000,
        gp_params_update_interval=gp_params_update_interval,
        observed_node_name="simulator",
        abc_threshold_delta=0.01,
        batch_size=1,
        sampling_type=method,
        seed=seed)
    model = get_model(menu_params, p.get_elfi_params(), rl_params, training_data)
    inference_factory = BolfiFactory(model, bolfi_params)
    file_path = os.path.dirname(os.path.realpath(__file__))
    # NOTE(review): `plot_data` is a free variable (presumably module-level);
    # not defined in this function.
    exp = partial(inference_experiment,
                  inference_factory,
                  test_data=test_data,
                  obs_data=training_data,
                  plot_data=plot_data,
                  types=types,
                  n_cores=cores,
                  replicates=10,
                  region_size=0.02)
    run_and_report(exp, file_path)
def run_experiment(seed, method, scale, cores, samples):
    """Configure and launch a choice-model inference experiment.

    Builds the parameter priors, chooses GP-update and batching settings based
    on `method` ("bo", "grid", or other), assembles the BOLFI configuration,
    and hands the experiment to run_and_report.
    """
    logger.info("Running choice model with parameters")
    logger.info(" * seed = {}".format(seed))
    logger.info(" * method = {}".format(method))
    logger.info(" * scale = {}".format(scale))
    logger.info(" * cores = {}".format(cores))
    logger.info(" * samples = {}".format(samples))
    p = ModelParams([
        {
            "name": "s",
            "distr": "constant",
            "val": 1.0,
        },
        {
            "name": "theta",
            "distr": "uniform",
            "minv": 0.0,
            "maxv": 400.0,
            "acq_noise": 5.0,
            "kernel_scale": 20.0,
            "L": 0.01,
            "ntics": scale,
        },
        {
            "name": "a",
            "distr": "constant",
            "val": 0.0,
        },
        {
            "name": "b",
            "distr": "constant",
            "val": 0.0,
        },
        {
            "name": "alpha",
            "distr": "uniform",
            "minv": 0.4,
            "maxv": 1.5,
            "acq_noise": 0.1,
            "kernel_scale": 0.2,
            "L": 10.0,
            "ntics": scale,
        },
    ])
    if method == "bo":
        gp_params_update_interval = 3 * (cores - 1)  # after every third batch
        skip_post = False
    else:
        # Effectively never re-optimize GP hyperparameters; no posterior.
        gp_params_update_interval = 9999
        skip_post = True
    if method == "grid":
        # Grid sampling evaluates the whole grid as one parallel batch.
        parallel_batches = samples
    else:
        parallel_batches = cores - 1
    training_data = get_dataset()
    model_params = ChoiceParams(n_trajectories=1000,
                                timestep=0.01,
                                t_max=10,
                                A_gain=6000,
                                A_loss=0,
                                B_gain=3000,
                                B_loss=0,
                                A_prob=0.45,
                                B_prob=0.9)
    bolfi_params = BolfiParams(
        bounds=p.get_bounds(),
        # fix: was `method is "grid"` -- identity comparison with a string
        # literal is implementation-dependent (and a SyntaxWarning on modern
        # CPython); equality is the correct check
        grid_tics=p.get_grid_tics(seed) if method == "grid" else None,
        acq_noise_cov=p.get_acq_noises(),
        noise_var=0.01,
        kernel_var=10.0,
        kernel_scale=p.get_lengthscales(),
        L=p.get_L(),
        ARD=True,
        n_samples=samples,
        n_initial_evidence=0,
        parallel_batches=parallel_batches,
        gp_params_update_interval=gp_params_update_interval,
        abc_threshold_delta=0.01,
        batch_size=1,
        sampling_type=method,
        seed=seed)
    model = get_model(model_params, p.get_elfi_params(), training_data)
    inference_factory = BolfiFactory(model, bolfi_params)
    file_path = os.path.dirname(os.path.realpath(__file__))
    exp = partial(inference_experiment,
                  inference_factory,
                  skip_post=skip_post,
                  obs_data=training_data,
                  test_data=training_data,
                  plot_data=plot_data,
                  n_cores=cores,
                  replicates=2,
                  region_size=0.02)
    run_and_report(exp, file_path)
def run_experiment(seed, method, grid_size, n_features, cores, samples):
    """Configure and launch a gridworld inverse-RL inference experiment.

    Builds feature-value priors for 2 or 3 features, generates synthetic
    training/test datasets at a known ground truth, and dispatches to one of
    three experiment pipelines depending on `method`:
      * "exact"/"sample"/"sample_l": sampling + posterior analysis, then
        simulation/plotting/error phases on an "approx" model,
      * "approx"/"approx_l": the standard inference_experiment,
      * "random": a random-location baseline with no sampling.

    NOTE(review): if `n_features` is not 2 or 3, `p` is never bound; if
    `method` matches none of the branches, `exp` (and `types`) are never
    bound -- both would raise NameError. Confirm callers guarantee valid
    values.
    """
    if n_features == 2:
        p = ModelParams([
            {
                "name": "feature1_value",
                "distr": "uniform",
                "minv": -1.0,
                "maxv": 0.0,
                "acq_noise": 0.1,
                "kernel_scale": 0.1,
                "ntics": 0,
            },
            {
                "name": "feature2_value",
                "distr": "uniform",
                "minv": -1.0,
                "maxv": 0.0,
                "acq_noise": 0.1,
                "kernel_scale": 0.1,
                "ntics": 0,
            },
        ])
    if n_features == 3:
        p = ModelParams([
            {
                "name": "feature1_value",
                "distr": "uniform",
                "minv": -1.0,
                "maxv": 0.0,
                "acq_noise": 0.1,
                "kernel_scale": 0.1,
                "ntics": 0,
            },
            {
                "name": "feature2_value",
                "distr": "uniform",
                "minv": -1.0,
                "maxv": 0.0,
                "acq_noise": 0.1,
                "kernel_scale": 0.1,
                "ntics": 0,
            },
            {
                "name": "feature3_value",
                "distr": "uniform",
                "minv": -1.0,
                "maxv": 0.0,
                "acq_noise": 0.1,
                "kernel_scale": 0.1,
                "ntics": 0,
            },
        ])
    elfi_params = p.get_elfi_params()
    gp_params_update_interval = (cores - 1) * 2  # after every second batch
    parallel_batches = cores - 1
    obs_set_size = 1000
    if grid_size < 12:
        path_max_len = 12  # limit to make exact method feasible
        sim_set_size = 2 * obs_set_size
    else:
        path_max_len = None
        sim_set_size = obs_set_size
    # Scale RL training effort with the size of the world.
    training_eps = 2000 * grid_size
    if method in ["exact", "sample", "sample_l"]:
        noisy_posterior = True
        model_scale = -1000.0
    else:
        noisy_posterior = False
        model_scale = 1.0
    rl_params = RLParams(n_training_episodes=training_eps,
                         n_episodes_per_epoch=500,
                         n_simulation_episodes=sim_set_size,
                         q_alpha=0.2,
                         q_w=0.5,
                         q_gamma=0.99,
                         q_iters=1,
                         exp_epsilon=0.2,
                         exp_decay=1.0)
    grid_params = GridParams(grid_size=grid_size,
                             n_features=n_features,
                             step_penalty=0.05,
                             goal_value=float(grid_size),
                             prob_rnd_move=0.05,
                             world_seed=seed,
                             initial_state="edge",
                             grid_type="walls",
                             max_number_of_actions_per_session=grid_size * 10)
    bolfi_params = BolfiParams(
        bounds=p.get_bounds(),
        acq_noise_cov=p.get_acq_noises(),
        noise_var=0.1,
        kernel_var=10.0,
        kernel_scale=p.get_lengthscales(),
        kernel_prior={
            "scale_E": 0.1,
            "scale_V": 0.3,
            "var_E": 5.0,
            "var_V": 10.0
        },
        ARD=True,
        noisy_posterior=noisy_posterior,
        model_scale=model_scale,
        n_samples=samples,
        n_initial_evidence=0,
        parallel_batches=parallel_batches,
        gp_params_optimizer="simplex",
        gp_params_max_opt_iters=20,
        gp_params_update_interval=gp_params_update_interval,
        batch_size=1,
        sampling_type="bo",
        seed=seed)
    # Synthetic data generated at a fixed known ground truth; distinct seeds
    # (seed+1 vs seed+2) keep training and test sets independent.
    if n_features == 2:
        ground_truth_v = [-0.33, -0.67]
        ground_truth = {"feature1_value": -0.33, "feature2_value": -0.67}
        training_data = get_dataset(grid_params, elfi_params, rl_params, ground_truth_v, seed + 1, path_max_len, obs_set_size)
        test_data = get_dataset(grid_params, elfi_params, rl_params, ground_truth_v, seed + 2, path_max_len, obs_set_size)
    if n_features == 3:
        ground_truth_v = [-0.25, -0.5, -0.75]
        ground_truth = {
            "feature1_value": -0.25,
            "feature2_value": -0.5,
            "feature3_value": -0.75
        }
        training_data = get_dataset(grid_params, elfi_params, rl_params, ground_truth_v, seed + 1, path_max_len, obs_set_size)
        test_data = get_dataset(grid_params, elfi_params, rl_params, ground_truth_v, seed + 2, path_max_len, obs_set_size)
    # hack
    # Sanity check: report how far apart the two independent datasets are.
    test_training_disc = discrepancy_function(
        InitialStateUniformlyAtEdge(grid_size), training_data, observed=[test_data])
    print("Discrepancy between test and training data was {:.4f}".format(
        test_training_disc[0]))
    if method in ["exact", "sample", "sample_l"]:
        types = ["MED", "LIK"]
        # hack
        from elfie.inference import SamplingPhase, PosteriorAnalysisPhase, PointEstimateSimulationPhase, PlottingPhase, GroundTruthErrorPhase, PredictionErrorPhase, LikelihoodSamplesSimulationPhase

        def modified_experiment(grid_params, elfi_params, rl_params, bolfi_params, obs_data, test_data, plot_data, types, replicates, region_size, ground_truth, n_cores, path_max_len, obs_set_size, pdf, figsize):
            # Stage 1: sampling + posterior analysis on the `method` model.
            elfi.new_model()
            model = get_model(method, grid_params, p.get_elfi_params(), rl_params, obs_data, path_max_len, obs_set_size)
            inference_task = BolfiFactory(model, bolfi_params).get()
            ret = dict()
            ret["n_cores"] = n_cores
            ret = SamplingPhase().run(inference_task, ret)
            ret = PosteriorAnalysisPhase(types=types).run(inference_task, ret)
            ret["plots_logl"] = inference_task.plot_post(pdf, figsize)
            # Stage 2: fresh graph with an "approx" model for simulation phases.
            elfi.new_model()
            model = get_model("approx", grid_params, p.get_elfi_params(), rl_params, obs_data, path_max_len, obs_set_size)
            inference_task = BolfiFactory(model, bolfi_params).get()
            ret = PointEstimateSimulationPhase(replicates=replicates, region_size=region_size).run(inference_task, ret)
            ret = LikelihoodSamplesSimulationPhase(replicates=replicates).run(inference_task, ret)
            ret = PlottingPhase(pdf=pdf, figsize=figsize, obs_data=obs_data, test_data=test_data, plot_data=plot_data).run(inference_task, ret)
            ret = GroundTruthErrorPhase(ground_truth=ground_truth).run(inference_task, ret)
            ret = PredictionErrorPhase(test_data=test_data).run(inference_task, ret)
            return ret

        exp = partial(modified_experiment,
                      grid_params=grid_params,
                      elfi_params=p,
                      rl_params=rl_params,
                      bolfi_params=bolfi_params,
                      obs_data=training_data,
                      test_data=test_data,
                      plot_data=None,
                      types=types,
                      replicates=10,
                      region_size=0.02,
                      ground_truth=ground_truth,
                      n_cores=cores,
                      path_max_len=path_max_len,
                      obs_set_size=obs_set_size)
    if method in ["approx", "approx_l"]:
        types = ["ML", "LIK"]
        model = get_model(method, grid_params, elfi_params, rl_params, training_data, path_max_len, obs_set_size)
        inference_factory = BolfiFactory(model, bolfi_params)
        exp = partial(inference_experiment,
                      inference_factory,
                      test_data=test_data,
                      obs_data=training_data,
                      plot_data=None,
                      types=types,
                      ground_truth=ground_truth,
                      n_cores=cores,
                      replicates=10,
                      region_size=0.02)
    if method == "random":
        types = ["MD"]
        # hack
        from elfie.inference import PointEstimateSimulationPhase, PlottingPhase, GroundTruthErrorPhase, PredictionErrorPhase

        def modified_experiment(grid_params, elfi_params, rl_params, bolfi_params, obs_data, test_data, plot_data, types, replicates, region_size, ground_truth, n_cores, path_max_len, obs_set_size, seed, pdf, figsize):
            # Random baseline: no sampling; evaluate one uniformly random
            # location ("MD") drawn reproducibly from the seed.
            elfi.new_model()
            model = get_model("approx", grid_params, p.get_elfi_params(), rl_params, obs_data, path_max_len, obs_set_size)
            inference_task = BolfiFactory(model, bolfi_params).get()
            bounds = elfi_params.get_bounds()
            ret = dict()
            ret["n_cores"] = n_cores
            ret["MD"] = dict()
            random_state = np.random.RandomState(seed)
            for k, v in bounds.items():
                ret["MD"][k] = random_state.uniform(v[0], v[1])
            print("Random location: {}".format(ret["MD"]))
            # Fill in the fields the downstream phases expect.
            ret["sampling_duration"] = 0
            ret["samples"] = dict()
            ret["n_samples"] = 0
            ret = PointEstimateSimulationPhase(replicates=replicates, region_size=region_size).run(inference_task, ret)
            ret = PlottingPhase(pdf=pdf, figsize=figsize, obs_data=obs_data, test_data=test_data, plot_data=plot_data).run(inference_task, ret)
            ret = GroundTruthErrorPhase(ground_truth=ground_truth).run(inference_task, ret)
            ret = PredictionErrorPhase(test_data=test_data).run(inference_task, ret)
            return ret

        exp = partial(modified_experiment,
                      grid_params=grid_params,
                      elfi_params=p,
                      rl_params=rl_params,
                      bolfi_params=bolfi_params,
                      obs_data=training_data,
                      test_data=test_data,
                      plot_data=None,
                      types=types,
                      replicates=10,
                      region_size=0.02,
                      ground_truth=ground_truth,
                      n_cores=cores,
                      path_max_len=path_max_len,
                      obs_set_size=obs_set_size,
                      seed=seed)
    file_path = os.path.dirname(os.path.realpath(__file__))
    run_and_report(exp, file_path)
def run_experiment(seed, method, scale, cores, samples):
    """Configure and launch a learning-model (ACT-R style parameters)
    inference experiment.

    Builds the parameter priors, chooses GP-update and batching settings
    based on `method` ("bo", "grid", or other), assembles the BOLFI
    configuration, and hands the experiment to run_and_report.
    """
    logger.info("Running learning model with parameters")
    logger.info(" * seed = {}".format(seed))
    logger.info(" * method = {}".format(method))
    logger.info(" * scale = {}".format(scale))
    logger.info(" * cores = {}".format(cores))
    logger.info(" * samples = {}".format(samples))
    # NOTE(review): each parameter's minv/maxv differ only by ~1e-7, i.e. the
    # bounds effectively pin the parameters to single values -- confirm this
    # near-degenerate configuration is intentional (e.g. a fixed-parameter run).
    p = ModelParams([
        {
            "name": "RT",
            "distr": "uniform",
            "minv": -2.60000001,
            "maxv": -2.6,
            "acq_noise": 0.0,
            "kernel_scale": 0.4,  # 20% of range
            "L": 2.5,  # 5 units / range
            "ntics": scale,
        },
        {
            "name": "LF",
            "distr": "truncnorm",
            "minv": 0.1,
            "maxv": 0.1000001,
            "mean": 0.2,
            "std": 0.2,
            "acq_noise": 0.0,
            "kernel_scale": 0.03,  # 20% of range
            "L": 33.3,  # 5 units / range
            "ntics": scale,
        },
        {
            "name": "BLC",
            "distr": "truncnorm",
            "minv": 2.0,
            "maxv": 2.0000001,
            "mean": 10.0,
            "std": 10.0,
            "acq_noise": 0.0,
            "kernel_scale": 4.0,  # 20% of range
            "L": 0.25,  # 5 units / range
            "ntics": scale,
        },
        {
            "name": "ANS",
            "distr": "truncnorm",
            "minv": 0.001,
            "maxv": 0.001000001,
            "mean": 0.3,
            "std": 0.2,
            "acq_noise": 0.0,
            "kernel_scale": 0.03,  # 20% of range
            "L": 33.3,  # 5 units / range
            "ntics": scale,
        }
    ])
    if method == "bo":
        gp_params_update_interval = min(samples, 50)
        types = ["MED", "MAP", "POST"]
    else:
        # Effectively never re-optimize GP hyperparameters.
        gp_params_update_interval = 9999
        types = ["MD"]
    grid_tics = None
    if method == "grid":
        # Grid sampling evaluates the whole grid as one parallel batch.
        parallel_batches = samples
        grid_tics = p.get_grid_tics(seed)
    else:
        parallel_batches = cores - 1
    training_data = get_dataset()
    model_params = LearningParams(max_retries=20)
    bolfi_params = BolfiParams(
        bounds=p.get_bounds(),
        grid_tics=grid_tics,
        acq_noise_cov=p.get_acq_noises(),
        noise_var=0.0001,
        kernel_var=1.0,
        kernel_scale=p.get_lengthscales(),
        kernel_prior={
            "scale_E": 7.5,
            "scale_V": 37.5,
            "var_E": 2.0,
            "var_V": 2.0,
            "noise_E": 0.0004,
            "noise_V": 0.00000004
        },
        L=p.get_L(),
        ARD=True,
        n_samples=samples,
        n_initial_evidence=0,
        parallel_batches=parallel_batches,
        gp_params_update_interval=gp_params_update_interval,
        gp_params_optimizer="simplex",
        gp_params_max_opt_iters=100,
        abc_threshold_delta=0.01,
        batch_size=1,
        sampling_type=method,
        seed=seed)
    model = get_model(model_params, p.get_elfi_params(), training_data)
    inference_factory = BolfiFactory(model, bolfi_params)
    file_path = os.path.dirname(os.path.realpath(__file__))
    # NOTE(review): training data is reused as test data here (no held-out
    # split); `plot_data` is a free variable from module scope.
    exp = partial(inference_experiment,
                  inference_factory,
                  obs_data=training_data,
                  test_data=training_data,
                  plot_data=plot_data,
                  types=types,
                  n_cores=cores,
                  replicates=10,
                  region_size=0.02)
    run_and_report(exp, file_path)