def test_grid_search_single_sample_multivariate_normal():
    """Fit a grid search whose number of CV splits exceeds the sample count.

    ``data(1)`` is requested while ``cv`` is 5, which is the problematic
    "more splits than samples" situation.  After fitting, the ``cv``
    attribute of the grid search must still equal the configured value.
    """
    n_splits = 5
    scaling_grid = {"scaling": np.logspace(-5, 1.5, 5)}
    grid_search = GridSearchCV(
        MultivariateNormalTransition(), scaling_grid, cv=n_splits
    )

    samples, weights = data(1)
    grid_search.fit(samples, weights)

    assert grid_search.cv == n_splits
def test_gaussian_multiple_populations_crossval_kde(db_path, sampler):
    """Check an ABC-SMC posterior against the analytic Gaussian posterior.

    A single model ``y ~ N(x, sigma_y)`` with prior ``x ~ N(0, sigma_x)`` is
    run for several populations, using a cross-validated
    ``MultivariateNormalTransition`` as perturbation kernel.  The weighted
    empirical CDF, mean and standard deviation of the resulting particles
    are compared with the closed-form posterior of ``x`` given ``y``.
    """
    sigma_x = 1
    sigma_y = 0.5
    y_observed = 2
    nr_populations = 4

    def model(args):
        return {"y": st.norm(args['x'], sigma_y).rvs()}

    # --- ABC setup ---------------------------------------------------------
    models = [FunctionModel(model)]
    priors = [Distribution(x=st.norm(0, sigma_x))]
    kernels = [
        GridSearchCV(
            MultivariateNormalTransition(),
            {"scaling": np.logspace(-1, 1.5, 5)},
        )
    ]
    abc = ABCSMC(
        models,
        priors,
        MinMaxDistance(measures_to_use=["y"]),
        ConstantPopulationSize(600),
        transitions=kernels,
        eps=MedianEpsilon(0.2),
        sampler=sampler,
    )
    abc.new(db_path, {"y": y_observed})

    # A negative minimum epsilon is passed, so the run length is governed by
    # max_nr_populations (see the max_t assertion below).
    minimum_epsilon = -1
    abc.do_not_stop_when_only_single_model_alive()
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # --- Weighted empirical CDF of the particles ---------------------------
    particles, posterior_weight = history.get_distribution(0, None)
    posterior_x = particles["x"].values
    order = np.argsort(posterior_x)
    # Pad with (-200, 0) and (200, 1) so the interpolant covers the whole
    # evaluation grid used below.
    cdf_support = np.hstack((-200, posterior_x[order], 200))
    cdf_values = np.hstack((0, np.cumsum(posterior_weight[order]), 1))
    f_empirical = sp.interpolate.interp1d(cdf_support, cdf_values)

    # --- Closed-form posterior of x given y --------------------------------
    sigma_x_given_y = 1 / np.sqrt(1 / sigma_x**2 + 1 / sigma_y**2)
    mu_x_given_y = sigma_x_given_y**2 * y_observed / sigma_y**2
    expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)

    grid = np.linspace(-8, 8)
    cdf_gap = np.absolute(f_empirical(grid) - expected_posterior_x.cdf(grid))
    max_distribution_difference = cdf_gap.max()

    assert max_distribution_difference < 0.052
    assert history.max_t == nr_populations - 1

    mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
    assert abs(mean_emp - mu_x_given_y) < 0.07
    assert abs(std_emp - sigma_x_given_y) < 0.12
def test_two_competing_gaussians_multiple_population_adaptive_populatin_size(
        db_path, sampler):
    """Model selection between two Gaussian models, adaptive population size.

    Both models simulate ``y ~ N(x, sigma)`` and differ only in the prior
    mean of ``x`` (0 vs. 1).  ABC-SMC is run against ``y_observed`` and the
    estimated model probabilities are compared with the analytic ones, which
    follow from the marginal likelihood
    ``y ~ N(mu_x, sqrt(sigma**2 + sigma**2))`` of each model.

    :param db_path: database location handed to ``abc.new``.
    :param sampler: sampler instance handed to ``ABCSMC``.
    """
    # Imported locally so the block does not depend on a module-level alias.
    # ``np.sqrt`` replaces ``sp.sqrt``: the NumPy aliases in SciPy's root
    # namespace are deprecated.
    import numpy as np

    # Define a gaussian model
    sigma = .5

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # We define two models, but they are identical so far
    models = [SimpleModel(m) for m in (model, model)]

    # The prior over the model classes is uniform
    model_prior = RV("randint", 0, 2)

    # However, our models' priors are not the same. Their mean differs.
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=st.norm(mu_x_1, sigma)),
        Distribution(x=st.norm(mu_x_2, sigma)),
    ]

    # NOTE: no explicit transitions are passed to ABCSMC here; the list of
    # MultivariateNormalTransition objects that used to be built at this
    # point was never handed over, so it has been dropped.

    # We plug all the ABC setup together
    nr_populations = 3
    population_size = AdaptivePopulationSize(400, mean_cv=0.05,
                                             max_population_size=1000)
    abc = ABCSMC(models,
                 parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["y"]),
                 population_size,
                 model_prior=model_prior,
                 eps=MedianEpsilon(.2),
                 sampler=sampler)

    # y_observed is the important piece here: our actual observation.
    y_observed = 1
    abc.new(db_path, {"y": y_observed})

    # Run for at most nr_populations populations.  Using the variable (not a
    # repeated literal) keeps the run in sync with the max_t assertion below.
    minimum_epsilon = .05
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # Evaluate the model probabilities
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        # Marginal density of the observation under one model.
        marginal_std = np.sqrt(sigma ** 2 + sigma ** 2)
        return st.norm(mu_x_model, marginal_std).pdf(y_observed)

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    normalizer = p1_expected_unnormalized + p2_expected_unnormalized
    p1_expected = p1_expected_unnormalized / normalizer
    p2_expected = p2_expected_unnormalized / normalizer

    assert history.max_t == nr_populations - 1
    assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < .07
def test_grid_search_multivariate_normal():
    """Smoke test: grid-search the ``scaling`` parameter on ``data(20)``.

    The test passes if fitting completes without raising; nothing else is
    asserted.
    """
    scaling_grid = {"scaling": np.logspace(-5, 1.5, 5)}
    grid_search = GridSearchCV(
        MultivariateNormalTransition(), scaling_grid, n_jobs=1
    )
    samples, weights = data(20)
    grid_search.fit(samples, weights)
def __init__(self):
    """Initialise the parent class with one transition, keyed by ``'a'``."""
    super().__init__(mapping={'a': MultivariateNormalTransition()})
def __init__(self):
    """Initialise the parent class with a two-entry transition mapping.

    The key ``'a'`` maps to a ``LocalTransition`` and the one-element tuple
    key ``('b',)`` maps to a ``MultivariateNormalTransition``.
    """
    transitions_by_key = dict([
        ('a', LocalTransition()),
        (('b', ), MultivariateNormalTransition()),
    ])
    super().__init__(mapping=transitions_by_key)
LAMBDA0_TRUE = 100     # initial order placement depth
C_LAMBDA_TRUE = 10     # limit order placement depth coefficient
DELTA_S_TRUE = 0.001   # mean reversion strength parameter

# Prior ranges, one MIN/MAX pair per parameter
DELTA_MIN = 0
DELTA_MAX = 0.05
MU_MIN = 0
MU_MAX = 0.05
ALPHA_MIN = 0.05
ALPHA_MAX = 0.5
LAMBDA0_MIN = 50
LAMBDA0_MAX = 300
C_LAMBDA_MIN = 1
C_LAMBDA_MAX = 50
DELTAS_MIN = 0
DELTAS_MAX = 0.005

# Fixed parameters
PRICE_PATH_DIVIDER = 100
TIME_HORIZON = 3200                  # time horizon
P_0 = 238.745 * PRICE_PATH_DIVIDER   # initial price
MC_STEPS = 10 ** 5                   # MC steps to generate variance
N_A = 125                            # no. market makers = no. liquidity providers

# SMCABC configuration objects
SMCABC_DISTANCE = AdaptivePNormDistance(
    p=2,
    scale_function=pyabc.distance.root_mean_square_deviation,
)
SMCABC_POPULATION_SIZE = 30
SMCABC_SAMPLER = MulticoreEvalParallelSampler(ncores)
SMCABC_TRANSITIONS = MultivariateNormalTransition()
SMCABC_EPS = MedianEpsilon(0.01)
SMCABC_ACCEPTOR = UniformAcceptor(use_complete_history=True)

# SMCABC run limits
smcabc_minimum_epsilon = 0.0001
smcabc_max_nr_populations = 6
# Population size divided by 25000.
smcabc_min_acceptance_rate = SMCABC_POPULATION_SIZE / 25000
tmax = len(data)

# Integer maximum of the data; it parameterises the n01 and n02 priors.
data_max = int(data.max())

prior = Distribution(
    n01=RV('uniform', 0, data_max),
    n02=RV('uniform', 0, 10 * data_max),
    k=RV('randint', 1, 7),
    log_p=RV('uniform', 0, 6),
    p_inf=RV('uniform', 0.01, 0.03),
)

model = MyStochasticProcess(n, tmax, data)

# 'k' gets its own discrete jump transition; the remaining four parameters
# share one multivariate normal transition.
# Disabled alternative: separate DiscreteJumpTransition objects for 'n01'
# (domain=np.arange(int(data.max()))) and 'n02'
# (domain=np.arange(10 * int(data.max()))).
transition = AggregatedTransition(
    mapping={
        'k': DiscreteJumpTransition(domain=k_domain, p_stay=0.7),
        ('n01', 'n02', 'log_p', 'p_inf'):
            MultivariateNormalTransition(scaling=0.8),
    }
)

# SQLite database file located in the current working directory.
db = "sqlite:///" + os.path.join(os.getcwd(), "n=3e5_new.db")

with Pool(processes=5) as pool:
    # Disabled alternatives: sampler=SingleCoreSampler(),
    # population_size=AdaptivePopulationSize(100, max_population_size=500),
    # population_size=25.
    abc = ABCSMC(
        model,
        prior,
        distance,
        transitions=transition,
        sampler=MappingSampler(pool.map),
    )
    # Load from the database with a hard-coded second argument of 10
    # (presumably the run id -- confirm).  Disabled alternative:
    # abc.load(db, np.load('run_id.npy')).
    abc.load(db, 10)