def test_lognormal_generator_step():
    """Check that stepped lognormal samples respect the step and the bounds.

    Draws ``size`` samples from ``sample_generators["lognormal"]`` with a
    non-null ``step`` and asserts that every sample is a multiple of
    ``step``, is not below ``low`` and is strictly below ``high``.
    """
    mu = 50
    sigma = 3
    low = 2
    high = 100
    base = 10
    step = 2
    size = 100000

    mu_log = logb(mu, base)
    sigma_log = logb(sigma, base)
    low_log = logb(low, base)
    high_log = logb(high, base)

    samples = sample_generators["lognormal"](
        size=size,
        mu_log=mu_log,
        sigma_log=sigma_log,
        low_log=low_log,
        low=low,
        high_log=high_log,
        base=base,
        step=step,
    )

    # Tie the check to ``step`` instead of a literal so the assertion keeps
    # testing the right thing if the step used above is ever changed.
    assert np.sum(samples % step) == 0
    assert np.sum(samples < low) == 0
    assert np.sum(samples >= high) == 0
def test_loguniform_pdf():
    """Compare the continuous loguniform pdf with a histogram of samples.

    Samples are drawn from ``sample_generators["loguniform"]`` without a
    step, binned into a normalised histogram, and the pdf returned by
    ``probability_density_function["loguniform"]`` is evaluated at the bin
    centres. The pdf must be zero outside ``[low, high)`` and the mean
    relative difference with the histogram must not exceed ``epsilon``.
    """
    low = 10**1.125
    high = 10**4.365
    step = None
    base = 10
    size = 10000
    bins = 100
    epsilon = 1e-1

    low_log = logb(low, base)
    high_log = logb(high, base)

    samples = sample_generators["loguniform"](
        size=size,
        low=low,
        high=high,
        low_log=low_log,
        high_log=high_log,
        step=step,
        base=base,
    )

    hist, bin_edges = np.histogram(samples, bins=bins, density=True)
    bin_centers = (bin_edges[1:] + bin_edges[:-1]) * 0.5
    densities = probability_density_function["loguniform"](
        samples=bin_centers,
        low_log=low_log,
        high_log=high_log,
        low=low,
        high=high,
        step=step,
        base=base,
    )

    # No probability mass below the lower bound...
    assert np.sum(densities[bin_centers < low]) == 0
    # ...nor at or above the upper bound (the original repeated the lower
    # bound check here, leaving the upper bound untested).
    assert np.sum(densities[bin_centers >= high]) == 0
    # Signed mean relative error: positive and negative deviations can
    # cancel each other out.
    assert ((hist - densities) / densities).mean() <= epsilon
def test_loguniform_step_pdf():
    """Compare the stepped loguniform pdf with a histogram of samples.

    Samples are drawn from ``sample_generators["loguniform"]`` with
    ``step == low``. Histogram bins are one step wide and start ``step / 10``
    below ``low``; the pdf is evaluated at each bin's left edge plus
    ``step / 10``. The pdf must be zero outside ``[low, high)`` and the mean
    relative difference with the histogram must not exceed ``epsilon``.
    """
    low = 10**0
    high = 10**3
    step = low
    base = 10
    size = 100000
    epsilon = 1e-1

    low_log = logb(low, base)
    high_log = logb(high, base)

    samples = sample_generators["loguniform"](
        size=size,
        low=low,
        high=high,
        low_log=low_log,
        high_log=high_log,
        step=step,
        base=base,
    )

    hist, bin_edges = np.histogram(
        samples, bins=np.arange(low - step / 10, high, step), density=True
    )
    densities = probability_density_function["loguniform"](
        samples=bin_edges[0:-1] + step / 10,
        low_log=low_log,
        high_log=high_log,
        low=low,
        high=high,
        step=step,
        base=base,
    )

    bin_centers = (bin_edges[1:] + bin_edges[:-1]) * 0.5
    # No probability mass below the lower bound...
    assert np.sum(densities[bin_centers < low]) == 0
    # ...nor at or above the upper bound (the original repeated the lower
    # bound check here, leaving the upper bound untested).
    assert np.sum(densities[bin_centers >= high]) == 0
    # Signed mean relative error: positive and negative deviations can
    # cancel each other out.
    assert ((hist - densities) / densities).mean() <= epsilon
def validate_loguniform(search_space):
    """Validate a loguniform search space and return a completed copy.

    Args:
        search_space: dict with mandatory keys ``low`` and ``high`` and
            optional keys ``step`` and ``base``.

    Returns:
        A shallow copy of ``search_space`` where ``step`` and ``base``
        default to ``None`` and ``10``, and where ``low_log`` and
        ``high_log`` hold ``logb`` of ``low`` and ``high`` in ``base``.

    Raises:
        ValidationError: with a ``message_key`` naming the first failed
            check (``search_space_type``, ``low_mandatory``,
            ``high_mandatory``, ``low_type``, ``high_type``,
            ``low_inferior_0``, ``high_inferior_low``, ``step_type``,
            ``high_inferior_step``, ``base_type`` or ``base_inferior_0``).
    """
    if type(search_space) is not dict:
        raise ValidationError(message_key="search_space_type")
    search_space = search_space.copy()

    if "low" not in search_space:
        raise ValidationError(message_key="low_mandatory")
    if "high" not in search_space:
        raise ValidationError(message_key="high_mandatory")

    # Exact type checks are kept on purpose: isinstance() would start
    # accepting bool, which is a subclass of int.
    if type(search_space["low"]) not in (int, float):
        raise ValidationError(message_key="low_type")
    if type(search_space["high"]) not in (int, float):
        raise ValidationError(message_key="high_type")

    if search_space["low"] <= 0:
        raise ValidationError(message_key="low_inferior_0")
    if search_space["high"] <= search_space["low"]:
        raise ValidationError(message_key="high_inferior_low")

    # A falsy step (missing, None or 0) means "no step" and is not validated.
    step = search_space.get("step")
    if step:
        if type(step) not in (int, float):
            raise ValidationError(message_key="step_type")
        if step >= max(
            [np.abs(search_space["high"]), np.abs(search_space["low"])]
        ):
            raise ValidationError(message_key="high_inferior_step")

    # Test against None rather than truthiness so that 0 (and other falsy
    # non-None values) are validated instead of silently reaching logb, and
    # reject non-positive bases as validate_lognormal does.
    base = search_space.get("base")
    if base is not None:
        if type(base) not in (float, int):
            raise ValidationError(message_key="base_type")
        if base <= 0:
            raise ValidationError(message_key="base_inferior_0")

    search_space.setdefault("step", None)
    search_space.setdefault("base", 10)

    # low > 0 is enforced above, so no division warning is expected from
    # the logarithm of the bounds; the guard is kept as a defensive measure.
    with np.errstate(divide="ignore"):
        search_space["low_log"] = logb(search_space["low"], search_space["base"])
        search_space["high_log"] = logb(search_space["high"], search_space["base"])

    return search_space
def test_lognormal_pdf_step():
    """Compare the stepped lognormal pdf with a histogram of samples.

    Samples are drawn from ``sample_generators["lognormal"]`` with a
    non-null ``step``. Histogram bins are one step wide and start
    ``step / 10`` below ``low``; the pdf is evaluated at each bin's left
    edge plus ``step / 10``. The pdf must be zero outside ``[low, high)``
    and the mean relative difference with the histogram must not exceed
    ``epsilon``.
    """
    mu = 50
    sigma = 20
    low = 2
    high = 100
    base = 10
    step = 2
    size = 100000
    epsilon = 1e-1

    mu_log = logb(mu, base)
    sigma_log = logb(sigma, base)
    low_log = logb(low, base)
    high_log = logb(high, base)

    samples = sample_generators["lognormal"](
        size=size,
        low=low,
        high=high,
        base=base,
        step=step,
        mu=mu,
        mu_log=mu_log,
        sigma_log=sigma_log,
        low_log=low_log,
        high_log=high_log,
        sigma=sigma,
    )

    hist, bin_edges = np.histogram(
        samples, bins=np.arange(low - step / 10, high, step), density=True
    )
    densities = probability_density_function["lognormal"](
        samples=bin_edges[0:-1] + step / 10,
        low=low,
        high=high,
        base=base,
        step=step,
        mu_log=mu_log,
        sigma_log=sigma_log,
        low_log=low_log,
        high_log=high_log,
        mu=mu,
        sigma=sigma,
    )

    bin_centers = (bin_edges[1:] + bin_edges[:-1]) * 0.5
    # No probability mass below the lower bound...
    assert np.sum(densities[bin_centers < low]) == 0
    # ...nor at or above the upper bound (the original repeated the lower
    # bound check here, leaving the upper bound untested).
    assert np.sum(densities[bin_centers >= high]) == 0
    # Signed mean relative error: positive and negative deviations can
    # cancel each other out.
    assert ((hist - densities) / densities).mean() <= epsilon
def loguniform_cdf(samples, low, low_log, high, high_log, base):
    """Evaluate the loguniform cumulative distribution for each sample.

    For a sample ``x`` the value is
    ``(logb(x, base) - low_log) / (high_log - low_log)``, i.e. the fraction
    of the log-scale interval lying below ``x``. Samples below ``low`` or at
    or above ``high`` are assigned ``0``.

    ``samples`` must support element-wise comparison and the result of
    ``logb`` must support boolean-mask assignment (presumably a numpy
    array -- confirm against callers).
    """
    log_span = high_log - low_log
    values = (logb(samples, base) - low_log) / log_span

    # Everything outside the half-open interval [low, high) is zeroed.
    out_of_range = (samples < low) | (samples >= high)
    values[out_of_range] = 0
    return values
def test_lognormal_generator():
    """Compare log-space statistics of lognormal samples with a truncated normal.

    Samples drawn from ``sample_generators["lognormal"]`` (no step) are
    mapped to log space with ``logb``. Their median and mean must be within
    a relative tolerance ``epsilon`` of those of ``scipy.stats.truncnorm``
    parameterised by ``mu_log``, ``sigma_log`` and the log-space bounds, and
    every log-sample must lie in ``[low_log, high_log)``.
    """
    base = 10
    mu = 1e-5
    sigma = 1e2
    low = 1e-7
    high = 1e1
    step = None
    size = 1000000
    epsilon = 1e-1

    mu_log = logb(mu, base)
    sigma_log = logb(sigma, base)
    low_log = logb(low, base)
    high_log = logb(high, base)

    samples = logb(
        sample_generators["lognormal"](
            size=size,
            mu_log=mu_log,
            sigma_log=sigma_log,
            low_log=low_log,
            low=low,
            high_log=high_log,
            base=base,
            step=step,
        ),
        base,
    )

    # truncnorm expects its bounds in standard-deviation units around loc.
    a = (low_log - mu_log) / sigma_log
    b = (high_log - mu_log) / sigma_log

    # The theoretical values are expected to be negative here (mu_log is
    # about -5), so the error is normalised by their magnitude. Dividing by
    # the signed value would make the ratio non-positive and the assertions
    # vacuously true.

    # Median
    theorical_median = stats.truncnorm.median(a=a, b=b, loc=mu_log, scale=sigma_log)
    assert (
        np.abs(np.median(samples) - theorical_median) / np.abs(theorical_median)
        < epsilon
    )

    # Mean (expected value)
    theorical_mean = stats.truncnorm.mean(a=a, b=b, loc=mu_log, scale=sigma_log)
    assert (
        np.abs(np.mean(samples) - theorical_mean) / np.abs(theorical_mean)
        < epsilon
    )

    assert np.sum(samples < low_log) == 0
    assert np.sum(samples >= high_log) == 0
def test_loguniform_step_generator():
    """Check that stepped loguniform samples respect the step and the bounds.

    Every sample drawn from ``sample_generators["loguniform"]`` must be a
    multiple of ``step``, must not be below ``low`` and must be strictly
    below ``high``.
    """
    base = 10
    step = 2
    low, high = 10**2, 10**4
    n_samples = 10000

    draws = sample_generators["loguniform"](
        size=n_samples,
        low=low,
        high=high,
        low_log=logb(low, base),
        high_log=logb(high, base),
        step=step,
        base=base,
    )

    # Remainders are summed: any off-grid sample makes the total non-zero.
    off_grid = np.sum(draws % step)
    assert off_grid == 0

    # Count the samples falling outside [low, high).
    too_small = np.sum(draws < low)
    too_large = np.sum(draws >= high)
    assert too_small == 0
    assert too_large == 0
def test_loguniform_generator():
    """Check the median, mean and bounds of continuous loguniform samples.

    The empirical median and mean of samples drawn from
    ``sample_generators["loguniform"]`` (no step) must be within a relative
    tolerance of their closed-form values, and every sample must lie in
    ``[low, high)``.
    """
    base = 10
    low, high = 10**-7.23, 10**-6.569
    step = None
    n_samples = 100000
    tolerance = 1e-1

    low_log = logb(low, base)
    high_log = logb(high, base)

    draws = sample_generators["loguniform"](
        size=n_samples,
        low=low,
        high=high,
        low_log=low_log,
        high_log=high_log,
        step=step,
        base=base,
    )

    # Median: the base raised to the midpoint of the log-scale interval.
    expected_median = base ** (0.5 * (high_log + low_log))
    median_error = np.abs(np.median(draws) - expected_median) / expected_median
    assert median_error < tolerance

    # Mean (expected value): (high - low) over the natural-log width.
    expected_mean = (high - low) / ((high_log - low_log) * np.log(base))
    mean_error = np.abs(np.mean(draws) - expected_mean) / expected_mean
    assert mean_error < tolerance

    # Bounds: nothing below low, nothing at or above high.
    assert np.sum(draws < low) == 0
    assert np.sum(draws >= high) == 0
def build_posterior_lognormal(observed_values, observed_weights, parameter, prior_weight):
    """Build a mixture-of-lognormals posterior from weighted observations.

    The observations are moved to log scale and handed, together with the
    prior ``mu_log``/``sigma_log`` and the log-scale bounds of ``parameter``,
    to ``find_sigmas_mus``. Each returned (mu, sigma) pair, converted back
    from log scale, becomes one ``lognormal`` component sharing the
    ``low``/``high``/``step``/``base`` of ``parameter``.

    The mixture weights are the observed weights rescaled to sum to
    ``1 - prior_weight``, followed by ``prior_weight``, and reordered with
    the index array returned by ``find_sigmas_mus``.

    Returns:
        The ``Parameter`` built by ``Parameter.from_dict`` with category
        ``"mixture"`` and the name of ``parameter``.
    """
    space = parameter.search_space
    low_log, high_log, base = space["low_log"], space["high_log"], space["base"]
    prior_mu_log, prior_sigma_log = space["mu_log"], space["sigma_log"]

    # One (mu, sigma) pair per mixture component, still in log scale.
    mus_log, sigmas_log, index = find_sigmas_mus(
        observed_mus=logb(observed_values, base),
        prior_mu=prior_mu_log,
        prior_sigma=prior_sigma_log,
        low=low_log,
        high=high_log,
    )

    # Back from log scale.
    mus = base**mus_log
    sigmas = base**sigmas_log

    total_weight = sum(observed_weights)

    components = []
    for mu, sigma in zip(mus, sigmas):
        component_space = {
            "mu": mu.tolist(),
            "sigma": sigma.tolist(),
            "low": parameter.search_space["low"],
            "high": parameter.search_space["high"],
            "step": parameter.search_space["step"],
            "base": parameter.search_space["base"],
        }
        components.append({"category": "lognormal", "search_space": component_space})

    # Observed weights share (1 - prior_weight); the prior weight comes last
    # before the reordering by ``index``.
    raw_weights = [
        x * (1 - prior_weight) / total_weight for x in observed_weights
    ]
    raw_weights.append(prior_weight)
    ordered_weights = np.array(raw_weights)[index].tolist()

    return Parameter.from_dict({
        "name": parameter.name,
        "category": "mixture",
        "search_space": {
            "parameters": components,
            "weights": ordered_weights,
        },
    })
def validate_lognormal(search_space):
    """Validate a lognormal search space and return a completed copy.

    Args:
        search_space: dict with mandatory keys ``mu`` and ``sigma`` and
            optional keys ``low``, ``high``, ``step`` and ``base``.

    Returns:
        A shallow copy of ``search_space`` where ``low``, ``high``, ``step``
        and ``base`` default to ``0``, ``np.inf``, ``None`` and ``10``, and
        where ``low_log``, ``high_log``, ``mu_log`` and ``sigma_log`` hold
        ``logb`` of the matching values in ``base``.

    Raises:
        ValidationError: with a ``message_key`` naming the first failed
            check (``search_space_type``, ``mu_mandatory``, ``mu_type``,
            ``mu_inferior_0``, ``sigma_mandatory``, ``sigma_type``,
            ``sigma_inferior_1``, ``low_type``, ``low_inferior_0``,
            ``high_type``, ``high_inferior_low``, ``step_type``,
            ``high_inferior_step``, ``base_type`` or ``base_inferior_0``).
    """
    if type(search_space) is not dict:
        raise ValidationError(message_key="search_space_type")
    search_space = search_space.copy()

    # Exact type checks are kept on purpose: isinstance() would start
    # accepting bool, which is a subclass of int.
    if "mu" not in search_space:
        raise ValidationError(message_key="mu_mandatory")
    if type(search_space["mu"]) not in (int, float):
        raise ValidationError(message_key="mu_type")
    if search_space["mu"] <= 0:
        raise ValidationError(message_key="mu_inferior_0")

    if "sigma" not in search_space:
        raise ValidationError(message_key="sigma_mandatory")
    if type(search_space["sigma"]) not in (int, float):
        raise ValidationError(message_key="sigma_type")
    if search_space["sigma"] < 1:
        raise ValidationError(message_key="sigma_inferior_1")

    if "low" in search_space:
        if type(search_space["low"]) not in (int, float):
            raise ValidationError(message_key="low_type")
        if search_space["low"] <= 0:
            raise ValidationError(message_key="low_inferior_0")

    if "high" in search_space:
        if type(search_space["high"]) not in (int, float):
            raise ValidationError(message_key="high_type")

    search_space.setdefault("low", 0)
    search_space.setdefault("high", np.inf)

    # Compared after the defaults are applied so that a non-positive ``high``
    # given without ``low`` is rejected against the default lower bound of 0
    # instead of producing an invalid ``high_log``.
    if search_space["high"] <= search_space["low"]:
        raise ValidationError(message_key="high_inferior_low")

    # A falsy step (missing, None or 0) means "no step" and is not validated.
    step = search_space.get("step")
    if step:
        if type(step) not in (int, float):
            raise ValidationError(message_key="step_type")
        if step >= max(
            [np.abs(search_space["high"]), np.abs(search_space["low"])]
        ):
            raise ValidationError(message_key="high_inferior_step")

    # Test against None rather than truthiness: with a truthiness test a
    # base of 0 skipped this block, making the ``<= 0`` check unreachable
    # for exactly the value it is meant to reject.
    base = search_space.get("base")
    if base is not None:
        if type(base) not in (float, int):
            raise ValidationError(message_key="base_type")
        if base <= 0:
            raise ValidationError(message_key="base_inferior_0")

    search_space.setdefault("step", None)
    search_space.setdefault("base", 10)

    # ``low`` may be 0 (its default); the divide warning raised by the
    # logarithm in that case is silenced.
    with np.errstate(divide="ignore"):
        search_space["low_log"] = logb(search_space["low"], search_space["base"])
        search_space["high_log"] = logb(search_space["high"], search_space["base"])

    search_space["mu_log"] = logb(search_space["mu"], search_space["base"])
    search_space["sigma_log"] = logb(search_space["sigma"], search_space["base"])

    return search_space