Example #1
def test_lognormal_generator_step():
    """Test to reassure."""
    mu = 50
    sigma = 3
    low = 2
    high = 100
    base = 10

    step = 2
    size = 100000

    mu_log = logb(mu, base)
    sigma_log = logb(sigma, base)
    low_log = logb(low, base)
    high_log = logb(high, base)
    samples = sample_generators["lognormal"](size=size,
                                             mu_log=mu_log,
                                             sigma_log=sigma_log,
                                             low_log=low_log,
                                             low=low,
                                             high_log=high_log,
                                             base=base,
                                             step=step)

    assert np.sum(samples % 2) == 0
    assert np.sum(samples < low) == 0
    assert np.sum(samples >= high) == 0
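These tests assume a logb helper for taking logarithms in an arbitrary base and a sample_generators registry keyed by distribution name; neither is shown in the examples. A minimal sketch of the assumed helper (the library's actual implementation may differ):

import numpy as np

def logb(x, base):
    # Assumed change-of-base logarithm: log of x in the given base.
    return np.log(x) / np.log(base)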
Example #2
def test_loguniform_pdf():

    low = 10**1.125
    high = 10**4.365
    step = None
    base = 10

    size = 10000
    bins = 100
    epsilon = 1e-1

    low_log = logb(low, base)
    high_log = logb(high, base)

    samples = sample_generators["loguniform"](
        size=size, low=low, high=high, low_log=low_log, high_log=high_log, step=step, base=base
    )
    hist, bin_edges = np.histogram(samples, bins=bins, density=True)
    densities = probability_density_function["loguniform"](
        samples=(bin_edges[1:] + bin_edges[:-1]) * 0.5,
        low_log=low_log,
        high_log=high_log,
        low=low,
        high=high,
        step=step,
        base=base,
    )
    assert np.sum(densities[(bin_edges[1:] + bin_edges[:-1]) * 0.5 < low]) == 0
    assert np.sum(densities[(bin_edges[1:] + bin_edges[:-1]) * 0.5 >= high]) == 0
    assert ((hist - densities) / densities).mean() <= epsilon
Example #3
def test_loguniform_step_pdf():

    low = 10**0
    high = 10**3
    step = low
    base = 10
    size = 100000
    epsilon = 1e-1

    low_log = logb(low, base)
    high_log = logb(high, base)

    samples = sample_generators["loguniform"](
        size=size, low=low, high=high, low_log=low_log, high_log=high_log, step=step, base=base
    )

    hist, bin_edges = np.histogram(
        samples, bins=np.arange(low - step / 10, high, step), density=True
    )
    densities = probability_density_function["loguniform"](
        samples=bin_edges[0:-1] + step / 10,
        low_log=low_log,
        high_log=high_log,
        low=low,
        high=high,
        step=step,
        base=base,
    )
    assert np.sum(densities[(bin_edges[1:] + bin_edges[:-1]) * 0.5 < low]) == 0
    assert np.sum(densities[(bin_edges[1:] + bin_edges[:-1]) * 0.5 >= high]) == 0
    assert ((hist - densities) / densities).mean() <= epsilon
Example #4
def validate_loguniform(search_space):
    # error = "Expected a type dict with mandatory keys : [low, high] and optional key [log]"
    if type(search_space) != dict:
        raise ValidationError(message_key="search_space_type")

    search_space = search_space.copy()

    if "low" not in search_space.keys():
        raise ValidationError(message_key="low_mandatory")

    if "high" not in search_space.keys():
        raise ValidationError(message_key="high_mandatory")

    if type(search_space["low"]) not in (int, float):
        raise ValidationError(message_key="low_type")

    if type(search_space["high"]) not in (int, float):
        raise ValidationError(message_key="high_type")

    if search_space["low"] <= 0:
        raise ValidationError(message_key="low_inferior_0")

    if search_space["high"] <= search_space["low"]:
        raise ValidationError(message_key="high_inferior_low")

    if "step" in search_space.keys():
        if search_space["step"] and type(
                search_space["step"]) not in (int, float):
            raise ValidationError(message_key="step_type")
        if search_space["step"] and search_space["step"] >= max(
            [np.abs(search_space["high"]),
             np.abs(search_space["low"])]):
            raise ValidationError(message_key="high_inferior_step")

    if search_space.get("base") and type(search_space.get("base")) not in (
            float,
            int,
    ):
        raise ValidationError(message_key="base_type")

    search_space.setdefault("step", None)
    search_space.setdefault("base", 10)

    with np.errstate(divide="ignore"):  # Low can be 0
        search_space["low_log"] = logb(search_space["low"],
                                       search_space["base"])
    search_space["high_log"] = logb(search_space["high"], search_space["base"])

    return search_space
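As an illustration of what the validator returns, a hypothetical call is shown below; the output comment simply traces the code above and is not library documentation:

search_space = validate_loguniform({"low": 1e-2, "high": 1e3})
# Optional keys are defaulted and log-scale bounds are derived:
# {"low": 0.01, "high": 1000.0, "step": None, "base": 10,
#  "low_log": -2.0, "high_log": 3.0}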
Example #5
def test_lognormal_pdf_step():
    mu = 50
    sigma = 20
    low = 2
    high = 100
    base = 10

    step = 2
    size = 100000
    epsilon = 1e-1

    mu_log = logb(mu, base)
    sigma_log = logb(sigma, base)
    low_log = logb(low, base)
    high_log = logb(high, base)

    samples = sample_generators["lognormal"](
        size=size,
        low=low,
        high=high,
        base=base,
        step=step,
        mu=mu,
        mu_log=mu_log,
        sigma_log=sigma_log,
        low_log=low_log,
        high_log=high_log,
        sigma=sigma,
    )

    hist, bin_edges = np.histogram(samples,
                                   bins=np.arange(low - step / 10, high, step),
                                   density=True)
    densities = probability_density_function["lognormal"](
        samples=bin_edges[0:-1] + step / 10,
        low=low,
        high=high,
        base=base,
        step=step,
        mu_log=mu_log,
        sigma_log=sigma_log,
        low_log=low_log,
        high_log=high_log,
        mu=mu,
        sigma=sigma,
    )
    assert np.sum(densities[(bin_edges[1:] + bin_edges[:-1]) * 0.5 < low]) == 0
    assert np.sum(densities[(bin_edges[1:] + bin_edges[:-1]) * 0.5 >= high]) == 0
    assert ((hist - densities) / densities).mean() <= epsilon
Example #6
def loguniform_cdf(samples, low, low_log, high, high_log, base):
    """Evaluate (truncated)(discrete)normal cumulated probability density function for each sample.

    Integral of below pdf between base ** low and sample
    """
    values = (logb(samples, base) - low_log) / (high_log - low_log)
    values[(samples < low) + (samples >= high)] = 0
    return values
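Since this CDF is linear in log space, a matching continuous (step=None) generator can be sketched with inverse-transform sampling; this is an assumption about how sample_generators["loguniform"] might behave, not the library's actual code:

import numpy as np

def sample_loguniform(size, low_log, high_log, base, rng=None):
    # Invert the CDF above: u ~ Uniform(0, 1), sample = base ** (low_log + u * (high_log - low_log)).
    rng = np.random.default_rng() if rng is None else rng
    u = rng.uniform(size=size)
    return base ** (low_log + u * (high_log - low_log))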
Example #7
def test_lognormal_generator():
    """Test to reassure."""
    base = 10
    mu = 1e-5
    sigma = 1e2
    low = 1e-7
    high = 1e1
    step = None
    size = 1000000
    epsilon = 1e-1

    mu_log = logb(mu, base)
    sigma_log = logb(sigma, base)
    low_log = logb(low, base)
    high_log = logb(high, base)
    samples = logb(
        sample_generators["lognormal"](
            size=size,
            mu_log=mu_log,
            sigma_log=sigma_log,
            low_log=low_log,
            low=low,
            high_log=high_log,
            base=base,
            step=step,
        ),
        base,
    )
    a = (low_log - mu_log) / sigma_log
    b = (high_log - mu_log) / sigma_log
    # Median
    theoretical_median = stats.truncnorm.median(a=a,
                                                b=b,
                                                loc=mu_log,
                                                scale=sigma_log)
    assert np.abs(np.median(samples) -
                  theoretical_median) / theoretical_median < epsilon
    # mean (expected value)
    theoretical_mean = stats.truncnorm.mean(a=a,
                                            b=b,
                                            loc=mu_log,
                                            scale=sigma_log)
    assert np.abs(np.mean(samples) - theoretical_mean) / theoretical_mean < epsilon
    assert np.sum(samples < low_log) == 0
    assert np.sum(samples >= high_log) == 0
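The test checks the samples against scipy's truncated normal in log space, which suggests the continuous lognormal generator can be approximated by exponentiating truncated-normal draws; the sketch below is an assumption consistent with the test, not the library's implementation:

import numpy as np
from scipy import stats

def sample_lognormal(size, mu_log, sigma_log, low_log, high_log, base):
    # Truncation bounds in standard-normal units, as computed in the test above.
    a = (low_log - mu_log) / sigma_log
    b = (high_log - mu_log) / sigma_log
    log_draws = stats.truncnorm.rvs(a=a, b=b, loc=mu_log, scale=sigma_log, size=size)
    return base ** log_draws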
Example #8
def test_loguniform_step_generator():

    low = 10**2
    high = 10**4
    step = 2
    base = 10

    size = 10000

    low_log = logb(low, base)
    high_log = logb(high, base)

    samples = sample_generators["loguniform"](
        size=size, low=low, high=high, low_log=low_log, high_log=high_log, step=step, base=base
    )
    assert np.sum(samples % step) == 0
    assert np.sum(samples < low) == 0
    assert np.sum(samples >= high) == 0
Example #9
def test_loguniform_generator():

    low = 10**-7.23
    high = 10**-6.569
    step = None
    base = 10

    size = 100000
    epsilon = 1e-1

    low_log = logb(low, base)
    high_log = logb(high, base)

    samples = sample_generators["loguniform"](size=size,
                                              low=low,
                                              high=high,
                                              low_log=low_log,
                                              high_log=high_log,
                                              step=step,
                                              base=base)
    # Median
    theoretical_median = base**(0.5 * (logb(high, base) + logb(low, base)))
    assert np.abs(np.median(samples) -
                  theoretical_median) / theoretical_median < epsilon
    # mean (expected value)
    theoretical_mean = (high - low) / (
        (logb(high, base) - logb(low, base)) * np.log(base))
    assert np.abs(np.mean(samples) - theoretical_mean) / theoretical_mean < epsilon
    assert np.sum(samples < low) == 0
    assert np.sum(samples >= high) == 0
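The theoretical median and mean used above follow directly from the log-uniform density; with a = low and b = high:

f(x) = \frac{1}{x\,(\ln b - \ln a)}, \qquad a \le x < b

\operatorname{median} = \exp\!\left(\frac{\ln a + \ln b}{2}\right) = \text{base}^{\frac{\log_{\text{base}} a + \log_{\text{base}} b}{2}}

\mathbb{E}[X] = \int_a^b \frac{x\,\mathrm{d}x}{x\,(\ln b - \ln a)} = \frac{b - a}{\ln b - \ln a} = \frac{b - a}{(\log_{\text{base}} b - \log_{\text{base}} a)\,\ln(\text{base})}

which is exactly the expression assigned to theoretical_mean.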
Example #10
def build_posterior_lognormal(observed_values, observed_weights, parameter,
                              prior_weight):
    low_log = parameter.search_space["low_log"]
    high_log = parameter.search_space["high_log"]
    base = parameter.search_space["base"]

    # build log prior mu and sigma
    prior_mu_log = parameter.search_space["mu_log"]
    prior_sigma_log = parameter.search_space["sigma_log"]

    # Build mus and sigmas centered on each observation, taking care of the prior
    mus_log, sigmas_log, index = find_sigmas_mus(
        observed_mus=logb(observed_values, base),
        prior_mu=prior_mu_log,
        prior_sigma=prior_sigma_log,
        low=low_log,
        high=high_log,
    )

    # Back from log scale
    mus = base**mus_log
    sigmas = base**sigmas_log

    sum_observed_weights = sum(observed_weights)
    posterior_parameter = Parameter.from_dict({
        "name": parameter.name,
        "category": "mixture",
        "search_space": {
            "parameters": [{
                "category": "lognormal",
                "search_space": {
                    "mu": mu.tolist(),
                    "sigma": sigma.tolist(),
                    "low": parameter.search_space["low"],
                    "high": parameter.search_space["high"],
                    "step": parameter.search_space["step"],
                    "base": parameter.search_space["base"],
                },
            } for mu, sigma in zip(mus, sigmas)],
            "weights":
            np.array([
                x * (1 - prior_weight) / sum_observed_weights
                for x in observed_weights
            ] + [prior_weight])[index].tolist(),
        },
    })

    return posterior_parameter
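The mixture weights built at the end rescale the observation weights so that, together with the prior component, they sum to one (ignoring the reordering by index). A small numeric check of that expression, with arbitrarily chosen values:

observed_weights = [1.0, 1.0, 2.0]
prior_weight = 0.25
scaled = [w * (1 - prior_weight) / sum(observed_weights) for w in observed_weights]
weights = scaled + [prior_weight]
# scaled == [0.1875, 0.1875, 0.375] and sum(weights) == 1.0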
Example #11
def validate_lognormal(search_space):
    # error = "Expected a type dict with mandatory keys : [mu, sigma] and optional key log  or step"
    if type(search_space) != dict:
        raise ValidationError(message_key="search_space_type")

    search_space = search_space.copy()

    if "mu" not in search_space.keys():
        raise ValidationError(message_key="mu_mandatory")

    if type(search_space["mu"]) not in (int, float):
        raise ValidationError(message_key="mu_type")

    if search_space["mu"] <= 0:
        raise ValidationError(message_key="mu_inferior_0")

    if "sigma" not in search_space.keys():
        raise ValidationError(message_key="sigma_mandatory")

    if type(search_space["sigma"]) not in (int, float):
        raise ValidationError(message_key="sigma_type")

    if search_space["sigma"] < 1:
        raise ValidationError(message_key="sigma_inferior_1")

    if "low" in search_space.keys():
        if type(search_space["low"]) not in (int, float):
            raise ValidationError(message_key="low_type")
        if search_space["low"] <= 0:
            raise ValidationError(message_key="low_inferior_0")

    if "high" in search_space.keys():
        if type(search_space["high"]) not in (int, float):
            raise ValidationError(message_key="high_type")

    if "high" in search_space.keys() and "low" in search_space.keys():
        if search_space["high"] <= search_space["low"]:
            raise ValidationError(message_key="high_inferior_low")

    search_space.setdefault("low", 0)
    search_space.setdefault("high", np.inf)

    if "step" in search_space.keys():
        if search_space["step"] and type(
                search_space["step"]) not in (int, float):
            raise ValidationError(message_key="step_type")
        if search_space["step"] and search_space["step"] >= max(
            [np.abs(search_space["high"]),
             np.abs(search_space["low"])]):
            raise ValidationError(message_key="high_inferior_step")

    if search_space.get("base"):
        if type(search_space["base"]) not in (
                float,
                int,
        ):
            raise ValidationError(message_key="base_type")
        if search_space["base"] <= 0:
            raise ValidationError(message_key="base_inferior_0")

    search_space.setdefault("step", None)
    search_space.setdefault("base", 10)

    with np.errstate(divide="ignore"):  # Low can be 0
        search_space["low_log"] = logb(search_space["low"],
                                       search_space["base"])
    search_space["high_log"] = logb(search_space["high"], search_space["base"])
    search_space["mu_log"] = logb(search_space["mu"], search_space["base"])
    search_space["sigma_log"] = logb(search_space["sigma"],
                                     search_space["base"])

    return search_space