Example #1
def test_normalization_point_densities_condition(point_densities):
    original = PointDensityCondition(point_densities["xs"],
                                     point_densities["densities"])
    normalized_denormalized = original.normalize(Scale(10, 1000)).denormalize(
        Scale(10, 1000))
    for (density,
         norm_denorm_density) in zip(point_densities["densities"],
                                     normalized_denormalized.densities):
        assert density == pytest.approx(
            norm_denorm_density,
            rel=0.001,
        )
    for (x, norm_denorm_x) in zip(point_densities["xs"],
                                  normalized_denormalized.xs):
        assert x == pytest.approx(
            norm_denorm_x,
            rel=0.001,
        )

    # rough sanity check that xs and densities are at least
    # getting transformed in the right direction
    normalized = original.normalize(Scale(1, 4))
    for idx, (normalized_x, normalized_density) in enumerate(
            zip(normalized.xs, normalized.densities)):
        orig_x = point_densities["xs"][idx]
        orig_density = point_densities["densities"][idx]
        assert orig_x > normalized_x
        assert orig_density < normalized_density
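
A minimal sketch of the scale arithmetic the round trip above relies on, assuming Scale(min, max) maps points via (x - min) / width and scales densities by the width (consistent with the test expecting densities to grow under Scale(1, 4)). The helpers below are hypothetical stand-ins for illustration, not ergo's API:

import math

def normalize_point(x, scale_min, scale_max):
    # map x from [scale_min, scale_max] to [0, 1]
    return (x - scale_min) / (scale_max - scale_min)

def denormalize_point(x, scale_min, scale_max):
    # map x from [0, 1] back to [scale_min, scale_max]
    return x * (scale_max - scale_min) + scale_min

# round-tripping through the same scale recovers the original point
assert math.isclose(
    denormalize_point(normalize_point(42.0, 10, 1000), 10, 1000), 42.0)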
Example #2
def test_variance_condition():
    base_conditions = [
        MaxEntropyCondition(weight=0.001),
        SmoothnessCondition(),
        IntervalCondition(p=0.95, min=0.3, max=0.7),
    ]
    base_dist = PointDensity.from_conditions(base_conditions,
                                             verbose=True,
                                             scale=Scale(0, 1))
    base_variance = base_dist.variance()
    increased_variance = base_variance + 0.01

    # Increase in variance should decrease peak
    var_condition = VarianceCondition(variance=increased_variance, weight=1)
    var_conditions = base_conditions + [var_condition]
    var_dist = PointDensity.from_conditions(var_conditions,
                                            verbose=True,
                                            scale=Scale(0, 1))
    assert np.max(var_dist.normed_densities) < np.max(
        base_dist.normed_densities)

    # Highly weighted variance condition should make var very close to specified var
    strong_condition = VarianceCondition(variance=increased_variance,
                                         weight=100000)
    strong_var_conditions = base_conditions + [strong_condition]
    strong_var_dist = PointDensity.from_conditions(strong_var_conditions,
                                                   verbose=True,
                                                   scale=Scale(0, 1))
    assert strong_var_dist.variance() == pytest.approx(
        float(increased_variance), abs=0.001)
Example #3
def test_logistic_mixture_normalization():
    scale = Scale(-50, 50)
    scalex2 = Scale(-100, 100)
    mixture = LogisticMixture(
        components=[Logistic(-40, 1, scale),
                    Logistic(50, 10, scale)],
        probs=[0.5, 0.5],
    )

    mixturex2 = LogisticMixture(
        components=[Logistic(-80, 2, scalex2),
                    Logistic(100, 20, scalex2)],
        probs=[0.5, 0.5],
    )

    assert mixturex2 == mixture.normalize().denormalize(scalex2)
    assert mixture == mixturex2.normalize().denormalize(scale)

    normalized = mixture.normalize()  # intermediate variable kept for readability

    assert normalized == LogisticMixture(
        [Logistic(0.1, 0.01, Scale(0, 1)),
         Logistic(1, 0.1, Scale(0, 1))],
        [0.5, 0.5],
    )
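
A worked check of the expected normalized parameters above, assuming loc is mapped to (loc - min) / width and s is divided by width (consistent with the Logistic.__init__ shown in Example #13):

width = 50 - (-50)                # Scale(-50, 50) has width 100
loc_norm = (-40 - (-50)) / width  # 0.1, matching Logistic(0.1, ...) above
s_norm = 1 / width                # 0.01, matching Logistic(..., 0.01, ...)
assert (loc_norm, s_norm) == (0.1, 0.01)
# likewise (50 + 50) / 100 == 1 and 10 / 100 == 0.1 for the second component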
Example #4
def normalized_logistic_mixture():
    return LogisticMixture(
        components=[
            Logistic(loc=0.15, s=0.037034005, scale=Scale(0, 1)),
            Logistic(loc=0.85, s=0.032395907, scale=Scale(0, 1)),
        ],
        probs=[0.6, 0.4],
    )
Example #5
def test_serialization():
    assert hash(Scale(0, 100)) == hash(Scale(0, 100))
    assert hash(Scale(0, 100)) != hash(Scale(100, 200))

    assert hash(LogScale(0, 100, 10)) == hash(LogScale(0, 100, 10))
    assert hash(LogScale(0, 100, 10)) != hash(LogScale(0, 100, 100))

    assert hash(TimeScale(946684800, 1592914415)) == hash(
        TimeScale(946684800, 1592914415))
    assert hash(TimeScale(631152000, 1592914415)) != hash(
        TimeScale(946684800, 1592914415))

    assert hash(LogScale(0, 100, 1)) != hash(Scale(0, 100))
    assert hash(Scale(0, 100)) != hash(TimeScale(631152000, 946684800))
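
Because the hashes above are value-based, scales can serve as dictionary keys, for example in a memoization cache. A minimal sketch, assuming Scale also defines value equality (which Example #8 demonstrates for conditions):

cache = {}
cache[Scale(0, 100)] = "cached result"
# an equal-valued scale hits the same entry
assert cache[Scale(0, 100)] == "cached result"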
Example #6
def test_zero_log_issue():
    """
    Regression test for a bug where
    1. distribution is specified which has 0 density in some bins, and
    2. a condition or method that uses self.normed_log_densities or similar is called
    """
    pairs = [
        {"x": 0, "density": 1},
        {"x": 0.2, "density": 0},
        {"x": 0.4, "density": 0},
        {"x": 0.6, "density": 1},
        {"x": 1, "density": 1},
    ]
    dist = PointDensity.from_pairs(pairs, scale=Scale(0, 1))
    sc = SmoothnessCondition()
    fit = sc.describe_fit(dist)
    assert not np.isnan(fit["loss"])
Example #7
    def normalize(self):
        return PointDensity(
            self.normed_xs,
            self.normed_densities,
            scale=Scale(0.0, 1.0),
            normalized=True,
        )
Example #8
def test_mixed_2(point_densities):
    conditions = (
        PointDensityCondition(point_densities["xs"],
                              point_densities["densities"]),
        IntervalCondition(p=0.4, max=1),
        IntervalCondition(p=0.45, max=1.2),
        IntervalCondition(p=0.48, max=1.3),
        IntervalCondition(p=0.5, max=2),
        IntervalCondition(p=0.7, max=2.2),
        IntervalCondition(p=0.9, max=2.3),
    )
    dist = LogisticMixture.from_conditions(conditions, {"num_components": 3},
                                           verbose=True,
                                           scale=Scale(0, 1))
    assert dist.pdf(-5) == pytest.approx(0, abs=0.1)
    assert dist.pdf(6) == pytest.approx(0, abs=0.1)
    my_cache = {}
    my_cache[conditions] = 3
    conditions_2 = (
        PointDensityCondition(point_densities["xs"],
                              point_densities["densities"]),
        IntervalCondition(p=0.4, max=1),
        IntervalCondition(p=0.45, max=1.2),
        IntervalCondition(p=0.48, max=1.3),
        IntervalCondition(p=0.5, max=2),
        IntervalCondition(p=0.7, max=2.2),
        IntervalCondition(p=0.9, max=2.3),
    )
    assert hash(conditions) == hash(conditions_2)
    assert my_cache[conditions_2] == 3
Example #9
    def __init__(
        self,
        logps=None,
        scale=None,
        traceable=False,
        direct_init=None,
    ):
        # We assume that xs are evenly spaced in [0,1]
        if direct_init:
            self.logps = direct_init["logps"]
            self.ps = direct_init["ps"]
            self.cum_ps = direct_init["cum_ps"]
            self.xs = direct_init["xs"]
            self.size = direct_init["size"]
            self.scale = direct_init["scale"]
        else:
            init_numpy = np if traceable else onp
            self.logps = logps
            self.ps = np.exp(logps)
            self.cum_ps = np.array(init_numpy.cumsum(self.ps))
            self.size = logps.size
            self.scale = scale if scale else Scale(0, 1)
            self.xs = np.linspace(0, 1, self.logps.size)

        self.density_norm_term = self.scale.width / self.logps.size
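
A small numeric sketch of the non-direct_init branch above, assuming np is jax.numpy and onp is plain numpy (as the traceable switch suggests):

import numpy as onp

logps = onp.log(onp.array([0.2, 0.3, 0.5]))
ps = onp.exp(logps)                  # back to [0.2, 0.3, 0.5]
cum_ps = onp.cumsum(ps)              # [0.2, 0.5, 1.0]
xs = onp.linspace(0, 1, logps.size)  # [0.0, 0.5, 1.0], evenly spaced in [0, 1]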
Example #10
    def prepare_logistic(self, normalized_dist: dist.Logistic) -> dist.Logistic:
        """
        Transform a single logistic distribution by clipping the
        parameters and adding scale information as needed for submission to
        Metaculus. The loc and scale have to be within a certain range
        for the Metaculus API to accept the prediction.

        :param normalized_dist: a (normalized) logistic distribution
        :return: a transformed logistic distribution
        """
        if hasattr(normalized_dist, "base_dist"):
            normalized_dist = normalized_dist.base_dist  # type: ignore

        if normalized_dist.s <= 0:
            raise ValueError("logistic_params.scale must be greater than 0")

        clipped_loc = min(normalized_dist.loc, max_loc)
        clipped_scale = float(onp.clip(normalized_dist.s, min_scale, max_scale))  # type: ignore

        if self.low_open:
            low = float(onp.clip(normalized_dist.cdf(0), min_open_low, max_open_low))
        else:
            low = 0

        if self.high_open:
            high = float(
                onp.clip(normalized_dist.cdf(1), min_open_high + low, max_open_high)
            )
        else:
            high = 1

        return dist.Logistic(
            clipped_loc, clipped_scale, Scale(0, 1), {"low": low, "high": high}
        )
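
How the clipping above behaves, with illustrative bounds (min_scale, max_scale, and the open-bound limits come from the surrounding module; the values below are hypothetical):

import numpy as onp

min_scale, max_scale = 0.01, 10.0  # hypothetical bounds
assert float(onp.clip(0.001, min_scale, max_scale)) == 0.01  # too small: raised to min
assert float(onp.clip(3.0, min_scale, max_scale)) == 3.0     # in range: unchanged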
Example #11
def logistic_mixture_norm_test():
    xscale = Scale(-50, 50)
    return LogisticMixture(
        components=[Logistic(-40, 1, xscale),
                    Logistic(50, 10, xscale)],
        probs=[0.5, 0.5],
    )
Example #12
def test_density_frompairs():
    pairs = [
        {"x": 0, "density": 1},
        {"x": 0.2, "density": 1},
        {"x": 0.4, "density": 1},
        {"x": 0.6, "density": 1},
        {"x": 1, "density": 1},
    ]
    dist = PointDensity.from_pairs(pairs, scale=Scale(0, 1))
    for condition in dist.percentiles():
        assert condition.max == pytest.approx(condition.p, abs=0.01)
Example #13
    def __init__(
        self,
        loc: float,
        s: float,
        scale: Optional[Scale] = None,
        metadata=None,
        normalized=False,
    ):
        # TODO (#303): Raise ValueError on scale < 0
        if normalized:
            self.loc = loc
            self.s = np.max([s, 0.0000001])
            self.metadata = metadata
            if scale is not None:
                self.scale = scale
            else:
                self.scale = Scale(0, 1)
            self.true_s = self.s * self.scale.width
            self.true_loc = self.scale.denormalize_point(loc)
        elif scale is None:
            raise ValueError("Either a Scale or normalized parameters are required")
        else:
            self.loc = scale.normalize_point(loc)
            self.s = np.max([s, 0.0000001]) / scale.width
            self.scale = scale
            self.metadata = metadata
            self.true_s = s  # convenience field only used in repr currently
            self.true_loc = loc  # convenience field only used in repr currently
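
A worked example of the not-normalized branch above: constructing Logistic(loc=10, s=3, scale=Scale(-10, 20)) should store normalized parameters while keeping the originals as true_loc and true_s, assuming normalize_point(x) == (x - min) / width:

width = 20 - (-10)          # 30
loc = (10 - (-10)) / width  # ~0.667, the stored normalized loc
s = 3 / width               # 0.1, the stored normalized s
true_loc, true_s = 10, 3    # convenience fields keep the original values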
Example #14
def test_mixture_from_percentile():
    for value in [0.01, 0.1, 1, 3]:
        conditions = [IntervalCondition(p=0.5, max=value)]
        dist = LogisticMixture.from_conditions(conditions,
                                               {"num_components": 1},
                                               verbose=True,
                                               scale=Scale(0, 3))
        loc = dist.components[0].base_dist.true_loc
        assert loc == pytest.approx(value, rel=0.1), loc
Example #15
def test_add_endpoints():
    xs = [0.25, 0.5, 0.75]

    standard_densities = [0.25, 0.5, 0.75]
    expected_densities = np.array([0, 0.25, 0.5, 0.75, 1])

    _, densities = PointDensity.add_endpoints(xs,
                                              standard_densities,
                                              scale=Scale(0, 1))
    assert densities == pytest.approx(expected_densities, abs=1e-5)

    to_clamp_densities = [0.1, 0.5, 0.1]
    expected_densities = np.array([0, 0.1, 0.5, 0.1, 0])

    _, densities = PointDensity.add_endpoints(xs,
                                              to_clamp_densities,
                                              scale=Scale(0, 1))
    assert densities == pytest.approx(expected_densities, abs=1e-5)
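
The expected values above are consistent with endpoints being linearly extrapolated from the two nearest points and clamped at zero. A sketch of that rule (an inference from the test, not ergo's actual implementation):

def extrapolate_left(xs, densities):
    # extend the line through the first two points back to x = 0,
    # clamping negative densities to 0
    slope = (densities[1] - densities[0]) / (xs[1] - xs[0])
    return max(0.0, densities[0] - slope * (xs[0] - 0.0))

assert extrapolate_left([0.25, 0.5], [0.25, 0.5]) == 0.0  # first case above
assert extrapolate_left([0.25, 0.5], [0.1, 0.5]) == 0.0   # clamped, second case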
Example #16
def logistic_mixture_p_uneven():
    xscale = Scale(-10, 20)
    return LogisticMixture(
        components=[
            Logistic(loc=10, s=3, scale=xscale),
            Logistic(loc=5, s=5, scale=xscale),
        ],
        probs=[1.8629593e-29, 1.0],
    )
Example #17
def logistic_mixture():
    xscale = Scale(0, 150000)
    return LogisticMixture(
        components=[
            Logistic(loc=10000, s=1000, scale=xscale),
            Logistic(loc=100000, s=10000, scale=xscale),
        ],
        probs=[0.8, 0.2],
    )
Example #18
def logistic_mixture10():
    xscale = Scale(-20, 40)
    return LogisticMixture(
        components=[
            Logistic(loc=15, s=2.3658268, scale=xscale),
            Logistic(loc=5, s=2.3658268, scale=xscale),
        ],
        probs=[0.5, 0.5],
    )
Example #19
def logistic_mixture15():
    xscale = Scale(-10, 40)
    return LogisticMixture(
        components=[
            Logistic(loc=10, s=3.658268, scale=xscale),
            Logistic(loc=20, s=3.658268, scale=xscale),
        ],
        probs=[0.5, 0.5],
    )
Example #20
def smooth_logistic_mixture():
    xscale = Scale(1, 1000000.0)
    return LogisticMixture(
        components=[
            Logistic(loc=400000, s=100000, scale=xscale),
            Logistic(loc=700000, s=50000, scale=xscale),
        ],
        probs=[0.8, 0.2],
    )
Example #21
def test_mixture_from_histogram(histogram):
    conditions = [HistogramCondition(histogram["xs"], histogram["densities"])]

    mixture = LogisticMixture.from_conditions(
        conditions,
        {"num_components": 3},
        Scale(min(histogram["xs"]), max(histogram["xs"])),
    )
    for (x, density) in zip(histogram["xs"], histogram["densities"]):
        assert mixture.pdf(x) == pytest.approx(density, abs=0.2)
Example #22
    def __init__(
        self,
        id: int,
        metaculus: Any,
        data: Dict,
        name=None,
    ):
        super().__init__(id, metaculus, data, name)
        self.scale = Scale(float(self.question_range["min"]),
                           float(self.question_range["max"]))
Example #23
    def normalize(self):
        """
        Return the normalized condition.

        :param scale: the true scale
        :return: the condition normalized to [0,1]
        """
        return self.__class__(
            self.loc, self.s, Scale(0, 1), self.metadata, normalized=True
        )
Example #24
    def from_conditions(
        cls: Type[T],
        conditions: Sequence[Condition],
        fixed_params=None,
        scale=None,
        verbose=False,
        init_tries=1,
        opt_tries=1,
        jit_all=False,
    ) -> T:

        if fixed_params is None:
            fixed_params = {}

        if scale is None:
            scale = Scale(0.0, 1.0)

        fixed_params = cls.normalize_fixed_params(fixed_params, scale)
        normalized_conditions = [
            condition.normalize(scale) for condition in conditions
        ]
        cond_data = [
            condition.destructure() for condition in normalized_conditions
        ]
        if cond_data:
            cond_classes, cond_params = zip(*cond_data)
        else:
            cond_classes, cond_params = [], []

        if jit_all:
            jitted_loss = static.jitted_condition_loss
            jitted_jac = static.jitted_condition_loss_grad
        else:
            jitted_loss = static.condition_loss
            jitted_jac = static.condition_loss_grad

        def loss(opt_params):
            return jitted_loss(cls, fixed_params, opt_params, cond_classes,
                               cond_params)

        def jac(opt_params):
            return jitted_jac(cls, fixed_params, opt_params, cond_classes,
                              cond_params)

        normalized_dist = cls.from_loss(
            fixed_params=fixed_params,
            loss=loss,
            jac=jac,
            verbose=verbose,
            init_tries=init_tries,
            opt_tries=opt_tries,
        )

        return normalized_dist.denormalize(scale)
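
The typical call pattern for from_conditions, as exercised throughout these examples: conditions are normalized to [0, 1], the loss is minimized on that scale, and the fitted distribution is denormalized back to the requested scale.

conditions = [IntervalCondition(p=0.5, max=2)]
dist = LogisticMixture.from_conditions(
    conditions, {"num_components": 1}, scale=Scale(0, 3))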
Example #25
def test_mixture_from_percentiles():
    conditions = [
        IntervalCondition(p=0.1, max=1),
        IntervalCondition(p=0.5, max=2),
        IntervalCondition(p=0.6, max=3),
    ]
    dist = LogisticMixture.from_conditions(conditions, {"num_components": 4},
                                           verbose=False,
                                           scale=Scale(0, 3))
    for condition in conditions:
        assert dist.cdf(condition.max) == pytest.approx(condition.p, rel=0.1)
Example #26
def test_weights_mixture():
    conditions = [
        IntervalCondition(p=0.4, max=1, weight=0.01),
        IntervalCondition(p=0.5, max=2, weight=100),
        IntervalCondition(p=0.8, max=2.2, weight=0.01),
        IntervalCondition(p=0.9, max=2.3, weight=0.01),
    ]
    dist = LogisticMixture.from_conditions(conditions, {"num_components": 1},
                                           verbose=True,
                                           scale=Scale(0, 3))
    assert dist.components[0].base_dist.true_loc == pytest.approx(2, rel=0.1)
Example #27
def test_mode_condition():
    base_conditions = [IntervalCondition(p=0.4, max=0.5)]
    base_dist = PointDensity.from_conditions(base_conditions,
                                             verbose=True,
                                             scale=Scale(0, 1))

    # Most likely condition should increase chance of specified outcome
    outcome_conditions = base_conditions + [ModeCondition(outcome=0.25)]
    outcome_dist = PointDensity.from_conditions(outcome_conditions,
                                                verbose=True,
                                                scale=Scale(0, 1))
    assert outcome_dist.pdf(0.25) > base_dist.pdf(0.25)

    # Highly weighted most likely condition should make specified outcome most likely
    strong_condition = ModeCondition(outcome=0.25, weight=100000)
    strong_outcome_conditions = base_conditions + [strong_condition]
    strong_outcome_dist = PointDensity.from_conditions(
        strong_outcome_conditions, verbose=True, scale=Scale(0, 1))
    assert strong_condition.loss(strong_outcome_dist) == pytest.approx(
        0, abs=0.001)
Example #28
def test_mean_condition():
    base_conditions = [MaxEntropyCondition(weight=0.1)]
    base_dist = PointDensity.from_conditions(base_conditions,
                                             verbose=True,
                                             scale=Scale(0, 1))
    base_mean = base_dist.mean()

    # Mean condition should move mean closer to specified mean
    mean_conditions = base_conditions + [MeanCondition(mean=0.25, weight=1)]
    mean_dist = PointDensity.from_conditions(mean_conditions,
                                             verbose=True,
                                             scale=Scale(0, 1))
    assert abs(mean_dist.mean() - 0.25) < abs(base_mean - 0.25)

    # Highly weighted mean condition should make mean very close to specified mean
    strong_condition = MeanCondition(mean=0.25, weight=100000)
    strong_mean_conditions = base_conditions + [strong_condition]
    strong_mean_dist = PointDensity.from_conditions(strong_mean_conditions,
                                                    verbose=True,
                                                    scale=Scale(0, 1))
    assert strong_mean_dist.mean() == pytest.approx(0.25, rel=0.01)
Example #29
    def from_params(cls, fixed_params, opt_params, scale=None, traceable=True):
        if scale is None:
            scale = Scale(0.0, 1.0)
        xs = fixed_params["xs"]

        densities = nn.softmax(opt_params) * opt_params.size

        return cls(xs=xs,
                   densities=densities,
                   scale=scale,
                   normalized=True,
                   traceable=traceable)  # pass through the parameter instead of hardcoding True
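
Why densities = nn.softmax(opt_params) * opt_params.size: softmax sums to 1, so scaling by the number of points makes the mean density 1, which keeps the integral over evenly spaced xs in [0, 1] near 1. A plain-numpy sketch:

import numpy as onp

def softmax(z):
    e = onp.exp(z - z.max())
    return e / e.sum()

params = onp.array([0.0, 1.0, -1.0, 0.5])
densities = softmax(params) * params.size
assert onp.isclose(densities.mean(), 1.0)  # mean density is 1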
Example #30
def truncated_logistic_mixture():
    xscale = Scale(5000, 120000)
    return LogisticMixture(
        components=[
            Truncate(
                Logistic(loc=10000, s=1000, scale=xscale), floor=5000, ceiling=500000
            ),
            Truncate(
                Logistic(loc=100000, s=10000, scale=xscale), floor=5000, ceiling=500000
            ),
        ],
        probs=[0.8, 0.2],
    )