def test_normalization_point_densities_condition(point_densities):
    original = PointDensityCondition(point_densities["xs"],
                                     point_densities["densities"])
    normalized_denormalized = original.normalize(Scale(10, 1000)).denormalize(
        Scale(10, 1000))

    for (density, norm_denorm_density) in zip(
            point_densities["densities"], normalized_denormalized.densities):
        assert density == pytest.approx(norm_denorm_density, rel=0.001)

    for (x, norm_denorm_x) in zip(point_densities["xs"],
                                  normalized_denormalized.xs):
        assert x == pytest.approx(norm_denorm_x, rel=0.001)

    # Rough sanity check that xs and densities are at least
    # getting transformed in the right direction
    normalized = original.normalize(Scale(1, 4))
    for idx, (normalized_x, normalized_density) in enumerate(
            zip(normalized.xs, normalized.densities)):
        orig_x = point_densities["xs"][idx]
        orig_density = point_densities["densities"][idx]
        assert orig_x > normalized_x
        assert orig_density < normalized_density
def test_logistic_mixture_normalization():
    scale = Scale(-50, 50)
    scalex2 = Scale(-100, 100)
    mixture = LogisticMixture(
        components=[Logistic(-40, 1, scale), Logistic(50, 10, scale)],
        probs=[0.5, 0.5],
    )
    mixturex2 = LogisticMixture(
        components=[Logistic(-80, 2, scalex2), Logistic(100, 20, scalex2)],
        probs=[0.5, 0.5],
    )

    assert mixturex2 == mixture.normalize().denormalize(scalex2)
    assert mixture == mixturex2.normalize().denormalize(scale)

    # not necessary to normalize, but here for readability
    normalized = mixture.normalize()
    assert normalized == LogisticMixture(
        [Logistic(0.1, 0.01, Scale(0, 1)), Logistic(1, 0.1, Scale(0, 1))],
        [0.5, 0.5],
    )
def __init__(
    self,
    loc: float,
    s: float,
    scale: Optional[Scale] = None,
    metadata=None,
    normalized=False,
):
    # TODO (#303): Raise ValueError on scale < 0
    if normalized:
        self.loc = loc
        self.s = np.max([s, 0.0000001])
        self.metadata = metadata
        if scale is not None:
            self.scale = scale
        else:
            self.scale = Scale(0, 1)
        self.true_s = self.s * self.scale.width
        self.true_loc = self.scale.denormalize_point(loc)
    elif scale is None:
        raise ValueError("Either a Scale or normalized parameters are required")
    else:
        self.loc = scale.normalize_point(loc)
        self.s = np.max([s, 0.0000001]) / scale.width
        self.scale = scale
        self.metadata = metadata
        self.true_s = s  # convenience field only used in repr currently
        self.true_loc = loc  # convenience field only used in repr currently
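# Hypothetical usage sketch for the constructor above (not library code);
# it assumes Logistic and Scale are imported as in the tests in this repo.
# With true-scale parameters, loc and s are normalized internally; with
# normalized=True they are stored as given and the true values are derived:
#
#     d = Logistic(loc=25.0, s=5.0, scale=Scale(0, 100))
#     d.loc, d.s            # 0.25, 0.05 (stored normalized)
#     d.true_loc, d.true_s  # 25.0, 5.0
#
#     d = Logistic(loc=0.25, s=0.05, scale=Scale(0, 100), normalized=True)
#     d.true_loc, d.true_s  # 25.0, 5.0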
def __init__(
    self,
    xs,
    densities,
    scale: Scale,
    normalized=False,
    traceable=False,
    cumulative_normed_ps=None,
):
    if scale is None:
        raise ValueError("a Scale is required")
    self.scale = scale

    if normalized:
        self.normed_xs = xs
        self.normed_densities = densities
    else:
        self.normed_xs = scale.normalize_points(xs)
        self.normed_densities = scale.normalize_densities(
            self.normed_xs, densities)

    self.cumulative_normed_ps = cumulative_normed_ps
    if cumulative_normed_ps is None:
        self.cumulative_normed_ps = np.append(np.array([0]),
                                              np.cumsum(self.bin_probs))
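# Hypothetical construction sketch for the __init__ above (not library
# code). Densities supplied on the true scale are converted to normalized
# densities that integrate to ~1 over [0, 1], and cumulative_normed_ps is
# derived from bin_probs when not passed in:
#
#     xs = onp.linspace(0, 10, 100)
#     densities = onp.full(100, 0.1)  # uniform over Scale(0, 10)
#     pd = PointDensity(xs, densities, scale=Scale(0, 10))
#     pd.normed_densities      # ~1 everywhere
#     pd.cumulative_normed_ps  # rises from 0 to ~1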
@classmethod
def from_pairs(cls, pairs, scale: Scale, normalized=False, interpolate=True):
    sorted_pairs = sorted([(v["x"], v["density"]) for v in pairs])
    xs = np.array([x for (x, density) in sorted_pairs])
    densities = np.array([density for (x, density) in sorted_pairs])

    if not normalized:
        xs = scale.normalize_points(xs)
        densities = scale.normalize_densities(xs, densities)

    if interpolate:
        # interpolate ps at target_xs
        if not (len(xs) == len(constants.target_xs)
                and np.isclose(xs, constants.target_xs, rtol=1e-04).all()):
            f = interp1d(xs, densities)
            densities = f(constants.target_xs)

    # Make sure AUC is 1
    auc = np.sum(densities) / densities.size
    densities /= auc

    return cls(constants.target_xs, densities, scale=scale, normalized=True)
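# Hypothetical sketch of the interpolation step above (not library code):
# pairs that are not already on constants.target_xs get re-sampled there
# with interp1d, then rescaled so the mean density (the discrete AUC proxy
# used above) is 1:
#
#     pairs = [{"x": x, "density": 1.0 + x} for x in onp.linspace(0, 1, 5)]
#     pd = PointDensity.from_pairs(pairs, scale=Scale(0, 1))
#     pd.normed_xs                # constants.target_xs
#     pd.normed_densities.mean()  # ~1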
def denormalize(self, scale: Scale):
    denormalized_min = (scale.denormalize_point(self.min)
                        if self.min is not None else None)
    denormalized_max = (scale.denormalize_point(self.max)
                        if self.max is not None else None)
    return self.__class__(self.p, denormalized_min, denormalized_max,
                          self.weight)
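# Hypothetical usage sketch for denormalize above (not library code):
# min and max move from the normalized [0, 1] domain onto the target
# scale, while p and weight carry over unchanged:
#
#     cond = IntervalCondition(p=0.95, min=0.25, max=0.75)
#     denormed = cond.denormalize(Scale(0, 100))
#     denormed.min, denormed.max  # 25.0, 75.0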
def test_log_pdf(xscale: Scale):
    normed_test_loc = 0.5
    normed_test_s = 0.1
    test_loc = xscale.denormalize_point(normed_test_loc)
    test_s = normed_test_s * xscale.width

    ergoLogisticMixture = LogisticMixture(
        components=[
            Logistic(
                loc=xscale.denormalize_point(0.2),
                s=0.5 * xscale.width,
                scale=xscale,
            ),
            Logistic(loc=test_loc, s=test_s, scale=xscale),
        ],
        probs=[1.8629593e-29, 1.0],
    )
    ergoLogistic = Logistic(loc=test_loc, s=test_s, scale=xscale)

    ## Test PDF
    # Compare the normalized scipy pdf (rescaled by the width) against both
    # the single logistic and the mixture. Two separate asserts rather than
    # a chained comparison, which would compare two pytest.approx objects.
    normed_scipydist = scipy.stats.logistic(normed_test_loc, normed_test_s)
    for x in np.linspace(0, 1, 10):
        denormalized_x = xscale.denormalize_point(x)
        assert normed_scipydist.pdf(x) / xscale.width == pytest.approx(
            float(ergoLogistic.pdf(denormalized_x)), rel=1e-3)
        assert normed_scipydist.pdf(x) / xscale.width == pytest.approx(
            float(ergoLogisticMixture.pdf(denormalized_x)), rel=1e-3)
def test_variance_condition():
    base_conditions = [
        MaxEntropyCondition(weight=0.001),
        SmoothnessCondition(),
        IntervalCondition(p=0.95, min=0.3, max=0.7),
    ]
    base_dist = PointDensity.from_conditions(base_conditions,
                                             verbose=True,
                                             scale=Scale(0, 1))
    base_variance = base_dist.variance()
    increased_variance = base_variance + 0.01

    # Increase in variance should decrease peak
    var_condition = VarianceCondition(variance=increased_variance, weight=1)
    var_conditions = base_conditions + [var_condition]
    var_dist = PointDensity.from_conditions(var_conditions,
                                            verbose=True,
                                            scale=Scale(0, 1))
    assert np.max(var_dist.normed_densities) < np.max(
        base_dist.normed_densities)

    # Highly weighted variance condition should make var very close to
    # specified var
    strong_condition = VarianceCondition(variance=increased_variance,
                                         weight=100000)
    strong_var_conditions = base_conditions + [strong_condition]
    strong_var_dist = PointDensity.from_conditions(strong_var_conditions,
                                                   verbose=True,
                                                   scale=Scale(0, 1))
    assert strong_var_dist.variance() == pytest.approx(
        float(increased_variance), abs=0.001)
def test_cdf(xscale: Scale):
    scipydist_normed = scipy.stats.logistic(0.5, 0.05)
    true_loc = xscale.denormalize_point(0.5)
    true_s = 0.05 * xscale.width
    ergodist = Logistic(loc=true_loc, s=true_s, scale=xscale)

    for x in np.linspace(0, 1, 10):
        assert scipydist_normed.cdf(x) == pytest.approx(
            float(ergodist.cdf(xscale.denormalize_point(x))), rel=1e-3)

    # TODO: consider a better approach for log scale
    if isinstance(xscale, LogScale):
        for x in np.linspace(xscale.low, xscale.high, 10):
            assert scipydist_normed.cdf(
                xscale.normalize_point(x)) == pytest.approx(
                    float(ergodist.cdf(x)), rel=1e-3)
    else:
        scipydist_true = scipy.stats.logistic(true_loc, true_s)
        for x in np.linspace(xscale.low, xscale.high, 10):
            assert scipydist_true.cdf(x) == pytest.approx(
                float(ergodist.cdf(x)), rel=1e-3)
def normalized_logistic_mixture():
    return LogisticMixture(
        components=[
            Logistic(loc=0.15, s=0.037034005, scale=Scale(0, 1)),
            Logistic(loc=0.85, s=0.032395907, scale=Scale(0, 1)),
        ],
        probs=[0.6, 0.4],
    )
def denormalize(self, scale: Scale):
    denormed_base_dist = self.base_dist.denormalize(scale)
    denormed_floor = scale.denormalize_point(self.floor)
    denormed_ceiling = scale.denormalize_point(self.ceiling)
    return self.__class__(
        base_dist=denormed_base_dist,
        floor=denormed_floor,
        ceiling=denormed_ceiling,
    )
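# Hypothetical usage sketch (not library code; the enclosing truncated
# distribution class is assumed to be constructed with base_dist, floor,
# and ceiling as in the __class__ call above):
#
#     truncated.denormalize(Scale(0, 10))
#     # floor 0.1 -> 1.0, ceiling 0.9 -> 9.0, base_dist denormalized too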
def test_density_norm_denorm_roundtrip(scale: Scale):
    rv = scipy.stats.logistic(loc=0.5, scale=0.15)
    normed_xs = np.linspace(0.01, 1, 201)
    normed_densities_truth_set = rv.pdf(normed_xs)

    xs = scale.denormalize_points(normed_xs)
    denormed_densities = scale.denormalize_densities(
        xs, normed_densities_truth_set)
    normed_densities = scale.normalize_densities(normed_xs,
                                                 denormed_densities)

    assert np.allclose(normed_densities_truth_set,
                       normed_densities)  # type: ignore
def test_export_import():
    log_scale = LogScale(low=-1, high=1, log_base=2)
    log_scale_export = log_scale.export()

    assert log_scale_export["width"] == 2
    assert log_scale_export["class"] == "LogScale"
    assert scale_factory(log_scale.export()) == log_scale

    linear_scale = Scale(low=1, high=10000)
    assert scale_factory(linear_scale.export()) == linear_scale

    linear_date_scale = TimeScale(low=631152000, high=946684800)
    assert scale_factory(linear_date_scale.export()) == linear_date_scale
def test_serialization():
    assert hash(Scale(0, 100)) == hash(Scale(0, 100))
    assert hash(Scale(0, 100)) != hash(Scale(100, 200))

    assert hash(LogScale(0, 100, 10)) == hash(LogScale(0, 100, 10))
    assert hash(LogScale(0, 100, 10)) != hash(LogScale(0, 100, 100))

    assert hash(TimeScale(946684800, 1592914415)) == hash(
        TimeScale(946684800, 1592914415))
    assert hash(TimeScale(631152000, 1592914415)) != hash(
        TimeScale(946684800, 1592914415))

    # A chained a != b != c only compares adjacent pairs, so assert each
    # pair of scale types explicitly
    assert hash(LogScale(0, 100, 1)) != hash(Scale(0, 100))
    assert hash(Scale(0, 100)) != hash(TimeScale(631152000, 946684800))
    assert hash(LogScale(0, 100, 1)) != hash(TimeScale(631152000, 946684800))
def prepare_logistic(self, normalized_dist: dist.Logistic) -> dist.Logistic:
    """
    Transform a single logistic distribution by clipping the parameters
    and adding scale information as needed for submission to Metaculus.
    The loc and scale have to be within a certain range
    for the Metaculus API to accept the prediction.

    :param normalized_dist: a (normalized) logistic distribution
    :return: a transformed logistic distribution
    """
    if hasattr(normalized_dist, "base_dist"):
        normalized_dist = normalized_dist.base_dist  # type: ignore
    if normalized_dist.s <= 0:
        raise ValueError("logistic_params.scale must be greater than 0")

    clipped_loc = min(normalized_dist.loc, max_loc)
    clipped_scale = float(
        onp.clip(normalized_dist.s, min_scale, max_scale))  # type: ignore

    if self.low_open:
        low = float(
            onp.clip(normalized_dist.cdf(0), min_open_low, max_open_low))
    else:
        low = 0

    if self.high_open:
        high = float(
            onp.clip(normalized_dist.cdf(1), min_open_high + low,
                     max_open_high))
    else:
        high = 1

    return dist.Logistic(
        clipped_loc, clipped_scale, Scale(0, 1), {"low": low, "high": high}
    )
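# Hypothetical usage sketch for prepare_logistic above (not library code;
# the question object is assumed). For a question open at the low end, the
# probability mass submitted below the range is the normalized CDF at 0,
# clipped into Metaculus's allowed band; the high end is analogous at
# CDF(1):
#
#     normed = dist.Logistic(0.4, 0.1, Scale(0, 1))
#     prepared = question.prepare_logistic(normed)
#     prepared.metadata["low"]   # clip(normed.cdf(0), min_open_low, max_open_low)
#     prepared.metadata["high"]  # 1, unless self.high_open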
def __init__(
    self,
    logps=None,
    scale=None,
    traceable=False,
    direct_init=None,
):
    # We assume that xs are evenly spaced in [0, 1]
    if direct_init:
        self.logps = direct_init["logps"]
        self.ps = direct_init["ps"]
        self.cum_ps = direct_init["cum_ps"]
        self.xs = direct_init["xs"]
        self.size = direct_init["size"]
        self.scale = direct_init["scale"]
    else:
        init_numpy = np if traceable else onp
        self.logps = logps
        self.ps = np.exp(logps)
        self.cum_ps = np.array(init_numpy.cumsum(self.ps))
        self.size = logps.size
        self.scale = scale if scale else Scale(0, 1)
        self.xs = np.linspace(0, 1, self.logps.size)
    self.density_norm_term = self.scale.width / self.logps.size
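# Hypothetical construction sketch for the __init__ above (not library
# code; the enclosing histogram-style class is not named in this excerpt).
# Passing logps yields ps, cum_ps, and an even grid of xs on [0, 1];
# density_norm_term converts normalized densities back to the true scale:
#
#     logps = onp.log(onp.full(100, 0.01))  # uniform: each bin has mass 0.01
#     hist = SomeHistogramDist(logps, scale=Scale(0, 10))  # name assumed
#     hist.density_norm_term  # 10 / 100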
def test_mixed_2(point_densities):
    conditions = (
        PointDensityCondition(point_densities["xs"],
                              point_densities["densities"]),
        IntervalCondition(p=0.4, max=1),
        IntervalCondition(p=0.45, max=1.2),
        IntervalCondition(p=0.48, max=1.3),
        IntervalCondition(p=0.5, max=2),
        IntervalCondition(p=0.7, max=2.2),
        IntervalCondition(p=0.9, max=2.3),
    )
    dist = LogisticMixture.from_conditions(conditions, {"num_components": 3},
                                           verbose=True,
                                           scale=Scale(0, 1))
    assert dist.pdf(-5) == pytest.approx(0, abs=0.1)
    assert dist.pdf(6) == pytest.approx(0, abs=0.1)

    my_cache = {}
    my_cache[conditions] = 3
    conditions_2 = (
        PointDensityCondition(point_densities["xs"],
                              point_densities["densities"]),
        IntervalCondition(p=0.4, max=1),
        IntervalCondition(p=0.45, max=1.2),
        IntervalCondition(p=0.48, max=1.3),
        IntervalCondition(p=0.5, max=2),
        IntervalCondition(p=0.7, max=2.2),
        IntervalCondition(p=0.9, max=2.3),
    )
    assert hash(conditions) == hash(conditions_2)
    assert my_cache[conditions_2] == 3
def normalize(self):
    return PointDensity(
        self.normed_xs,
        self.normed_densities,
        scale=Scale(0.0, 1.0),
        normalized=True,
    )
def denormalize(self, scale: Scale):
    denormalized_xs = np.array(
        [scale.denormalize_point(x) for x in self.xs])
    denormalized_densities = np.array(
        [density / scale.width for density in self.densities])
    return self.__class__(denormalized_xs, denormalized_densities,
                          self.weight)
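# Hypothetical sketch for the condition denormalization above (not library
# code): xs are mapped pointwise onto the target scale, and densities
# shrink by the scale width so total probability is preserved:
#
#     cond = PointDensityCondition([0.25, 0.5], [2.0, 2.0])
#     denormed = cond.denormalize(Scale(0, 100))
#     denormed.xs         # [25.0, 50.0]
#     denormed.densities  # [0.02, 0.02]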
def test_density_frompairs():
    pairs = [
        {"x": 0, "density": 1},
        {"x": 0.2, "density": 1},
        {"x": 0.4, "density": 1},
        {"x": 0.6, "density": 1},
        {"x": 1, "density": 1},
    ]
    dist = PointDensity.from_pairs(pairs, scale=Scale(0, 1))
    for condition in dist.percentiles():
        assert condition.max == pytest.approx(condition.p, abs=0.01)
def logistic_mixture_norm_test():
    xscale = Scale(-50, 50)
    return LogisticMixture(
        components=[Logistic(-40, 1, xscale), Logistic(50, 10, xscale)],
        probs=[0.5, 0.5],
    )
def test_zero_log_issue():
    """
    Regression test for a bug where:
    1. a distribution is specified that has 0 density in some bins, and
    2. a condition or method that uses self.normed_log_densities
       (or similar) is called
    """
    pairs = [
        {"x": 0, "density": 1},
        {"x": 0.2, "density": 0},
        {"x": 0.4, "density": 0},
        {"x": 0.6, "density": 1},
        {"x": 1, "density": 1},
    ]
    dist = PointDensity.from_pairs(pairs, scale=Scale(0, 1))
    sc = SmoothnessCondition()
    fit = sc.describe_fit(dist)
    assert not np.isnan(fit["loss"])
@classmethod
def from_pairs(cls, pairs, scale: Scale, normalized=False):
    sorted_pairs = sorted([(v["x"], v["density"]) for v in pairs])
    xs = [x for (x, density) in sorted_pairs]
    if not normalized:
        xs = scale.normalize_points(xs)
    densities = [density for (x, density) in sorted_pairs]
    logps = onp.log(onp.array(densities))
    return cls(logps, scale)
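# Hypothetical usage sketch for from_pairs above (not library code; the
# enclosing class name is assumed): pairs are sorted by x, xs normalized
# if needed, and densities stored as log-densities:
#
#     pairs = [{"x": 0.0, "density": 1.0}, {"x": 1.0, "density": 1.0}]
#     hist = SomeHistogramDist.from_pairs(pairs, Scale(0, 1))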
def logistic_mixture10():
    xscale = Scale(-20, 40)
    return LogisticMixture(
        components=[
            Logistic(loc=15, s=2.3658268, scale=xscale),
            Logistic(loc=5, s=2.3658268, scale=xscale),
        ],
        probs=[0.5, 0.5],
    )
def logistic_mixture_p_uneven():
    xscale = Scale(-10, 20)
    return LogisticMixture(
        components=[
            Logistic(loc=10, s=3, scale=xscale),
            Logistic(loc=5, s=5, scale=xscale),
        ],
        probs=[1.8629593e-29, 1.0],
    )
def logistic_mixture():
    xscale = Scale(0, 150000)
    return LogisticMixture(
        components=[
            Logistic(loc=10000, s=1000, scale=xscale),
            Logistic(loc=100000, s=10000, scale=xscale),
        ],
        probs=[0.8, 0.2],
    )
def test_mixture_from_percentile():
    for value in [0.01, 0.1, 1, 3]:
        conditions = [IntervalCondition(p=0.5, max=value)]
        dist = LogisticMixture.from_conditions(conditions,
                                               {"num_components": 1},
                                               verbose=True,
                                               scale=Scale(0, 3))
        loc = dist.components[0].base_dist.true_loc
        assert loc == pytest.approx(value, rel=0.1), loc
def logistic_mixture15():
    xscale = Scale(-10, 40)
    return LogisticMixture(
        components=[
            Logistic(loc=10, s=3.658268, scale=xscale),
            Logistic(loc=20, s=3.658268, scale=xscale),
        ],
        probs=[0.5, 0.5],
    )
def test_add_endpoints():
    xs = [0.25, 0.5, 0.75]

    standard_densities = [0.25, 0.5, 0.75]
    expected_densities = np.array([0, 0.25, 0.5, 0.75, 1])
    _, densities = PointDensity.add_endpoints(xs,
                                              standard_densities,
                                              scale=Scale(0, 1))
    assert densities == pytest.approx(expected_densities, abs=1e-5)

    to_clamp_densities = [0.1, 0.5, 0.1]
    expected_densities = np.array([0, 0.1, 0.5, 0.1, 0])
    _, densities = PointDensity.add_endpoints(xs,
                                              to_clamp_densities,
                                              scale=Scale(0, 1))
    assert densities == pytest.approx(expected_densities, abs=1e-5)
def smooth_logistic_mixture():
    xscale = Scale(1, 1000000.0)
    return LogisticMixture(
        components=[
            Logistic(loc=400000, s=100000, scale=xscale),
            Logistic(loc=700000, s=50000, scale=xscale),
        ],
        probs=[0.8, 0.2],
    )