import math
from collections import Counter
from typing import List

import numpy as np
import pymorton

# StatsConventions, approx_dt, exponential_bootstrap, project_on_lagged_lattice,
# evenly_spaced_percentiles and _weighted_random_sample are assumed in scope
# from the surrounding package.


def fox_sample(lagged_values, lagged_times, delay, num, name, as_process=None):
    """ Elementary but not completely woeful sampler, used by Malaxable Fox """
    dt = approx_dt(lagged_times)
    lag = max(10, math.ceil(delay / dt))
    print('lag = ' + str(lag))
    is_proc = as_process or ('~' not in name and StatsConventions.is_process(lagged_values))
    if len(lagged_values) < 250 + lag or not is_proc:
        values = exponential_bootstrap(lagged=lagged_values, decay=0.1, num=num, as_process=as_process)
        ret_values = StatsConventions.nudged(project_on_lagged_lattice(values=values, lagged_values=lagged_values))
    else:
        # Tabulate lagged differences and allocate roughly 175 samples in proportion to frequency
        changes = np.diff(list(reversed(lagged_values)), n=lag)
        num_total = len(changes)
        counts = {change: round(175 * change_count / num_total) for change, change_count in Counter(changes).items()}
        values = list()
        for change, rounded_count in counts.items():
            values.extend([change] * rounded_count)
        change_spray = list(range(-50, 50))
        values.extend(change_spray)
        change_values = values[:num]
        abs_values = [lagged_values[0] + chg for chg in change_values]
        if len(abs_values) != num:
            # Too many rounded down ... may not be discrete
            abs_values = exponential_bootstrap(lagged=lagged_values, decay=0.1, num=num, as_process=True)
        ret_values = StatsConventions.nudged(project_on_lagged_lattice(values=abs_values, lagged_values=lagged_values))
    return ret_values
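# A minimal, self-contained sketch of the discrete branch above: tabulate lagged
# differences with Counter, then spray them around the latest value. The names
# here (toy_lagged etc.) are illustrative only, not part of the library.
def _demo_change_histogram():
    toy_lagged = [5, 5, 6, 5, 7, 6, 6, 5, 6, 7, 6, 5]        # most recent first
    toy_changes = np.diff(list(reversed(toy_lagged)), n=1)   # lag-1 differences
    toy_sample = [chg for chg, cnt in Counter(toy_changes).items() for _ in range(cnt)]
    return [toy_lagged[0] + chg for chg in toy_sample]       # absolute scenarios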
def test_mean_percentile():
    zscores = np.random.randn(100)
    normcdf = StatsConventions._normcdf_function()
    norminv = StatsConventions._norminv_function()
    p = [normcdf(z) for z in zscores]
    avg_p = StatsConventions.zmean_percentile(p)
    implied_avg = norminv(avg_p)
    actual_avg = np.mean(zscores)
    assert abs(implied_avg - actual_avg) < 1e-4
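# Hedged illustration of the convention the test above relies on: zmean_percentile
# is assumed to average percentiles in z-score space (norminv -> mean -> normcdf).
# This standalone sketch uses scipy.stats.norm in place of the library internals.
def _demo_zmean_percentile(ps):
    from scipy.stats import norm
    return norm.cdf(np.mean([norm.ppf(p) for p in ps]))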
def independent_gaussian_samples(lagged, num):
    # Append pseudo-observations so the moments are defined and non-degenerate
    shrunk_std = np.nanstd(list(lagged) + [0.01, -0.01])
    shrunk_mean = np.nanmean(list(lagged) + [0.0])
    return [shrunk_mean + shrunk_std * StatsConventions.norminv(p) for p in evenly_spaced_percentiles(num)]
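# Why the pseudo-observations above help: they keep the std strictly positive and
# nudge the mean towards zero, so norminv stays well-behaved even for a short or
# constant history. A quick self-contained check:
def _demo_shrunk_moments():
    degenerate = [0.0, 0.0, 0.0]                             # constant history
    assert np.nanstd(list(degenerate) + [0.01, -0.01]) > 0   # std no longer zero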
def normal_sample(prediction_mean: float, prediction_std: float, num: int) -> List[float]:
    """ Returns normally distributed samples evenly spaced in probability """
    return [prediction_mean + prediction_std * StatsConventions.norminv(p)
            for p in evenly_spaced_percentiles(num=num)]
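# Usage sketch for normal_sample. Here evenly_spaced_percentiles is assumed to
# return points such as (k + 0.5) / num; it is inlined so the example stands
# alone, with scipy's ppf standing in for StatsConventions.norminv.
def _demo_normal_sample(mean=0.0, std=1.0, num=5):
    from scipy.stats import norm
    ps = [(k + 0.5) / num for k in range(num)]       # evenly spaced in probability
    return [mean + std * norm.ppf(p) for p in ps]    # quantiles symmetric about the mean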
def to_zcurve(self, prctls: List[float]):
    """ A mapping from I^n -> R based on the Morton z-curve """
    dim = len(prctls)
    if dim == 1:
        return self.to_zscores(prctls)[0]
    else:
        zpercentile = self.from_cube(prctls=prctls)
        return StatsConventions.norminv(zpercentile)
def independent_bootstrap(lagged, decay, num):
    """ One-parameter jiggled bootstrap favouring more recent observations

          lagged   [ float ]    List, most recent observation first
          decay    float        Coefficient a in the exp(-a k) sample weights
          num      int          Number of scenarios requested

        :returns [ float ]  Statistical sample
    """
    weights = list(np.exp([-decay * k for k in range(len(lagged))]))
    empirical_sample = _weighted_random_sample(population=lagged, weights=weights, num=num)
    return StatsConventions.nudged(empirical_sample)
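# Self-contained sketch of the exponential weighting above, assuming that
# _weighted_random_sample draws with replacement in proportion to the weights
# (np.random.choice stands in for it here).
def _demo_independent_bootstrap(lagged, decay=0.1, num=5):
    weights = np.exp([-decay * k for k in range(len(lagged))])
    return list(np.random.choice(lagged, size=num, replace=True, p=weights / weights.sum()))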
def from_zcurve(self, zvalue, dim):
    zpercentile = StatsConventions.normcdf(zvalue)
    SCALE = self.morton_scale(dim)
    zmorton = int(self.morton_large(dim) * zpercentile + 0.5)
    if dim == 2:
        values = pymorton.deinterleave2(zmorton)
    elif dim == 3:
        values = pymorton.deinterleave3(zmorton)
    else:
        raise NotImplementedError('Only 2d or 3d')
    prctls = [v / SCALE for v in values]
    return prctls
def test_discrete():
    # Generate a PDF, calculate the likely CDF that will be returned, then imply a PDF back.
    # See https://gist.github.com/microprediction/ea63388c2bbcfd7623bd9937723565b9
    num = 7
    cij = [[1.0] * k + [0.5] + [0.0] * (num - k - 1) for k in range(num)]
    C = np.array(cij)
    probs = np.random.rand(num)
    probs = probs / sum(probs)
    cdf = np.matmul(C, np.transpose(np.array(probs)))
    pdf = StatsConventions.discrete_pdf(ys=cdf)
    assert all(abs(p1 - p2) < 1e-5 for p1, p2 in zip(probs, pdf))
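# The matrix C above encodes the convention that half of each atom's mass sits at
# its own location, so cdf = C @ pdf with C lower-triangular. As an independent
# cross-check (not the library's method), the PDF can be implied by inverting C:
def _demo_imply_pdf(cdf_values):
    n = len(cdf_values)
    C = np.array([[1.0] * k + [0.5] + [0.0] * (n - k - 1) for k in range(n)])
    return np.linalg.solve(C, np.array(cdf_values))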
def to_zcurve(self, prctls: List[float]):
    """ A mapping from I^n -> R based on the Morton z-curve """
    SAFE = False  # Set True to check the interleave/deinterleave round-trip
    dim = len(prctls)
    if dim == 1:
        return self.to_zscores(prctls)[0]
    else:
        SCALE = self.morton_scale(dim)
        int_prctls = [int(math.floor(p * SCALE)) for p in prctls]
        m1 = pymorton.interleave(*int_prctls)
        if SAFE:
            int_prctls_back = pymorton.deinterleave2(m1) if dim == 2 else pymorton.deinterleave3(m1)
            assert all(i1 == i2 for i1, i2 in zip(int_prctls, int_prctls_back))
        m2 = pymorton.interleave(*[SCALE - 1 for _ in range(dim)])
        zpercentile = m1 / m2
        return StatsConventions.norminv(zpercentile)
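# Round-trip sketch for the Morton machinery used by to_zcurve / from_zcurve,
# assuming pymorton is installed and SCALE plays the role of morton_scale(dim).
# For dim=2, interleaving then deinterleaving recovers the discretized percentiles.
def _demo_morton_roundtrip(prctls=(0.25, 0.75), SCALE=2 ** 16):
    ints = [int(math.floor(p * SCALE)) for p in prctls]
    m = pymorton.interleave(*ints)
    assert list(pymorton.deinterleave2(m)) == ints
    return [v / SCALE for v in pymorton.deinterleave2(m)]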
def univariate_trivariate(trial):
    u = trial.suggest_float('u', 1e-6, 1 - 1e-6)
    z = StatsConventions.norminv(u)
    u2 = zc.from_zcurve(zvalue=z, dim=3)
    return objective(scale_me(u2, scale))[0]
def test_cdf_invcdf():
    normcdf = StatsConventions._normcdf_function()
    norminv = StatsConventions._norminv_function()
    for x in np.random.randn(100):
        x1 = norminv(normcdf(x))
        assert abs(x - x1) < 1e-4
def test_absc():
    sc = StatsConventions()
    xs = sc.percentile_abscissa()
    assert len(xs) > 5
    xs = StatsConventions.percentile_abscissa()  # also callable statically
    assert len(xs) > 5
def univariate(u):
    z = StatsConventions.norminv(u)
    u2 = zc.from_zcurve(zvalue=z, dim=3)
    return objective(scale_me(u2, scale))[0]
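# Hedged usage sketch: the two objectives above squeeze a 3-dimensional search
# through one scalar via the z-curve. Assuming zc, objective and scale_me are in
# scope as in the surrounding code, a standard Optuna study would drive the
# trial-based variant like this:
def _demo_optimize(n_trials=50):
    import optuna
    study = optuna.create_study(direction='minimize')
    study.optimize(univariate_trivariate, n_trials=n_trials)
    return study.best_params['u']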
def inv_cdf(self, p: float) -> float:
    return self.mean + StatsConventions.norminv(p) * self.std()
def from_zcurve(self, zvalue, dim):
    zpercentile = StatsConventions.normcdf(zvalue)
    return self.to_cube(zpercentile=zpercentile, dim=dim)
def to_zscores(prctls):
    norminv = StatsConventions._norminv_function()
    return [norminv(p) for p in prctls]