import math
import statistics


def median_confidence_interval(dx, cutoff=.95):
    '''cutoff is the confidence level as a decimal between 0 and 1'''
    dx = sorted(dx, reverse=False)
    factor = statistics.NormalDist().inv_cdf((1 + cutoff) / 2)
    factor *= math.sqrt(len(dx))  # avoid doing the computation twice
    lix = round(0.5 * (len(dx) - factor))
    uix = round(0.5 * (1 + len(dx) + factor))
    return (dx[lix], dx[uix])
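# A minimal usage sketch for median_confidence_interval; the sample values and
# the 0.95 cutoff are illustrative assumptions, not data from the original project.
sample = [12, 15, 9, 22, 17, 14, 19, 11, 16, 13, 18, 20]
low, high = median_confidence_interval(sample, cutoff=0.95)
print(f"median={statistics.median(sample)}, approx. 95% CI=({low}, {high})")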
def fit(self, X_train, y_train):
    y_col = y_train.columns[-1]
    self.variables = X_train.columns
    # Class priors: relative frequency of each label in the training set.
    self.prior_probabilities = dict.fromkeys(y_train[y_col].unique())
    for cls in self.prior_probabilities.keys():
        count = len(y_train[y_train[y_col] == cls])
        self.prior_probabilities[cls] = count / len(y_train)
    # One Gaussian per feature, fitted separately for class 0 (A1) and class 1 (A2).
    self.variable_distr_A1 = dict.fromkeys(X_train.columns)
    self.variable_distr_A2 = dict.fromkeys(X_train.columns)
    train_A1 = X_train[y_train[y_col] == 0]
    train_A2 = X_train[y_train[y_col] == 1]
    for var in X_train.columns:
        mu_A1 = train_A1[var].mean()
        sigma_A1 = train_A1[var].std()
        self.variable_distr_A1[var] = statistics.NormalDist(mu=mu_A1, sigma=sigma_A1)
        mu_A2 = train_A2[var].mean()
        sigma_A2 = train_A2[var].std()
        self.variable_distr_A2[var] = statistics.NormalDist(mu=mu_A2, sigma=sigma_A2)
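# Hedged usage sketch for fit(): the GaussianNB class name and the toy pandas
# DataFrames below are assumptions made to keep the example self-contained;
# only the fit() method itself comes from the original code.
import pandas as pd

X_train = pd.DataFrame({
    "height": [1.62, 1.75, 1.80, 1.58, 1.69, 1.83],
    "weight": [58.0, 72.0, 81.0, 54.0, 63.0, 88.0],
})
y_train = pd.DataFrame({"label": [0, 1, 1, 0, 0, 1]})

model = GaussianNB()  # hypothetical class that holds the fit() method above
model.fit(X_train, y_train)
print(model.prior_probabilities)          # priors: 0.5 for each class
print(model.variable_distr_A1["height"])  # NormalDist fitted to class-0 heights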
def _statistics_():
    print("Statistics")
    print("--")
    print(statistics.fmean([3.5, 4.0, 5.25]))
    print("--")
    print(round(statistics.geometric_mean([54, 24, 36]), 1))
    print("--")
    temperature_feb = statistics.NormalDist.from_samples([4, 12, -3, 2, 7, 14])
    print(temperature_feb.mean)
    print(temperature_feb.stdev)
    print(temperature_feb.cdf(3))
    print(temperature_feb.pdf(7) / temperature_feb.pdf(10))
    el_niño = statistics.NormalDist(4, 2.5)
    temperature_feb += el_niño
    print(temperature_feb)
    # NormalDist(mu=10.0, sigma=6.830080526611674)
    print(temperature_feb * (9 / 5) + 32)
    # NormalDist(mu=50.0, sigma=12.294144947901014)
    print(temperature_feb.samples(3))
    print("--------")
def terminate(self, measurement: float) -> bool:
    self._data.append(measurement)
    # Wait until there is enough data to fit a normal distribution.
    if len(self._data) > 10:
        (mu, sigma) = stats.norm.fit(self._data)
        current_fit = statistics.NormalDist(mu=mu, sigma=sigma)
        if self._last_fit:
            # Stop once consecutive fits overlap almost completely,
            # i.e. the fitted distribution has stabilised.
            current_overlap = current_fit.overlap(other=self._last_fit)
            if current_overlap >= self._threshold:
                return True
        self._last_fit = current_fit
    return False
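# Illustrative driver for terminate(): the ConvergenceCheck wrapper, its
# attribute names, and the 0.95 threshold are assumptions; it also assumes
# `import statistics` and `from scipy import stats`, which terminate() itself
# relies on.
import random

class ConvergenceCheck:
    terminate = terminate  # reuse the method defined above

    def __init__(self, threshold=0.95):
        self._data = []
        self._last_fit = None
        self._threshold = threshold

random.seed(42)
checker = ConvergenceCheck()
count = 1
while not checker.terminate(random.gauss(10.0, 2.0)):
    count += 1
print(f"fitted distribution stabilised after {count} measurements")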
import statistics
from typing import Generator


def normal_quantiles(length: int, mu: float = 0.0,
                     sigma: float = 1.0) -> Generator[float, None, None]:
    norm_dist = statistics.NormalDist(mu=mu, sigma=sigma)
    splits = 1.0 / length
    current = splits
    count = 0
    while count < length:
        if current >= 1.0:
            # inv_cdf() is undefined at exactly 1.0, so clamp just below it.
            current = 0.9999999999999999
        yield norm_dist.inv_cdf(current)
        current += splits
        count += 1
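# Quick sanity check of the generator above (illustrative call, standard normal):
# the first four values are the 0.2/0.4/0.6/0.8 quantiles, and the last one is
# very large because the final probability is clamped just below 1.0.
for quantile in normal_quantiles(5):
    print(round(quantile, 3))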
def run(self):
    df = self.time_series()
    ts = df[df.keys()[0]]
    current = ts[0]
    past = ts[1:]
    # Score the latest value against a normal distribution fitted to the history.
    dist = statistics.NormalDist(past.mean(), past.std())
    cdf = dist.cdf(current)
    if cdf > 0.99:
        return StrategyResult.RED
    elif cdf > 0.95:
        return StrategyResult.ORANGE
    else:
        return StrategyResult.GREEN
async def daily(self, ctx: MyContext):
    """Get some more experience..."""
    db_hunter: Player = await get_player(ctx.author, ctx.channel)
    _ = await ctx.get_translate_function()

    if db_hunter.prestige < 3:
        await ctx.send(
            _("❌ Your prestige level is not high enough yet. "
              "See `{ctx.prefix}prestige info` to learn more."))
        return False

    now = datetime.datetime.now()

    if db_hunter.prestige_last_daily.date() == now.date():
        # Time left until the daily resets at midnight.
        nextturn = datetime.datetime.combine(
            now.date() + datetime.timedelta(days=1), datetime.time.min) - now
        await ctx.send(
            _("❌ You already claimed your dailies today. Try again in: `{td}`",
              td=format_timedelta(nextturn, locale='en_US')))
        return False

    max_experience = 20 * db_hunter.prestige
    distrib = statistics.NormalDist(max_experience / 2, max_experience / 6)
    added_experience = int(distrib.samples(1)[0])
    added_experience = min(max(5, added_experience), max_experience + 5)

    await db_hunter.edit_experience_with_levelups(ctx, added_experience)
    db_hunter.prestige_last_daily = now
    db_hunter.prestige_dailies += 1
    await db_hunter.save()

    if ctx.author.id == 618209176434507816:
        # This is just a prank for the guy who made me add the Normal Dist,
        # with "a tiny chance for it to become negative"
        # It's not really negative, but heh :)
        # It'll look like so.
        added_experience = -added_experience

    await ctx.send(
        _("💰️ You took {exp} experience out of the prestige bank. Come back soon!",
          exp=added_experience))
def subcommand_range(args):
    frames = {}
    for seed in range(256):
        filename = os.path.join(args.directory, f'{seed:03d}.txt')
        if not os.path.exists(filename):
            continue
        with open(filename) as f:
            for line in f:
                if line.startswith('FRAMES'):
                    frames[seed] = int(line.strip().split('\t')[1])

    range_averages = {}
    for start in range(256):
        range_frames = []
        seed = start
        for i in range(args.size):
            seed = (start + i) % 256
            range_frames.append(frames[seed])
        # Weighted average of the window, using a normal distribution centred on
        # the middle of the range (about 95% of the weight falls inside the window).
        sigma = args.size / 2 / 1.96
        dist = statistics.NormalDist(args.size / 2, sigma)
        total = 0.0
        divisor = 0.0
        for i, value in enumerate(range_frames):
            total += dist.pdf(i) * value
            divisor += dist.pdf(i)
        average = total / divisor
        range_averages[start] = average

    for start, average in sorted(range_averages.items(), key=lambda x: x[1]):
        print(
            f'{start:3d} - {(start + args.size - 1) % 256:3d}: {average * 655171 / 39375000000:12.3f}'
        )
import math
import statistics as stat


def pochisq(x, df=255):
    """Compute the probability of a χ² test value.

    Adapted from: Hill, I. D. and Pike, M. C. Algorithm 299, Collected
    Algorithms for the CACM 1967, p. 243. Updated for rounding errors based
    on the remark in ACM TOMS June 1985, page 185.

    According to http://www.fourmilab.ch/random/:
    We interpret the percentage (return value * 100) as the degree to which
    the sequence tested is suspected of being non-random. If the percentage
    is greater than 99% or less than 1%, the sequence is almost certainly
    not random. If the percentage is between 99% and 95% or between 1% and
    5%, the sequence is suspect. Percentages between 90% and 95% and 5% and
    10% indicate the sequence is “almost suspect”.

    Arguments:
        x: Obtained χ² value.
        df: Degrees of freedom, defaults to 255 for random bytes.

    Returns:
        The degree to which the sequence tested is suspected of being
        non-random.
    """
    # Check arguments first
    if not isinstance(df, int):
        raise ValueError("df must be an integer")
    if x <= 0.0 or df < 1:
        return 1.0
    # Constants
    LOG_SQRT_PI = 0.5723649429247000870717135  # log(√π)
    I_SQRT_PI = 0.5641895835477562869480795  # 1/√π
    BIGX = 20.0
    a = 0.5 * x
    even = df % 2 == 0
    if df > 1:
        y = math.exp(-a)
    nd = stat.NormalDist()
    s = y if even else 2.0 * nd.cdf(-math.sqrt(x))
    if df > 2:
        x = 0.5 * (df - 1.0)
        z = 1.0 if even else 0.5
        if a > BIGX:
            e = 0 if even else LOG_SQRT_PI
            c = math.log(a)
            while z <= x:
                e = math.log(z) + e
                s += math.exp(c * z - a - e)
                z += 1.0
            return s
        else:
            e = 1.0 if even else I_SQRT_PI / math.sqrt(a)
            c = 0.0
            while z <= x:
                e = e * a / z
                c = c + e
                z += 1.0
            return c * y + s
    else:
        return s
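# Hedged usage sketch for pochisq(): the χ² value 260.0 is an arbitrary
# illustrative input for 255 degrees of freedom, not a real measurement.
p = pochisq(260.0, df=255)
print(f"suspicion of non-randomness: {p * 100:.1f}%")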
def centileFromSDS(z: float) -> float:
    # A z-score (SDS) maps to a centile (0-100) through the standard normal CDF.
    centile = statistics.NormalDist().cdf(z) * 100
    return centile
def aggregate(self) -> statistics.NormalDist:
    # np.std defaults to the population standard deviation (ddof=0).
    return statistics.NormalDist(mu=np.mean(self._data), sigma=np.std(self._data))
print(f"❌ Music vs. classics ratio is off: {ratio:.2f} to 1") print() print("5. Music play distribution") print("--------------------------") with open("data/index.json") as index_file: index_entries = json.load(index_file) music_plays = [ entry["play_count"] for entry in index_entries.values() if entry["playlist"] == "music" ] avg = statistics.mean(music_plays) stdev = statistics.pstdev(music_plays, avg) deciles = statistics.quantiles(music_plays, n=10) normal_dist = statistics.NormalDist(avg, stdev) diffs = [ measured - expected for measured, expected in zip(deciles, normal_dist.quantiles(n=10)) ] if all(abs(diff) <= 1 for diff in diffs): print(f"✅ Normal distribution: {avg:.2f}±{stdev:.2f}") elif all(abs(diff) <= 3 for diff in diffs): print(f"🔶 Almost normal distribution: {avg:.2f}±{stdev:.2f}") for i, diff in enumerate(diffs): if diff > 1: print( f" {i + 1}. decile: {diff:.2f} off (measured {deciles[i]:.2f}, " f"expected {deciles[i] - diff:.2f})") else: