Esempio n. 1
0
def median_confidence_interval(dx, cutoff=.95):
    """Return an approximate confidence interval for the median of ``dx``.

    The bounds are order statistics of the sorted sample, selected with a
    normal approximation: ``z * sqrt(n)`` is subtracted from / added to the
    sample size to obtain the lower and upper positions.

    Args:
        dx: Iterable of mutually comparable values.
        cutoff: Confidence level as a decimal strictly between 0 and 1
            (for example ``.95`` for a 95% interval).

    Returns:
        A ``(lower, upper)`` tuple of two elements taken from ``dx``.

    Raises:
        IndexError: If ``dx`` is empty.
        statistics.StatisticsError: If ``(1 + cutoff) / 2`` is not strictly
            between 0 and 1.
    """
    dx = sorted(dx)
    n = len(dx)
    factor = statistics.NormalDist().inv_cdf((1 + cutoff) / 2)
    factor *= math.sqrt(n)  # z * sqrt(n), shared by both bounds

    lix = round(0.5 * (n - factor))
    uix = round(0.5 * (1 + n + factor))

    # For small samples (or very high confidence levels) the computed
    # positions fall outside the data: the upper one runs past the end and
    # the lower one can go negative, which would silently wrap around to the
    # end of the list. Clamp both so the interval degrades to the sample
    # extremes instead.
    last = n - 1
    lix = min(max(lix, 0), last)
    uix = min(max(uix, 0), last)

    return (dx[lix], dx[uix])
Esempio n. 2
0
 def fit(self, X_train, y_train):
     """Estimate class priors and one normal distribution per feature and class.

     The last column of ``y_train`` is used as the label column. After the
     call the instance holds:

     * ``variables``: the columns of ``X_train``;
     * ``prior_probabilities``: label value -> share of rows with that label;
     * ``variable_distr_A1`` / ``variable_distr_A2``: column -> ``NormalDist``
       built from the column's ``mean()`` and ``std()`` over the rows
       labelled ``0`` and ``1`` respectively.

     Both arguments are presumably pandas DataFrames sharing an index; that
     is what the indexing used here requires.
     """
     label_column = y_train.columns[-1]
     labels = y_train[label_column]

     self.variables = X_train.columns
     self.prior_probabilities = {
         cls: len(y_train[labels == cls]) / len(y_train)
         for cls in labels.unique()
     }

     def _distributions(rows):
         # One NormalDist per feature, fitted on the given subset of rows.
         return {
             var: statistics.NormalDist(mu=rows[var].mean(),
                                        sigma=rows[var].std())
             for var in X_train.columns
         }

     # The two classes are identified by the hard-coded label values 0 and 1.
     self.variable_distr_A1 = _distributions(X_train[labels == 0])
     self.variable_distr_A2 = _distributions(X_train[labels == 1])
Esempio n. 3
0
 def _statistics_():
     """Print a short tour of the ``statistics`` module to stdout.

     Shows ``fmean``, ``geometric_mean`` and a handful of ``NormalDist``
     operations (fitting from samples, mean/stdev, cdf, a pdf ratio, adding
     two distributions, and drawing random samples). The last printed list
     is random; every other line is deterministic.
     """
     print("Statistics")
     print("--")
     print(statistics.fmean([3.5, 4.0, 5.25]))
     print("--")
     print(round(statistics.geometric_mean([54, 24, 36]), 1))
     print("--")
     temperature_feb = statistics.NormalDist.from_samples(
         [4, 12, -3, 2, 7, 14])
     print(temperature_feb.mean)
     print(temperature_feb.stdev)
     print(temperature_feb.cdf(3))
     # Relative likelihood of observing 7 versus 10.
     print(temperature_feb.pdf(7) / temperature_feb.pdf(10))
     el_niño = statistics.NormalDist(4, 2.5)
     # NormalDist is immutable: ``+=`` rebinds the name to a new distribution.
     temperature_feb += el_niño
     print(temperature_feb)
     print(temperature_feb.samples(3))
     print("--------")
Esempio n. 4
0
    def terminate(self, measurement: float) -> bool:
        """Record ``measurement`` and report whether the fitted normal has settled.

        Once more than 10 measurements are stored, a normal distribution is
        fitted to all of them and compared with the previously remembered
        fit. When their overlap reaches ``self._threshold`` the method
        returns True and leaves the remembered fit untouched; otherwise the
        new fit replaces it and False is returned.
        """
        self._data.append(measurement)

        # Too few points to fit anything yet.
        if len(self._data) <= 10:
            return False

        fitted_mu, fitted_sigma = stats.norm.fit(self._data)
        latest = statistics.NormalDist(mu=fitted_mu, sigma=fitted_sigma)

        previous = self._last_fit
        if previous and latest.overlap(other=previous) >= self._threshold:
            return True

        self._last_fit = latest
        return False
Esempio n. 5
0
def normal_quantiles(length: int,
                     mu: float = 0.0,
                     sigma: float = 1.0) -> Generator[float, None, None]:
    """Yield ``length`` quantiles of the normal distribution N(mu, sigma).

    The probabilities start at ``1 / length`` and grow by ``1 / length`` on
    every step. A probability that reaches 1.0 is replaced by
    0.9999999999999999 before ``inv_cdf`` is evaluated.
    """
    distribution = statistics.NormalDist(mu=mu, sigma=sigma)
    step = 1.0 / length
    probability = step
    remaining = length

    while remaining > 0:
        if not probability < 1.0:
            probability = 0.9999999999999999
        yield distribution.inv_cdf(probability)

        probability += step
        remaining -= 1
Esempio n. 6
0
    def run(self):
        """Rate the first value of the time series against the remaining ones.

        A normal distribution is built from the mean and standard deviation
        of every value after the first; the first value's CDF under that
        distribution selects the result: above 0.99 -> RED, above 0.95 ->
        ORANGE, otherwise GREEN.
        """
        frame = self.time_series()
        series = frame[frame.keys()[0]]  # only the first column is used

        latest, history = series[0], series[1:]
        percentile = statistics.NormalDist(history.mean(),
                                           history.std()).cdf(latest)

        if percentile > 0.99:
            return StrategyResult.RED
        if percentile > 0.95:
            return StrategyResult.ORANGE
        return StrategyResult.GREEN
Esempio n. 7
0
    async def daily(self, ctx: MyContext):
        """Get some more experience..."""
        # Grants a once-per-calendar-day experience bonus to players whose
        # prestige is at least 3. Returns False when the claim is refused and
        # None after a successful claim.
        db_hunter: Player = await get_player(ctx.author, ctx.channel)

        _ = await ctx.get_translate_function()
        if db_hunter.prestige < 3:
            # NOTE(review): no format arguments are passed here, so
            # `{ctx.prefix}` is only substituted if the translate function
            # supplies `ctx` on its own — confirm.
            await ctx.send(
                _("❌ Your prestige level is not high enough yet. "
                  "See `{ctx.prefix}prestige info` to learn more."))
            return False

        now = datetime.datetime.now()
        if db_hunter.prestige_last_daily.date() == now.date():
            # Claims are keyed on the calendar date, so the next claim becomes
            # possible at the upcoming midnight. (Subtracting the two equal
            # dates would always report a zero wait.)
            next_midnight = datetime.datetime.combine(
                now.date() + datetime.timedelta(days=1), datetime.time.min)
            nextturn = next_midnight - now
            await ctx.send(
                _("❌ You already claimed your dailies today. Try again in: `{td}`",
                  td=format_timedelta(nextturn, locale='en_US')))
            return False

        # Draw the reward from a normal distribution centred on half the
        # maximum, then clamp it into [5, max_experience + 5].
        max_experience = 20 * db_hunter.prestige
        distrib = statistics.NormalDist(max_experience / 2, max_experience / 6)
        added_experience = int(distrib.samples(1)[0])

        added_experience = min(max(5, added_experience), max_experience + 5)

        await db_hunter.edit_experience_with_levelups(ctx, added_experience)
        db_hunter.prestige_last_daily = now
        db_hunter.prestige_dailies += 1

        await db_hunter.save()

        if ctx.author.id == 618209176434507816:
            # This is just a prank for the guy who made me add the Normal Dist,
            # with "a tiny chance for it to become negative"
            # It's not really negative, but heh :)
            # It'll look like so.
            # Only the displayed number is negated; the experience was already
            # credited above.
            added_experience = -added_experience

        await ctx.send(
            _("💰️ You took {exp} experience out of the prestige bank. Come back soon!",
              exp=added_experience))
Esempio n. 8
0
def subcommand_range(args):
    """Print a Gaussian-weighted frame average for every window of seeds.

    For each seed 0..255 the file ``<args.directory>/<seed:03d>.txt`` is
    read, if it exists, and the integer in the second tab-separated field of
    its ``FRAMES`` line is recorded (the last such line wins). For every
    start seed a window of ``args.size`` consecutive seeds (wrapping modulo
    256) is averaged with weights taken from a normal pdf centred on
    ``args.size / 2``. The windows are printed in ascending order of that
    average, scaled by ``655171 / 39375000000``.

    Args:
        args: Object exposing ``directory`` (path) and ``size`` (window
            length) attributes.

    Raises:
        KeyError: If a seed needed by a window has no recorded frame count,
            e.g. because its file is missing.
    """
    frames = {}

    for seed in range(256):
        filename = os.path.join(args.directory, f'{seed:03d}.txt')

        if not os.path.exists(filename):
            continue

        with open(filename) as f:
            for line in f:
                if line.startswith('FRAMES'):
                    frames[seed] = int(line.strip().split('\t')[1])

    # The weights depend only on the position inside the window, not on the
    # window's start, so compute them (and their sum) once.
    sigma = args.size / 2 / 1.96
    dist = statistics.NormalDist(args.size / 2, sigma)
    weights = [dist.pdf(i) for i in range(args.size)]

    divisor = 0.0
    for weight in weights:
        divisor += weight

    range_averages = {}

    for start in range(256):
        total = 0.0

        for i, weight in enumerate(weights):
            total += weight * frames[(start + i) % 256]

        range_averages[start] = total / divisor

    for start, average in sorted(range_averages.items(), key=lambda x: x[1]):
        print(
            f'{start:3d} - {(start + args.size - 1) % 256:3d}: {average * 655171 / 39375000000:12.3f}'
        )
Esempio n. 9
0
def pochisq(x: float, df: int = 255) -> float:
    """
    Compute probability of χ² test value.

    Adapted from: Hill, I. D. and Pike, M. C.  Algorithm 299 Collected
    Algorithms for the CACM 1967 p. 243 Updated for rounding errors based on
    remark in ACM TOMS June 1985, page 185.

    According to http://www.fourmilab.ch/random/:

      We interpret the percentage (return value*100) as the degree to which
      the sequence tested is suspected of being non-random. If the percentage
      is greater than 99% or less than 1%, the sequence is almost certainly
      not random. If the percentage is between 99% and 95% or between 1% and
      5%, the sequence is suspect. Percentages between 90% and 95% and 5% and
      10% indicate the sequence is “almost suspect”.

    Arguments:
        x: Obtained χ² value.
        df: Degrees of freedom, defaults to 255 for random bytes.

    Returns:
        The degree to which the sequence tested is suspected of being
        non-random. 1.0 is returned directly when x <= 0 or df < 1.

    Raises:
        ValueError: If df is not an int.
    """
    # Check arguments first
    if not isinstance(df, int):
        raise ValueError("df must be an integer")
    if x <= 0.0 or df < 1:
        return 1.0
    # Constants
    LOG_SQRT_PI = 0.5723649429247000870717135  # log(√π)
    I_SQRT_PI = 0.5641895835477562869480795  # 1/√π
    BIGX = 20.0  # threshold on a = x/2 above which the log-based loop is used
    a = 0.5 * x
    even = df % 2 == 0
    # y is only assigned for df > 1; it is only read when df is even (so
    # df >= 2) or inside the df > 2 branch below, so df == 1 never needs it.
    if df > 1:
        y = math.exp(-a)
    nd = stat.NormalDist()
    # Starting term: exp(-x/2) for even df, otherwise twice the standard
    # normal CDF evaluated at -sqrt(x).
    s = y if even else 2.0 * nd.cdf(-math.sqrt(x))
    if df > 2:
        # From here on x is reused as the upper bound of the loop variable z.
        x = 0.5 * (df - 1.0)
        z = 1.0 if even else 0.5
        if a > BIGX:
            # Large a: e accumulates a running sum of log(z) and each term is
            # exponentiated only after combining it with c * z - a.
            e = 0 if even else LOG_SQRT_PI
            c = math.log(a)
            while z <= x:
                e = math.log(z) + e
                s += math.exp(c * z - a - e)
                z += 1.0
            return s
        else:
            # Otherwise: e is updated multiplicatively (e * a / z) and c sums
            # the successive values; the sum is scaled by y = exp(-a) at the end.
            e = 1.0 if even else I_SQRT_PI / math.sqrt(a)
            c = 0.0
            while z <= x:
                e = e * a / z
                c = c + e
                z += 1.0
            return c * y + s
    else:
        # df is 1 or 2: the starting term is already the full result.
        return s
Esempio n. 10
0
def centileFromSDS(z: float) -> float:
    """Convert a standard deviation score (z-score) to its centile.

    The centile is the standard normal cumulative probability at ``z``,
    i.e. the share of the reference distribution lying at or below the
    score.

    Args:
        z: Standard deviation score.

    Returns:
        The cumulative probability as a fraction between 0 and 1
        (``0.5`` for ``z == 0``); multiply by 100 for a percentage.
    """
    # Evaluate the CDF at the score itself. Feeding it a tail probability
    # instead would squeeze every result into roughly [0.5, 0.69].
    return statistics.NormalDist().cdf(z)
Esempio n. 11
0
 def aggregate(self) -> statistics.NormalDist:
     """Summarise ``self._data`` as a normal distribution.

     The distribution's mean is ``np.mean`` of the data and its standard
     deviation is ``np.std`` of the data.
     """
     samples = self._data
     center = np.mean(samples)
     spread = np.std(samples)
     return statistics.NormalDist(mu=center, sigma=spread)
Esempio n. 12
0
    print(f"❌ Music vs. classics ratio is off: {ratio:.2f} to 1")
print()

# Section 5: compare the play counts of the "music" playlist with a normal
# distribution that has the same mean and standard deviation.
print("5. Music play distribution")
print("--------------------------")
with open("data/index.json") as index_file:
    index_entries = json.load(index_file)

# Play counts of every index entry whose playlist is "music".
music_plays = [
    entry["play_count"] for entry in index_entries.values()
    if entry["playlist"] == "music"
]
avg = statistics.mean(music_plays)
# Population standard deviation, reusing the mean computed above.
stdev = statistics.pstdev(music_plays, avg)
# Measured cut points that split the play counts into 10 groups.
deciles = statistics.quantiles(music_plays, n=10)
normal_dist = statistics.NormalDist(avg, stdev)
# Measured decile minus the matching decile of the fitted normal
# distribution; positive means the measured value is higher.
diffs = [
    measured - expected
    for measured, expected in zip(deciles, normal_dist.quantiles(n=10))
]

if all(abs(diff) <= 1 for diff in diffs):
    print(f"✅ Normal distribution: {avg:.2f}±{stdev:.2f}")
elif all(abs(diff) <= 3 for diff in diffs):
    print(f"🔶 Almost normal distribution: {avg:.2f}±{stdev:.2f}")
    for i, diff in enumerate(diffs):
        if diff > 1:
            print(
                f"   {i + 1}. decile: {diff:.2f} off (measured {deciles[i]:.2f}, "
                f"expected {deciles[i] - diff:.2f})")
else: