Code Example #1
File: detector.py Project: seamustuohy/tor_anomaly
import logging

import pandas as pd
from scipy.stats import norm

log = logging.getLogger(__name__)


def get_minmax(sample, min_samples=8):
    """

    We consider that a ratio of connections is typical if it falls within the 99.99 % percentile of the Normal distribution N(m, v) modelling ratios. This ensures that the expected rate of false alarms is about 1/10000, and therefore only a handful a week (given the large number of jurisdictions). Similarly, we infer the range of the rate of usage from each jurisdiction (given Ci j) to be the 99.99 % percentile range of a Poisson distribution with parameter Ci j. This full range must be within the typical range of ratios to avoid raising an alarm.

    Args:
        sample (pandas.core.series.Series): A series containing the relative change values for a set of countries.
    """

    log.debug("Getting min and max for a sample on {0}'s data: {1}".format(sample.name, sample))
    initial_sample_len = len(sample)

    if initial_sample_len > min_samples:
        # drop_outliers is a project-local helper (not shown here).
        sample = drop_outliers(sample)

        num_outliers = initial_sample_len - len(sample)
        log.debug("Sample had {0} outliers removed. Current sample: {1}".format(num_outliers, sample))

        if len(sample) > min_samples:
            mu, sigma = norm.fit(sample)
            sample_max = norm.ppf(0.9999, mu, sigma)
            sample_min = norm.ppf(1 - 0.9999, mu, sigma)

            log.debug("Sample min == {0}, Sample max == {1}".format(sample_min, sample_max))

            return pd.Series({"max":sample_max, "min":sample_min})
        else:
            log.debug("After removing outliers the sample was a length of {0}. This is shorter than acceptable minimum length of {1}.".format(len(sample), min_samples))

            return pd.Series({"max":None, "min":None})
    else:
        log.debug("Sample with length of {0} is shorter than acceptable minimum length of {1}.".format(initial_sample_len, min_samples))

        return pd.Series({"max":None, "min":None})
Code Example #2
File: detector.py Project: gsathya/metrics-tasks
from scipy.stats import norm

# n_day_rel is a project-local helper (not shown here).


def make_tendencies_minmax(l, INTERVAL=1):
  lminus1 = {ccode: n_day_rel(l[ccode], INTERVAL) for ccode in l}
  c = lminus1[next(iter(lminus1))]
  dists = []
  minx = []
  maxx = []
  for i in range(len(c)):
    vals = [lminus1[ccode][i] for ccode in lminus1
            if lminus1[ccode][i] is not None]
    if len(vals) < 8:
      dists += [None]
      minx += [None]
      maxx += [None]
    else:
      vals.sort()
      # q1 and q2 are the first and third quartiles; qd is the
      # inter-quartile distance used to clip outliers.
      median = vals[len(vals) // 2]
      q1 = vals[len(vals) // 4]
      q2 = vals[(3 * len(vals)) // 4]
      qd = q2 - q1
      vals = [v for v in vals if median - qd * 4 < v < median + qd * 4]
      if len(vals) < 8:
        dists += [None]
        minx += [None]
        maxx += [None]
        continue
      mu, sigma = norm.fit(vals)
      dists += [(mu, sigma)]
      maxx += [norm.ppf(0.9999, mu, sigma)]
      minx += [norm.ppf(1 - 0.9999, mu, sigma)]
  return minx, maxx
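
A sketch of how make_tendencies_minmax might be driven, using synthetic per-country counts and a stand-in n_day_rel (an assumption about the helper, which is not shown in the snippet):

import random

def n_day_rel(series, days):
    # Stand-in (an assumption): ratio of each value to the value `days`
    # entries earlier, None where that is undefined.
    return [None if i < days or not series[i - days]
            else series[i] / series[i - days]
            for i in range(len(series))]

random.seed(0)
# Synthetic usage counts over 30 days for 12 hypothetical countries.
l = {"c%02d" % k: [100 + random.gauss(0, 5) for _ in range(30)]
     for k in range(12)}

minx, maxx = make_tendencies_minmax(l)
print(minx[1], maxx[1])  # typical range of day-over-day relative change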
Code Example #3
File: detector.py Project: teor2345/metrics-web
def make_tendencies_minmax(l, INTERVAL=1):
    lminus1 = {ccode: n_day_rel(l[ccode], INTERVAL) for ccode in l}
    c = lminus1[next(iter(lminus1))]
    dists = []
    minx = []
    maxx = []
    for i in range(len(c)):
        vals = [
            lminus1[ccode][i] for ccode in lminus1
            if lminus1[ccode][i] is not None
        ]
        if len(vals) < 8:
            dists += [None]
            minx += [None]
            maxx += [None]
        else:
            vals.sort()
            median = vals[len(vals) // 2]
            q1 = vals[len(vals) // 4]
            q2 = vals[(3 * len(vals)) // 4]
            qd = q2 - q1
            vals = [
                v for v in vals if median - qd * 4 < v < median + qd * 4
            ]
            if len(vals) < 8:
                dists += [None]
                minx += [None]
                maxx += [None]
                continue
            mu, sigma = norm.fit(vals)
            dists += [(mu, sigma)]
            maxx += [norm.ppf(0.9999, mu, sigma)]
            minx += [norm.ppf(1 - 0.9999, mu, sigma)]
    return minx, maxx
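
The docstring in Example #1 also describes a Poisson check: the 99.99% range of a Poisson distribution with parameter C_ij must lie entirely inside the typical [min, max] range before an observation passes. A minimal sketch of that containment test with hypothetical numbers (this is not the projects' actual code):

from scipy.stats import poisson

def poisson_range_is_typical(count, sample_min, sample_max):
    # 99.99% range of Poisson(count); an alarm is avoided only when this
    # whole range sits inside the typical range fitted from the Normal model.
    lo = poisson.ppf(1 - 0.9999, count)
    hi = poisson.ppf(0.9999, count)
    return sample_min <= lo and hi <= sample_max

print(poisson_range_is_typical(120, sample_min=60.0, sample_max=180.0))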
Code Example #4
from scipy.stats import norm


def norm_fit():
    # Fit a Normal distribution to each (field, season, normal-window)
    # combination; the fitted (mu, sigma) pairs define the AN and BN
    # (above-/below-normal) thresholds:
    # https://www.hko.gov.hk/en/wxinfo/season/intpret.htm
    # fields, season_names, climat_yrs and seasonal_avg_dict_ls_ls are
    # module-level data (not shown here).
    return [
        [
            [
                norm.fit(
                    [seasonal_avg_dict_ls_ls[f][s][year] for year in range(*yrs)]
                ) for yrs in climat_yrs
            ] for s, season in enumerate(season_names)
        ] for f, field in enumerate(fields)
    ]
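
A sketch of the data layout that comprehension expects, with made-up fields, seasons, windows, and values (the real module's data is not shown):

import random

fields = ["temperature", "rainfall"]
season_names = ["spring", "summer", "autumn", "winter"]
climat_yrs = [(1961, 1991), (1991, 2021)]  # two 30-year normal windows

random.seed(1)
# seasonal_avg_dict_ls_ls[f][s] maps a year to that season's average value.
seasonal_avg_dict_ls_ls = [
    [{year: random.gauss(20 if f == 0 else 100, 5)
      for year in range(1961, 2021)}
     for _ in season_names]
    for f in range(len(fields))
]

params = norm_fit()
mu, sigma = params[0][0][0]  # temperature, spring, 1961-1990 window
print(round(mu, 1), round(sigma, 1))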
Code Example #5
    # Assumes numpy as np, matplotlib.pyplot as plt, FuncFormatter from
    # matplotlib.ticker, and norm from scipy.stats are imported at module level.
    def uncertainty_histplot(self,
                             xcumprod: bool = True,
                             color_style: str = 'oil',
                             hidey: bool = True,
                             figsize=(12, 8)):
        """ Plot EUR uncertainty based on specified Arps model.

        Parameters
        ---
        rate_eur : float
            Minimum rate based on commercial or technical limit.
        sample : int
            Sample size to estimate the uncertainty
        seed : int
            Random seed
        xcumprod : bool
            True for cumulative production, False for time
        color_style : str
            set 'oil' theme or 'gas' theme
        hidey : bool
            hide probability density axis
        figsize : tuple
            Figure size
        """
        plt.style.use('ggplot')
        if color_style == 'gas':
            regclr = 'brown'
            sampleclr = 'coral'
        else:
            regclr = 'darkgreen'
            sampleclr = 'limegreen'
        _, ax = plt.subplots(figsize=figsize)
        if xcumprod:
            loc, scale = norm.fit(self._cumprod_mc[:, -1])
            ax.hist(self._cumprod_mc[:, -1],
                    bins='auto',
                    density=True,
                    color=sampleclr,
                    alpha=0.5)
        else:
            loc, scale = norm.fit(self._time_mc[:, -1])
            ax.hist(self._time_mc[:, -1],
                    bins='auto',
                    density=True,
                    color=sampleclr,
                    alpha=0.5)

        dist = norm(loc=loc, scale=scale)
        xaxis = np.linspace(dist.ppf(0.0005), dist.ppf(0.9995))
        # ax.plot returns a list of lines; unpack so the legend below
        # receives a single artist.
        normal, = ax.plot(xaxis, dist.pdf(xaxis), color=regclr)
        if xcumprod:
            # Petroleum convention: P90 is exceeded with 90% probability,
            # i.e. the 10th percentile of the sampled distribution.
            P90 = np.percentile(self._cumprod_mc[:, -1], 10)
            P50 = self.eur(self._rate_eur)
            P10 = np.percentile(self._cumprod_mc[:, -1], 90)
        else:
            P90 = np.percentile(self._time_mc[:, -1], 10)
            P50 = self.time(self._rate_eur)
            P10 = np.percentile(self._time_mc[:, -1], 90)
        low = ax.axvline(P90, color=regclr, linestyle='--', alpha=0.5)
        mid = ax.axvline(P50, color=regclr, linestyle='--', alpha=0.5)
        hgh = ax.axvline(P10, color=regclr, linestyle='--', alpha=0.5)

        ax.set_xlim(xaxis[0], xaxis[-1])
        ax.get_xaxis().set_major_formatter(
            FuncFormatter(lambda x, p: format(int(x), ',')))
        if xcumprod:
            ax.set_title((f'EUR Distribution based on Arps Model\n'
                          f'Selected Method: {self._MODEL_NAME[self._model]}'))
            ax.set_xlabel('Estimated Ultimate Recovery')
        else:
            ax.set_title((f'Time Distribution at EUR based on Arps Model\n'
                          f'Selected Method: {self._MODEL_NAME[self._model]}'))
            ax.set_xlabel('Time')
        ax.set_ylabel('Probability Density')

        if hidey:
            ax.yaxis.set_ticklabels([])
        ax.legend([normal, low, mid, hgh],
                  ('PDF', f'P90: {round(P90, 2)}', f'P50: {round(P50, 2)}',
                   f'P10: {round(P10, 2)}'))
        plt.show()
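
A standalone sketch of the P90/P50/P10 convention used above, on synthetic Monte Carlo results (the values and the Normal model are made up, not tied to the class):

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(42)
eur_samples = rng.normal(loc=500.0, scale=50.0, size=10_000)  # synthetic EURs

P90 = np.percentile(eur_samples, 10)  # low case: exceeded 90% of the time
P50 = np.percentile(eur_samples, 50)  # median case
P10 = np.percentile(eur_samples, 90)  # high case: exceeded 10% of the time

loc, scale = norm.fit(eur_samples)    # same fit used for the PDF overlay
print(f"P90={P90:.1f} P50={P50:.1f} P10={P10:.1f} "
      f"fit=(loc={loc:.1f}, scale={scale:.1f})")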