def get_minmax(sample, min_samples=8):
    """Estimate the typical (min, max) range of a sample of relative changes.

    After outliers are dropped, a Normal distribution N(mu, sigma) is fitted
    to the sample. The "typical" range is bounded by the 0.01 % and 99.99 %
    quantiles of that fit, so the expected false-alarm rate is roughly
    1/10000 per observation.

    The Poisson-based range for per-jurisdiction usage mentioned in the
    original design notes is not computed by this function.

    NOTE: both size checks use a strict ``>``, so a sample holding exactly
    ``min_samples`` values is rejected.

    Args:
        sample (pandas.core.series.Series): A series containing the relative
            change values for a set of countries.
        min_samples (int): The sample must contain more than this many
            values, both before and after outlier removal, to be fitted.

    Returns:
        pandas.Series: A series with keys ``"max"`` and ``"min"``. Both are
        ``None`` when the sample is too small.
    """
    log.debug("Getting min and max for a sample on {0}'s data: {1}".format(sample.name, sample))
    original_size = len(sample)

    # Guard: not enough raw observations to attempt a fit.
    if original_size <= min_samples:
        log.debug("Sample with length of {0} is shorter than acceptable minimum length of {1}.".format(original_size, min_samples))
        return pd.Series({"max": None, "min": None})

    trimmed = drop_outliers(sample)
    removed = original_size - len(trimmed)
    log.debug("Sample had {0} outliers removed. Current sample: {1}".format(removed, trimmed))

    # Guard: outlier removal left too few observations.
    if len(trimmed) <= min_samples:
        log.debug("After removing outliers the sample was a length of {0}. This is shorter than acceptable minimum length of {1}.".format(len(trimmed), min_samples))
        return pd.Series({"max": None, "min": None})

    mu, sigma = norm.fit(trimmed)
    upper = norm.ppf(0.9999, mu, sigma)
    lower = norm.ppf(1 - 0.9999, mu, sigma)
    log.debug("Sample min == {0}, Sample max == {1}".format(lower, upper))
    return pd.Series({"max": upper, "min": lower})
def make_tendencies_minmax(l, INTERVAL=1, min_samples=8):
    """Compute per-index lower/upper bounds of the typical relative change.

    Every series in ``l`` is first passed through ``n_day_rel`` (presumably
    the relative change over ``INTERVAL`` days -- confirm against that
    helper). For each index, the non-``None`` values across all keys are
    collected, values lying 4 inter-quartile distances or more from the
    median are discarded, and a Normal distribution is fitted to the rest.
    The bounds are the 0.01 % and 99.99 % quantiles of that fit.

    Args:
        l (dict): Maps a key (country code) to an indexable series. The
            number of indices processed is taken from the first series
            returned by ``n_day_rel``; all series are assumed to be at least
            that long.
        INTERVAL (int): Forwarded unchanged to ``n_day_rel``.
        min_samples (int): Minimum number of values required at an index,
            both before and after outlier removal, for a fit to be attempted.

    Returns:
        tuple: ``(minx, maxx)``, two lists with one entry per index. An entry
        is ``None`` when fewer than ``min_samples`` usable values exist.
        Both lists are empty when ``l`` is empty.
    """
    rel_changes = {ccode: n_day_rel(l[ccode], INTERVAL) for ccode in l}
    if not rel_changes:
        # Nothing to analyse; the original indexed keys()[0] and crashed here.
        return [], []

    # dict views are not subscriptable on Python 3, so take the first series
    # through an iterator instead of keys()[0].
    n_points = len(next(iter(rel_changes.values())))

    minx = []
    maxx = []
    for i in range(n_points):
        vals = [
            series[i]
            for series in rel_changes.values()
            if series[i] is not None
        ]

        if len(vals) >= min_samples:
            vals.sort()
            # Floor division keeps these valid integer indices on Python 3.
            median = vals[len(vals) // 2]
            q1 = vals[len(vals) // 4]
            q3 = vals[(3 * len(vals)) // 4]
            spread = (q3 - q1) * 4
            vals = [v for v in vals if median - spread < v < median + spread]

        if len(vals) < min_samples:
            # Too few values, either originally or after outlier removal.
            minx.append(None)
            maxx.append(None)
            continue

        mu, sigma = norm.fit(vals)
        maxx.append(norm.ppf(0.9999, mu, sigma))
        minx.append(norm.ppf(1 - 0.9999, mu, sigma))

    return minx, maxx
def make_tendencies_minmax(l, INTERVAL=1, min_samples=8):
    """Compute per-index lower/upper bounds of the typical relative change.

    Every series in ``l`` is first passed through ``n_day_rel`` (presumably
    the relative change over ``INTERVAL`` days -- confirm against that
    helper). For each index, the non-``None`` values across all keys are
    collected, values lying 4 inter-quartile distances or more from the
    median are discarded, and a Normal distribution is fitted to the rest.
    The bounds are the 0.01 % and 99.99 % quantiles of that fit.

    Args:
        l (dict): Maps a key (country code) to an indexable series. The
            number of indices processed is taken from the first series
            returned by ``n_day_rel``; all series are assumed to be at least
            that long.
        INTERVAL (int): Forwarded unchanged to ``n_day_rel``.
        min_samples (int): Minimum number of values required at an index,
            both before and after outlier removal, for a fit to be attempted.

    Returns:
        tuple: ``(minx, maxx)``, two lists with one entry per index. An entry
        is ``None`` when fewer than ``min_samples`` usable values exist.
        Both lists are empty when ``l`` is empty.
    """
    rel_changes = {ccode: n_day_rel(l[ccode], INTERVAL) for ccode in l}
    if not rel_changes:
        # Nothing to analyse; the original indexed keys()[0] and crashed here.
        return [], []

    # dict views are not subscriptable on Python 3, so take the first series
    # through an iterator instead of keys()[0].
    n_points = len(next(iter(rel_changes.values())))

    minx = []
    maxx = []
    for i in range(n_points):
        vals = [
            series[i]
            for series in rel_changes.values()
            if series[i] is not None
        ]

        if len(vals) >= min_samples:
            vals.sort()
            # Floor division keeps these valid integer indices on Python 3.
            median = vals[len(vals) // 2]
            q1 = vals[len(vals) // 4]
            q3 = vals[(3 * len(vals)) // 4]
            spread = (q3 - q1) * 4
            vals = [v for v in vals if median - spread < v < median + spread]

        if len(vals) < min_samples:
            # Too few values, either originally or after outlier removal.
            minx.append(None)
            maxx.append(None)
            continue

        mu, sigma = norm.fit(vals)
        maxx.append(norm.ppf(0.9999, mu, sigma))
        minx.append(norm.ppf(1 - 0.9999, mu, sigma))

    return minx, maxx
def norm_fit():
    """Fit a normal distribution to each (field, season, period) series.

    The fits are used to define the AN and BN thresholds; see
    https://www.hko.gov.hk/en/wxinfo/season/intpret.htm
    (AN/BN presumably stand for above-normal / below-normal -- confirm.)

    Reads the module-level names ``fields``, ``season_names``,
    ``climat_yrs`` and ``seasonal_avg_dict_ls_ls``. Each entry of
    ``climat_yrs`` is unpacked into ``range(...)`` to enumerate the years of
    one period. The original author notes two fields and four seasons.

    Returns:
        list: Nested lists indexed as ``[field][season][period]``, where each
        leaf is whatever ``norm.fit`` returns for that period's values.
    """
    fits_per_field = []
    for field_idx, _field in enumerate(fields):
        fits_per_season = []
        for season_idx, _season in enumerate(season_names):
            fits_per_period = []
            for yrs in climat_yrs:
                yearly_values = [
                    seasonal_avg_dict_ls_ls[field_idx][season_idx][year]
                    for year in range(*yrs)
                ]
                fits_per_period.append(norm.fit(yearly_values))
            fits_per_season.append(fits_per_period)
        fits_per_field.append(fits_per_season)
    return fits_per_field
def uncertainty_histplot(self, xcumprod: bool = True, color_style: str = 'oil',
                         hidey: bool = True, figsize=(12, 8)):
    """
    Plot EUR uncertainty based on specified Arps model.

    Draws a density histogram of the last column of the stored Monte Carlo
    results (``self._cumprod_mc`` or ``self._time_mc``), overlays a fitted
    normal PDF, and marks P90 / P50 / P10 with vertical dashed lines.
    P90 is the 10th percentile and P10 the 90th percentile of the samples;
    P50 is taken from ``self.eur(self._rate_eur)`` or
    ``self.time(self._rate_eur)`` and is not a sample percentile.

    The method applies the 'ggplot' style via ``plt.style.use`` and ends
    with ``plt.show()``; it returns nothing.

    Parameters
    ---
    xcumprod : bool
        True for cumulative production, False for time
    color_style : str
        set 'oil' theme or 'gas' theme (anything other than 'gas' is
        treated as 'oil')
    hidey : bool
        hide probability density axis tick labels
    figsize : tuple
        Figure size
    """
    plt.style.use('ggplot')

    if color_style == 'gas':
        regclr = 'brown'
        sampleclr = 'coral'
    else:
        regclr = 'darkgreen'
        sampleclr = 'limegreen'

    # Select the plotted quantity once instead of re-branching for every step.
    mc_results = self._cumprod_mc if xcumprod else self._time_mc
    samples = mc_results[:, -1]

    _, ax = plt.subplots(figsize=figsize)

    loc, scale = norm.fit(samples)
    ax.hist(samples, bins='auto', density=True, color=sampleclr, alpha=0.5)

    dist = norm(loc=loc, scale=scale)
    xaxis = np.linspace(dist.ppf(0.0005), dist.ppf(0.9995))
    # ax.plot returns a list of lines; unpack the single Line2D so it is a
    # valid legend handle (a list handle is skipped with a warning).
    normal, = ax.plot(xaxis, dist.pdf(xaxis), color=regclr)

    P90 = np.percentile(samples, 10)
    P50 = self.eur(self._rate_eur) if xcumprod else self.time(self._rate_eur)
    P10 = np.percentile(samples, 90)

    low = ax.axvline(P90, color=regclr, linestyle='--', alpha=0.5)
    mid = ax.axvline(P50, color=regclr, linestyle='--', alpha=0.5)
    hgh = ax.axvline(P10, color=regclr, linestyle='--', alpha=0.5)

    ax.set_xlim(xaxis[0], xaxis[-1])
    # Thousands separators on the x axis.
    ax.get_xaxis().set_major_formatter(
        FuncFormatter(lambda x, p: format(int(x), ',')))

    if xcumprod:
        ax.set_title((f'EUR Distribution based on Arps Model\n'
                      f'Selected Method: {self._MODEL_NAME[self._model]}'))
        ax.set_xlabel('Estimated Ultimate Recovery')
    else:
        ax.set_title((f'Time Distribution at EUR based on Arps Model\n'
                      f'Selected Method: {self._MODEL_NAME[self._model]}'))
        ax.set_xlabel('Time')

    ax.set_ylabel('Probability Density')
    if hidey:
        ax.yaxis.set_ticklabels([])

    ax.legend([normal, low, mid, hgh],
              ('PDF',
               f'P90: {round(P90, 2)}',
               f'P50: {round(P50, 2)}',
               f'P10: {round(P10, 2)}'))
    plt.show()