Exemple #1
0
def get_poisson_distribution(date_range, country_code, global_min, global_max):
    """Score one day of a country's user count against a Poisson-based band.

    The band is derived from the comparison count: its lower edge is the
    0.0001 quantile and its upper edge the 0.9999 quantile of a Poisson
    distribution whose mean is the comparison count, each scaled by the
    global minimum / maximum tendency looked up for ``date_range.name``.

    Args:
        date_range (pandas.core.series.Series): The date range of country data for the poisson distribution to be applied to.
            Element ``0`` is the count being scored, element ``1`` the count
            it is compared against, and ``date_range.name`` is the key used
            to index ``global_min`` and ``global_max``.
        country_code (string): The country code of the country being explored.
        global_min (pandas.core.series.Series): A time series list of the global minimum tendencies for tor users.
        global_max (pandas.core.series.Series): A time series list of the global maximum tendencies for tor users.

    Returns:
        pandas.core.series.Series: Always carries ``country``, ``min`` and
        ``max``. When a global tendency or either count is missing, ``min``
        and ``max`` are ``None`` and no other keys are present. Otherwise the
        series also holds ``users`` (the scored count), ``event_censor``
        (1 if the count is below ``min``, else 0) and ``event_spike``
        (1 if the count is above ``max``, else 0).
    """
    # NOTE(review): integer keys on a Series are label lookups when the index
    # is integer-labelled and positional otherwise -- confirm what the caller
    # passes before switching these to .iloc.
    current_date = date_range[0]
    comparison_date = date_range[1]
    day = date_range.name

    # If there is not a global min or a global max on the day in question then don't even try
    if pd.isnull(global_min[day]) or pd.isnull(global_max[day]):
        return pd.Series({"country": country_code, "min": None, "max": None})

    # We can't do this without both dates.  pd.isnull (unlike np.isnan) also
    # accepts None, which this function itself uses as its "missing" marker.
    if pd.isnull(comparison_date) or pd.isnull(current_date):
        return pd.Series({"country": country_code, "min": None, "max": None})

    # poisson.ppf(plausible_range, shape_params)
    min_range = global_min[day] * poisson.ppf(1 - 0.9999, comparison_date)
    max_range = global_max[day] * poisson.ppf(0.9999, comparison_date)

    return pd.Series({
        "country": country_code,
        "min": min_range,
        "max": max_range,
        "users": current_date,
        "event_censor": int(current_date < min_range),
        "event_spike": int(current_date > max_range),
    })
Exemple #2
0
def CorrectInitialCorrel(lambda1, lambda2, r):
    """Map a target correlation ``r`` between two Poisson margins to a corrected value.

    500 uniform draws are turned into Poisson variates with means ``lambda1``
    and ``lambda2``. Feeding both margins the same uniforms gives the largest
    sample correlation, and feeding the second margin ``1 - u`` gives the
    smallest. Two curve parameters ``a`` and ``b`` are derived from those two
    correlations and ``r`` is transformed as ``log((r + a) / a) / b``.
    NOTE(review): presumably the result is the correlation to use for the
    underlying normal draws -- confirm against the caller.

    Args:
        lambda1: Mean of the first Poisson margin.
        lambda2: Mean of the second Poisson margin.
        r: Desired correlation between the two margins.

    Returns:
        The corrected correlation, or ``nan`` when it falls outside
        ``[-1, 1]``. The result depends on ``np.random``'s global state.
    """
    samples = 500
    u = np.random.uniform(low=0, high=1, size=samples)
    first_margin = poisson.ppf(u, lambda1)
    # Same uniforms on both margins -> highest correlation; mirrored uniforms
    # on the second margin -> lowest correlation.
    maxcor = pearsonr(first_margin, poisson.ppf(u, lambda2))[0]
    mincor = pearsonr(first_margin, poisson.ppf(1 - u, lambda2))[0]
    a = -maxcor * mincor / (maxcor + mincor)
    b = np.log((maxcor + a) / a)
    corrected = np.log((r + a) / a) / b
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    if corrected > 1 or corrected < -1:
        return np.nan
    return corrected
Exemple #3
0
def absolute_plot(series, minc, maxc, labels, INTERVAL, xtitle):
  """Build the per-point Poisson band for ``series`` and hand it to ``raw_plot``.

  For every index ``i`` that has a usable reference value ``INTERVAL`` steps
  earlier, the lower edge is ``minc[i]`` times the 0.0001 Poisson quantile and
  the upper edge is ``maxc[i]`` times the 0.9999 Poisson quantile, both taken
  with the reference value as the Poisson mean. Indices without a usable
  reference get ``None`` for both edges.

  Args:
    series: Indexable sequence of counts; entries may be ``None``.
    minc: Per-index lower scaling factors; entries may be ``None``.
    maxc: Per-index upper scaling factors; entries may be ``None``.
    labels: Passed through to ``raw_plot`` unchanged.
    INTERVAL: How many positions back the reference value lies.
    xtitle: Passed through to ``raw_plot`` unchanged.

  Raises:
    AssertionError: If a computed lower edge is not strictly below its upper
      edge (the offending values are printed first).
  """
  in_minc = []
  in_maxc = []
  for i, _ in enumerate(series):
    usable = (i > 0 and i - INTERVAL >= 0
              and series[i] is not None
              and series[i - INTERVAL] is not None
              and series[i - INTERVAL] != 0
              and minc[i] is not None
              and maxc[i] is not None)
    if not usable:
      in_minc.append(None)
      in_maxc.append(None)
      continue

    reference = series[i - INTERVAL]
    low = minc[i] * poisson.ppf(1 - 0.9999, reference)
    high = maxc[i] * poisson.ppf(0.9999, reference)
    if not low < high:
      # Single-argument form prints the same text on Python 2 and Python 3.
      print("%s %s %s %s %s" % (low, high, reference, minc[i], maxc[i]))
    assert low < high
    in_minc.append(low)
    in_maxc.append(high)
  raw_plot(series, in_minc, in_maxc, labels, xtitle)
Exemple #4
0
def write_all(tss, minc, maxc, INTERVAL=7):
  """Write the Poisson band of every country to ``direct-users-ranges.csv``.

  The file gets a ``date,country,minusers,maxusers`` header followed by one
  row per (country, index) pair that has a usable reference value ``INTERVAL``
  positions earlier. The pseudo-countries ``all``, ``??`` and ``date`` are
  skipped.

  Args:
    tss: Object providing ``country_codes``, ``get_country_series(code)`` and
      ``all_dates``.
    minc: Per-index lower scaling factors; entries may be ``None``.
    maxc: Per-index upper scaling factors; entries may be ``None``.
    INTERVAL: How many positions back the reference value lies.

  Raises:
    AssertionError: If a computed lower edge is not strictly below its upper
      edge (the offending values are printed first).
  """
  exclude = set(["all", "??", "date"])
  # The with-block closes the file even when the assertion below fires.
  with open("direct-users-ranges.csv", "w") as ranges_file:
    ranges_file.write("date,country,minusers,maxusers\n")
    for c in tss.country_codes:
      if c in exclude:
        continue
      series = tss.get_country_series(c)
      for i, _ in enumerate(series):
        usable = (i > 0 and i - INTERVAL >= 0
                  and series[i] is not None
                  and series[i - INTERVAL] is not None
                  and series[i - INTERVAL] != 0
                  and minc[i] is not None
                  and maxc[i] is not None)
        if not usable:
          continue
        reference = series[i - INTERVAL]
        minv = minc[i] * poisson.ppf(1 - 0.9999, reference)
        maxv = maxc[i] * poisson.ppf(0.9999, reference)
        if not minv < maxv:
          # Single-argument form prints the same text on Python 2 and 3.
          print("%s %s %s %s %s" % (minv, maxv, reference, minc[i], maxc[i]))
        assert minv < maxv
        ranges_file.write("%s,%s,%s,%s\n" % (tss.all_dates[i], c, minv, maxv))
Exemple #5
0
def censor_score(series, minc, maxc, INTERVAL, scoring_interval=None):
  """Count how often ``series`` leaves its Poisson band in the scoring window.

  For index ``i`` the band is built from the value ``INTERVAL`` positions
  earlier: the lower edge is ``minc[i]`` times the 0.0001 Poisson quantile and
  the upper edge is ``maxc[i]`` times the 0.9999 Poisson quantile, with that
  earlier value as the Poisson mean. Only the last ``scoring_interval``
  indices are scored.

  Args:
    series: Indexable sequence of counts; entries may be ``None``.
    minc: Per-index lower scaling factors; entries may be ``None``.
    maxc: Per-index upper scaling factors; entries may be ``None``.
    INTERVAL: How many positions back the reference value lies.
    scoring_interval: Number of trailing indices to score; defaults to the
      whole series.

  Returns:
    A ``(downscore, upscore)`` tuple: how many scored values fell below the
    lower edge and how many rose above the upper edge.

  Raises:
    AssertionError: If ``scoring_interval`` exceeds ``len(series)``.
  """
  if scoring_interval is None:
    scoring_interval = len(series)
  assert(len(series) >= scoring_interval)

  first_scored = len(series) - scoring_interval
  upscore = 0
  downscore = 0

  for i, v in enumerate(series):
    # Skip indices that cannot score before paying for any quantile lookup.
    if i < first_scored or i == 0 or i - INTERVAL < 0:
      continue
    if series[i] is None or minc[i] is None or maxc[i] is None:
      continue
    reference = series[i - INTERVAL]
    if reference is None or reference == 0:
      continue

    if v < minc[i] * poisson.ppf(1 - 0.9999, reference):
      downscore += 1
    if v > maxc[i] * poisson.ppf(0.9999, reference):
      upscore += 1

  return downscore, upscore
Exemple #6
0
def GenerateMultivariatePoisson(p, samples, R, lmbda):
    """Draw Poisson variates driven by correlated normal draws.

    ``samples`` draws are taken from a zero-mean ``p``-dimensional normal
    distribution with covariance ``R``, transposed so that each row is one
    dimension, pushed through the standard normal CDF, and finally through
    the Poisson inverse CDF with mean ``lmbda``.

    Args:
        p: Number of dimensions of the normal distribution.
        samples: Number of draws.
        R: Covariance matrix handed to ``np.random.multivariate_normal``.
        lmbda: Poisson mean(s); must broadcast against a ``(p, samples)``
            array.

    Returns:
        The array produced by ``poisson.ppf``. The result depends on
        ``np.random``'s global state.
    """
    zero_mean = np.zeros(p, dtype=int)
    gaussian_draws = np.random.multivariate_normal(zero_mean, R, samples).T
    quantile_levels = norm.cdf(gaussian_draws)
    return poisson.ppf(quantile_levels, lmbda)  # Inverse Poisson Distribution
Exemple #7
0
def censor_score(series, minc, maxc, INTERVAL, scoring_interval=None):
    """Tally excursions of ``series`` outside its Poisson band.

    The band at index ``i`` uses the value ``INTERVAL`` positions earlier as
    the Poisson mean: the lower edge is ``minc[i]`` times the 0.0001 quantile
    and the upper edge is ``maxc[i]`` times the 0.9999 quantile. Only the
    trailing ``scoring_interval`` indices contribute to the tally.

    Args:
        series: Indexable sequence of counts; entries may be ``None``.
        minc: Per-index lower scaling factors; entries may be ``None``.
        maxc: Per-index upper scaling factors; entries may be ``None``.
        INTERVAL: How many positions back the reference value lies.
        scoring_interval: Number of trailing indices to score; defaults to
            the whole series.

    Returns:
        ``(downscore, upscore)``: the number of scored values below the lower
        edge and the number above the upper edge.

    Raises:
        AssertionError: If ``scoring_interval`` exceeds ``len(series)``.
    """
    if scoring_interval is None:
        scoring_interval = len(series)
    assert (len(series) >= scoring_interval)

    window_start = len(series) - scoring_interval
    downscore = 0
    upscore = 0

    for i, v in enumerate(series):
        in_window = i >= window_start
        has_reference = i > 0 and i - INTERVAL >= 0
        # Indices outside the window never score, so skip them before any
        # quantile is computed.
        if not (in_window and has_reference):
            continue

        previous = series[i - INTERVAL]
        if series[i] is None or previous is None or previous == 0:
            continue
        if minc[i] is None or maxc[i] is None:
            continue

        in_minc = minc[i] * poisson.ppf(1 - 0.9999, previous)
        in_maxc = maxc[i] * poisson.ppf(0.9999, previous)
        if v < in_minc:
            downscore += 1
        if v > in_maxc:
            upscore += 1

    return downscore, upscore
Exemple #8
0
def absolute_plot(series, minc, maxc, labels, INTERVAL, xtitle):
    """Compute the Poisson band around ``series`` and pass it to ``raw_plot``.

    At each index ``i`` with a usable reference value ``INTERVAL`` positions
    earlier, the lower edge is ``minc[i]`` times the 0.0001 Poisson quantile
    and the upper edge is ``maxc[i]`` times the 0.9999 Poisson quantile, using
    the reference value as the Poisson mean. All other indices get ``None``
    for both edges, so the band lists stay aligned with ``series``.

    Args:
        series: Indexable sequence of counts; entries may be ``None``.
        minc: Per-index lower scaling factors; entries may be ``None``.
        maxc: Per-index upper scaling factors; entries may be ``None``.
        labels: Passed through to ``raw_plot`` unchanged.
        INTERVAL: How many positions back the reference value lies.
        xtitle: Passed through to ``raw_plot`` unchanged.

    Raises:
        AssertionError: If a computed lower edge is not strictly below its
            upper edge (the offending values are printed first).
    """
    in_minc = []
    in_maxc = []
    for i, _ in enumerate(series):
        lower = upper = None
        if (i > 0 and i - INTERVAL >= 0
                and series[i] is not None
                and series[i - INTERVAL] is not None
                and series[i - INTERVAL] != 0
                and minc[i] is not None
                and maxc[i] is not None):
            previous = series[i - INTERVAL]
            lower = minc[i] * poisson.ppf(1 - 0.9999, previous)
            upper = maxc[i] * poisson.ppf(0.9999, previous)
            if not lower < upper:
                # One pre-formatted argument keeps the output identical on
                # Python 2 and Python 3.
                print("%s %s %s %s %s" %
                      (lower, upper, previous, minc[i], maxc[i]))
            assert lower < upper
        in_minc.append(lower)
        in_maxc.append(upper)
    raw_plot(series, in_minc, in_maxc, labels, xtitle)
Exemple #9
0
def write_all(tss, minc, maxc, RANGES_FILE, INTERVAL=7):
    """Write the Poisson band of every country to the CSV file ``RANGES_FILE``.

    The file gets a ``date,country,minusers,maxusers`` header followed by one
    row per (country, index) pair that has a usable reference value
    ``INTERVAL`` positions earlier. The pseudo-countries ``all``, ``??`` and
    ``date`` are skipped.

    Args:
        tss: Object providing ``country_codes``, ``get_country_series(code)``
            and ``all_dates``.
        minc: Per-index lower scaling factors; entries may be ``None``.
        maxc: Per-index upper scaling factors; entries may be ``None``.
        RANGES_FILE: Path of the file to (over)write.
        INTERVAL: How many positions back the reference value lies.

    Raises:
        AssertionError: If a computed lower edge is not strictly below its
            upper edge (the offending values are printed first).
    """
    exclude = set(["all", "??", "date"])
    # The context manager closes the file even if the assertion below fires.
    with open(RANGES_FILE, "w") as ranges_file:
        ranges_file.write("date,country,minusers,maxusers\n")
        for c in tss.country_codes:
            if c in exclude:
                continue
            series = tss.get_country_series(c)
            for i, _ in enumerate(series):
                if not (i > 0 and i - INTERVAL >= 0):
                    continue
                previous = series[i - INTERVAL]
                if series[i] is None or previous is None or previous == 0:
                    continue
                if minc[i] is None or maxc[i] is None:
                    continue

                minv = minc[i] * poisson.ppf(1 - 0.9999, previous)
                maxv = maxc[i] * poisson.ppf(0.9999, previous)
                if not minv < maxv:
                    # One pre-formatted argument keeps the output identical
                    # on Python 2 and Python 3.
                    print("%s %s %s %s %s" %
                          (minv, maxv, previous, minc[i], maxc[i]))
                assert minv < maxv
                ranges_file.write("%s,%s,%s,%s\n" %
                                  (tss.all_dates[i], c, minv, maxv))
Exemple #10
0
    z[:, j] = a * z[:, j - 1] + np.sqrt(1 - a**2) * z[:, j]
# Push z through the standard normal CDF so every entry of u lies in [0, 1]
# and can be used as a quantile level for poisson.ppf below.
# NOTE(review): z, n and q are defined above this excerpt -- presumably z is
# an (n, q) array of normal draws; confirm.
u = norm.cdf(z)

# The covariates: two (n, q) arrays of standard normal draws.
x1 = np.random.normal(size=(n, q))
x2 = np.random.normal(size=(n, q))

# The mean parameters for the marginal distributions: a linear predictor with
# coefficients 1 and -0.5, exponentiated so the means are positive.
lpr = x1 - 0.5 * x2
expval = np.exp(lpr)

# The response values.  These are marginally Poisson with the specified means.
# Each entry is the Poisson inverse CDF at level u[i, j] with mean expval[i, j].
y = np.zeros((n, q))
for i in range(n):
    for j in range(q):
        y[i, j] = poisson.ppf(u[i, j], expval[i, j])

# idv[i, j] == i (row number) and time[i, j] == j (column number).
idv = np.outer(np.arange(n), np.ones(q))
time = np.outer(np.ones(n), np.arange(q))

# Flatten every (n, q) array into one long column, giving one data-frame row
# per (i, j) cell.
df = pd.DataFrame({
    "y": y.flat,
    "x1": x1.flat,
    "x2": x2.flat,
    "grp": idv.flat,
    "time": time.flat
})

model = sm.GEE.from_formula("y ~ x1 + x2",
                            groups="grp",
                            family=sm.families.Poisson(),
def negbinom(u, mu, scale):
    """Return Poisson quantiles of ``u`` at gamma-distributed random means.

    One gamma variate is drawn per element of ``u`` with shape
    ``mu * (1 - p) / p`` and scale ``p / (1 - p)``, where
    ``p = (scale - 1) / scale`` (so the gamma draws have mean ``mu``). Each
    element of ``u`` is then mapped through the Poisson inverse CDF whose mean
    is the matching gamma draw.

    Args:
        u: Sequence of quantile levels; its length sets the number of draws.
        mu: Mean of the gamma-distributed Poisson means.
        scale: Controls ``p`` as described above.

    Returns:
        The array produced by ``poisson.ppf``. The result depends on
        ``np.random``'s global state.
    """
    prob = (scale - 1) / scale
    gamma_shape = mu * (1 - prob) / prob
    poisson_means = np.random.gamma(gamma_shape, prob / (1 - prob), len(u))
    return poisson.ppf(u, mu=poisson_means)