def plot_distribution(self):
     """
     Plot the distribution of estimated coronavirus cases in Dhaka.
     """
     p = self.calculate_pro_detected_overseas()
     n = self.international.cases
     
     fig, ax = plt.subplots(1, 1)
     x = np.arange(nbinom.ppf(0.025, n, p),
                nbinom.ppf(0.975, n, p))
     ax.vlines(x, 0, nbinom.pmf(x, n, p), color='lightblue', lw=5, alpha=0.5)
     ax.set_title("pmf of coronavirus cases in Dhaka " + self.date)
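A standalone sketch of the same interval plot, with made-up values standing in for the class's calculate_pro_detected_overseas() and international.cases attributes:

# Hedged sketch: n and p here are illustrative, not the original model's values.
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import nbinom

n, p = 15, 0.3
x = np.arange(nbinom.ppf(0.025, n, p), nbinom.ppf(0.975, n, p))
plt.vlines(x, 0, nbinom.pmf(x, n, p), color='lightblue', lw=5, alpha=0.5)
plt.title("pmf of estimated cases, central 95% interval")
plt.show()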
Example 2
    def qNBI(q: float, location: np.ndarray, scale: np.ndarray):
        """Quantile function.

        """
        n = 1 / scale
        p = n / (n + location)
        if len(scale) > 1:
            quant = np.where(scale > 1e-04, nbinom.ppf(q=q, n=n, p=p),
                             poisson.ppf(q=q, mu=location))
        else:
            quant = poisson.ppf(q=q,
                                mu=location) if scale < 1e-04 else nbinom.ppf(
                                    q=q, n=n, p=p)
        return quant
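A usage sketch, assuming qNBI is callable as a plain function (in its source it sits inside a class):

import numpy as np
from scipy.stats import nbinom, poisson

location = np.array([5.0, 5.0])
scale = np.array([1e-06, 0.5])  # the first entry is below 1e-04 and triggers the Poisson fallback
print(qNBI(0.9, location, scale))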
Example 3
def calc_coverage_threshold(cov_dict):
    '''
    Calculate the minimum coverage threshold for each key in cov_dict.
    See the end of the 'alternative parameterization' section of the negative
    binomial Wikipedia page and the scipy nbinom documentation for details of the calculation.
    '''
    threshold_dict = {}
    for g in cov_dict:
        mean = float(cov_dict[g]['mean'])
        var = float(cov_dict[g]['variance'])
        q = (var-mean)/var
        n = mean**2/(var-mean)
        p = 1 - q

        ## assert that I did the math correctly.
        assert(isclose(nbinom.mean(n,p), mean))
        assert(isclose(nbinom.var(n,p), var))

        ## find the integer threshold that includes ~95% of REL606 distribution,
        ## excluding 5% on the left hand side.
        my_threshold = nbinom.ppf(0.05,n,p)
        my_threshold_p = nbinom.cdf(my_threshold,n,p)
        threshold_dict[g] = {'threshold':str(my_threshold),
                             'threshold_p':str(my_threshold_p)}
    return threshold_dict
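A worked check of the (mean, variance) -> (n, p) conversion above, with made-up coverage numbers (assumes nbinom and math.isclose are imported, as the function requires):

cov = {'genome_A': {'mean': 50.0, 'variance': 80.0}}
# n = 50**2 / (80 - 50) ~ 83.33 and p = 1 - (80 - 50)/80 = 0.625,
# which recover mean = n*(1-p)/p = 50 and var = n*(1-p)/p**2 = 80.
print(calc_coverage_threshold(cov))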
Example 4
 def _ppf(self, q, mu, alpha, p, w):
     s, p = self.convert_params(mu, alpha, p)
     # we just translated and stretched q to remove zi
     q_mod = (q - w) / (1 - w)
     x = nbinom.ppf(q_mod, s, p)
     # set to zero if in the zi range
     x[q < w] = 0
     return x
Example 5
def plot_nbinom(r, p):
    left  = nbinom.ppf(0.01, r, p)
    right = nbinom.ppf(0.99, r, p)
    step = max(1, int((right - left) / 10))  # guard against a zero step when the interval is narrow
    x = np.arange(left, right, step)
    plt.plot(
        x,
        nbinom.pmf(x, r, p),
        alpha=0.6,
        color='gray'
    )
    plt.plot(
        x,
        nbinom.pmf(x, r, p),
        'o',
        label='$r=%s, p = %s$' % (r, p)
    )
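A usage sketch (assumes numpy, matplotlib.pyplot and nbinom are imported, as the function expects):

plot_nbinom(10, 0.5)
plt.legend()
plt.show()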
Example 6
File: gacha.py Project: iCodeIN/yui
 async def challenge(
     self,
     bot,
     event: Message,
     successes: int,
     chance: str,
 ):
     if successes < SUCCESSES_MIN or successes > SUCCESSES_MAX:
         await bot.say(
             event.channel,
             # "Please enter a success count of at least {SUCCESSES_MIN} and at most {SUCCESSES_MAX}!"
             f'성공횟수는 {SUCCESSES_MIN}회 이상,'
             f' {SUCCESSES_MAX:,}회 이하로 입력해주세요!',
         )
         return
     try:
         if chance.endswith('%'):
             p = Decimal(chance[:-1]) / 100
         else:
             p = Decimal(chance)
     except InvalidOperation:
         await bot.say(event.channel, '정상적인 확률을 입력해주세요!')  # "Please enter a valid probability!"
         return
     if p < CHANCE_MIN or p > CHANCE_MAX:
         await bot.say(
             event.channel,
             # "Please enter a probability of at least {CHANCE_MIN}% and at most {CHANCE_MAX}%!"
             f'확률값은 {to_percent(CHANCE_MIN)}% 이상,'
             f' {to_percent(CHANCE_MAX)}% 이하로 입력해주세요!',
         )
         return
     if p / successes < CHANCE_MIN:
         await bot.say(event.channel, '입력하신 확률값에 비해 성공 횟수가 너무 많아요!')  # "Too many successes for the given probability!"
         return
     counts = {
         int(math.ceil(nbinom.ppf(float(q), successes, float(p))))
         for q in filter(lambda x: x >= p, CHANCES + [p])
     }
     results = [
         (x, Decimal(str(nbinom.cdf(x, successes, float(p)))))
         for x in sorted(counts)
     ]
     # each line: "- If you try {tries+successes} times, you'll hit the target count with {ch}% probability!"
     text = '\n'.join(
         f'- {tries+successes:,}번 시도하시면 {to_percent(ch, D001)}% 확률로'
         f' 목표 횟수만큼 성공할 수 있어요!'
         for tries, ch in results
     )
     await bot.say(
         event.channel,
         # "Here is how many attempts you need to succeed {successes} times at {p}% odds!"
         f'{to_percent(p)}% 확률의 도전을 {successes:,}번'
         f' 성공시키려면 몇 회의 도전이 필요한지 알려드릴게요!\n{text}',
     )
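The statistical core of the command above, outside the bot: scipy's nbinom.ppf(q, k, p) returns the number of failures before the k-th success, so total trials = failures + k. A sketch with made-up values:

from scipy.stats import nbinom

k, p = 5, 0.03  # target successes and per-trial chance (illustrative)
for q in (0.5, 0.9, 0.99):
    failures = nbinom.ppf(q, k, p)
    print(f"{q:.0%} chance of {k} successes within {int(failures) + k} trials")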
Example 7
def neg_binom_demand_distribution(C, t, r=10, p=.5):
    n = int(C.shape[0])
    U = np.linalg.cholesky(C)

    raw_demand = np.random.normal(size=(t, n))

    shifted_demand = np.dot(raw_demand, U.T)

    flat_demand = flatten_matrix(shifted_demand)
    true_std = np.std(flat_demand)
    true_mean = np.mean(flat_demand)

    normalized_demand = (shifted_demand - true_mean) / true_std

    new_demand = nbinom.ppf(norm.cdf(normalized_demand), r, p)

    return new_demand.T
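The transform above is a Gaussian copula: correlated normals are pushed through norm.cdf to uniforms, then through nbinom.ppf to correlated negative binomial draws. A minimal sketch with a made-up 2x2 correlation matrix:

import numpy as np
from scipy.stats import norm, nbinom

C = np.array([[1.0, 0.8], [0.8, 1.0]])
z = np.random.normal(size=(1000, 2)) @ np.linalg.cholesky(C).T
demand = nbinom.ppf(norm.cdf(z), 10, 0.5)
print(np.corrcoef(demand.T))  # the correlation roughly survives the transform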
Example 8
def gen_ztnegbinom(n, mu, size):
    """Zero-truncated negative binomial distribution.

        input:  n, int
                number of draws to generate

                mu, array-like of float
                number-of-successes parameter of the negative binomial, one per draw

                size, float
                probability of success

        output: ztnb, np.array of int
                draws from a zero-truncated negative binomial distribution
    """

    temp = nbinom.pmf(0, mu, size)
    p = [uniform.rvs(loc=temp[i], scale=1-temp[i]) for i in range(n)]
    ztnb = [int(nbinom.ppf(p[i], mu[i], size)) for i in range(n)]

    return np.array(ztnb)
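A usage sketch; note mu must be array-like because it is indexed per draw (values here are made up):

import numpy as np

mu = np.full(1000, 4.0)  # nbinom n parameter, one per draw
draws = gen_ztnegbinom(1000, mu, 0.5)
print(draws.min())  # never 0: the zero bin is excluded by construction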
Example 9
    def generate_graph_data(self):
        ageGroup = self.tableModel.data[self.selected_item_index.row()][0]
        parameter = self.tableModel.data[self.selected_item_index.row()][1]
        p1 = self.temporaryParametersDict[ageGroup][parameter]["p1"]
        p2 = self.temporaryParametersDict[ageGroup][parameter]["p2"]

        distributionType = self.temporaryParametersDict[ageGroup][parameter][
            "distributionType"]
        xyDict = {"x": [], "y": []}
        try:
            if distributionType == 'Binomial':
                xyDict["x"] = np.arange(binom.ppf(0.01, int(p1), p2 / 100),
                                        binom.ppf(0.99, int(p1), p2 / 100))
                xyDict["y"] = binom.pmf(xyDict["x"], int(p1), p2 / 100)
            elif distributionType == 'Geometric':
                xyDict["x"] = np.arange(geom.ppf(0.01, p1 / 100),
                                        geom.ppf(0.99, p1 / 100))
                xyDict["y"] = geom.pmf(xyDict["x"], p1 / 100)
                if p2 != 0:
                    self.tableModel.setData(
                        self.selected_item_index.sibling(
                            self.selected_item_index.row(), 3), 0, Qt.EditRole)
            elif distributionType == 'Laplacian':
                xyDict["x"] = np.arange(dlaplace.ppf(0.01, p1 / 100),
                                        dlaplace.ppf(0.99, p1 / 100))
                xyDict["y"] = dlaplace.pmf(xyDict["x"], p1 / 100)
                if p2 != 0:
                    self.tableModel.setData(
                        self.selected_item_index.sibling(
                            self.selected_item_index.row(), 3), 0, Qt.EditRole)
            elif distributionType == 'Logarithmic':
                xyDict["x"] = np.arange(logser.ppf(0.01, p1 / 100),
                                        logser.ppf(0.99, p1 / 100))
                xyDict["y"] = logser.pmf(xyDict["x"], p1 / 100)
                if p2 != 0:
                    self.tableModel.setData(
                        self.selected_item_index.sibling(
                            self.selected_item_index.row(), 3), 0, Qt.EditRole)
            elif distributionType == 'Neg. binomial':
                xyDict["x"] = np.arange(nbinom.ppf(0.01, p1, p2 / 100),
                                        nbinom.ppf(0.99, p1, p2 / 100))
                xyDict["y"] = nbinom.pmf(xyDict["x"], p1, p2 / 100)
            elif distributionType == 'Planck':
                xyDict["x"] = np.arange(planck.ppf(0.01, p1 / 100),
                                        planck.ppf(0.99, p1 / 100))
                xyDict["y"] = planck.pmf(xyDict["x"], p1 / 100)
                if p2 != 0:
                    self.tableModel.setData(
                        self.selected_item_index.sibling(
                            self.selected_item_index.row(), 3), 0, Qt.EditRole)
            elif distributionType == 'Poisson':
                xyDict["x"] = np.arange(poisson.ppf(0.01, p1),
                                        poisson.ppf(0.99, p1))
                xyDict["y"] = poisson.pmf(xyDict["x"], p1)
                if p2 != 0:
                    self.tableModel.setData(
                        self.selected_item_index.sibling(
                            self.selected_item_index.row(), 3), 0, Qt.EditRole)
            elif distributionType == 'Uniform':
                if p1 - 0.5 * p2 < 0:
                    p2 = p1
                low = p1 - 0.5 * p2   # renamed from min/max to avoid shadowing the builtins
                high = p1 + 0.5 * p2
                xyDict["x"] = np.arange(randint.ppf(0.01, low, high),
                                        randint.ppf(0.99, low, high))
                xyDict["y"] = randint.pmf(xyDict["x"], low, high)
            elif distributionType == 'Zipf (Zeta)':
                xyDict["x"] = np.arange(zipf.ppf(0.01, p1), zipf.ppf(0.99, p1))
                xyDict["y"] = zipf.pmf(xyDict["x"], p1)
                if p2 != 0:
                    self.tableModel.setData(
                        self.selected_item_index.sibling(
                            self.selected_item_index.row(), 3), 0, Qt.EditRole)
            self.update_graph(xyDict)
        except Exception as E:
            log.error(E)
Example 10
    pstRRM.append(testRRM)
    testRRm = 1. + infperiod * ln(gamma.ppf(0.01, a=alpha, scale=1. / beta))
    if (testRRm < 0.): testRRm = 0.
    pstRRm.append(testRRm)
    # print('estimated RR=', RRest, testRRm, testRRM)  # to see the numbers for the evolution of Rt

    if (new_cases > 0. and old_new_cases > 0.):
        NewCases.append(new_cases)

        # Using a negative binomial as the posterior predictor of new cases, given the old ones.
        # It takes parameters r, p, which are functions of the new alpha, beta from the Gamma.
        r, p = alpha, beta / (old_new_cases + beta)
        mean, var, skew, kurt = nbinom.stats(r, p, moments='mvsk')

        pred.append(mean)  # the expected value of new cases
        testciM = nbinom.ppf(0.99, r, p)  # upper boundary of the 99% confidence interval for new cases
        pstdM.append(testciM)
        testcim = nbinom.ppf(0.01, r, p)  # lower boundary
        pstdm.append(testcim)

        newp = p
        newr = r
        flag = 0
        while (new_cases > testciM or new_cases < testcim):
            if (flag == 0):
                anomalyday.append(dates[i + 1])  # the first new cases are at i=2
                anomalypred.append(new_cases)

            # print("anomaly", testcim, new_cases, testciM, newr, newp)  # new cases falling outside the 99% CI
            # annealing: increase the variance so as to encompass the anomalous observation, letting the Bayesian code recover
            # mean of negative binomial = r*(1-p)/p, variance = r*(1-p)/p**2
Example 11
def zero_truncated_NB(size,
                      n,
                      p,
                      poissonLimit=False,
                      quantile=0.999,
                      MHSteps=100):
    """
    returns a sample of size "size" from the negative binomial distribution with 
    parameters n, p under the condition that at least one element in the
    sample is nonzero.
    MHSteps denotes the number of Metropolis-Hastings iterations
    """
    if p == 1:
        poissonLimit = True

    # if obtaining a random sample with total count 0 is sufficiently unlikely,
    # sample until a suitable sample is found.
    if poissonLimit:
        zeroP = np.exp(-size * n)
    else:
        zeroP = p**(size * n)
    if zeroP < 0.7:
        # rejection sampling: redraw until at least one entry is nonzero
        while True:
            if poissonLimit:
                result = np.random.poisson(n, size)
            else:
                result = np.random.negative_binomial(n, p, size)
            if result.any():
                return result

    # pmf of truncated negative binomial for total count
    q = min(quantile * (1 - zeroP) + zeroP, 0.999999)

    if poissonLimit:
        maxbin = poisson.ppf(q, size * n)
    else:
        dist = nbinom(n, p)
        maxbin = nbinom.ppf(q, size * n, p)

    maxbin = max(maxbin, 5)
    x = np.arange(1, maxbin + 1)
    if poissonLimit:
        trunc_pmf = poisson.pmf(x, size * n)
    else:
        trunc_pmf = nbinom.pmf(x, size * n, p)

    trunc_pmf /= np.sum(trunc_pmf)

    # sampling the total count value
    totalCount = np.random.choice(x, p=trunc_pmf)

    if poissonLimit:
        return np.random.multinomial(totalCount, np.full(size, 1 / size))
    elif totalCount == 1:
        # if only one observation has been made, it does not matter where it occurred
        result = np.zeros(size)
        result[0] = 1
        return result
    elif totalCount == 2:
        # if two observations have been made, we have to decide whether they
        # occurred in the same sample or in distinct samples

        # when computing the joint probabilities of the possible events, I
        # neglect factors that appear in all probabilities

        # p11 = (size choose 2) * pmf(1)**2
        p11 = (size - 1) / 2 * dist.pmf(1)**2
        # p20 = size * pmf(2) * pmf(0)
        p20 = dist.pmf(2) * dist.pmf(0)

        norm = p11 + p20
        p11 /= norm
        p20 /= norm

        result = np.zeros(size)
        if np.random.choice([True, False], p=[p11, p20]):
            result[:2] = 1
        else:
            result[0] = 2
        return result
    elif totalCount == 3:
        # p111 = (size choose 3) * pmf(1, n, p)**3
        p111 = (size - 1) * (size - 2) / 6 * dist.pmf(1)**3
        p210 = (size - 1) * dist.pmf(2) * dist.pmf(1) * dist.pmf(0)
        p300 = dist.pmf(3) * dist.pmf(0)**2
        ps = np.array([p111, p210, p300])
        ps /= np.sum(ps)

        result = np.zeros(size)
        choice = np.random.choice(np.arange(3), p=ps)
        if choice == 0:
            result[:3] = 1
        elif choice == 1:
            result[0] = 2
            result[1] = 1
        elif choice == 2:
            result[0] = 3
        return result
    else:
        return _dist_bins_MH(size, totalCount, dist, MHSteps)
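A usage sketch with made-up parameters; the truncation guarantees the returned sample is not all zeros:

sample = zero_truncated_NB(size=20, n=0.5, p=0.9)
print(sample.sum() >= 1)  # always True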
Example 12
def run_luis_model(df: pd.DataFrame, filepath: Path) -> None:

    infperiod = 4.5  # length of infectious period, adjust as needed

    def smooth(y, box_pts):
        box = np.ones(box_pts) / box_pts
        y_smooth = np.convolve(y, box, mode='same')
        return y_smooth

    # Loop through states
    states = df['state'].unique()

    returndf = pd.DataFrame()
    for state in states:

        from scipy.stats import gamma  # not sure why this needs to be re-imported for each state, but otherwise we get a type exception
        import numpy as np

        statedf = df[df['state'] == state].sort_values('date')

        confirmed = list(statedf['positive'])
        dates = list(statedf['date'])
        day = list(range(1, len(statedf['date']) + 1))

        if (confirmed[-1] < 10.):
            continue  # this skips the Rt analysis for states for which there are <10 total cases

        ##### estimation and prediction
        dconfirmed = np.diff(confirmed)
        for ii in range(len(dconfirmed)):
            if dconfirmed[ii] < 0.: dconfirmed[ii] = 0.
        xd = dates[1:]

        sdays = 15
        yy = smooth(
            dconfirmed, sdays
        )  # smoothing over sdays (number of days) moving window, averages large chunking in reporting in consecutive days
        yy[-2] = (
            dconfirmed[-4] + dconfirmed[-3] + dconfirmed[-2]
        ) / 3.  # these last 2 lines should not be necessary, but the data tend to be initially underreported and the smoother struggles at the edges.
        yy[-1] = (dconfirmed[-3] + dconfirmed[-2] + dconfirmed[-1]) / 3.

        #lyyy=np.cumsum(lwy)
        TotalCases = np.cumsum(
            yy
        )  # confirmed cases after smoothing; a lowess smoother was also tried but was a bit more parameter dependent from place to place.

        alpha = 3.  # shape parameter of gamma distribution
        beta = 2.  # rate parameter of gamma distribution see https://en.wikipedia.org/wiki/Gamma_distribution

        valpha = []
        vbeta = []

        pred = []
        pstdM = []
        pstdm = []
        xx = []
        NewCases = []

        predR = []
        pstRRM = []
        pstRRm = []

        anomalyday = []
        anomalypred = []

        for i in range(2, len(TotalCases)):
            new_cases = float(TotalCases[i] - TotalCases[i - 1])
            old_new_cases = float(TotalCases[i - 1] - TotalCases[i - 2])

            # This uses a conjugate prior as a Gamma distribution for b_t, with parameters alpha and beta
            alpha = alpha + new_cases
            beta = beta + old_new_cases
            valpha.append(alpha)
            vbeta.append(beta)

            mean = gamma.stats(a=alpha, scale=1 / beta, moments='m')

            RRest = 1. + infperiod * ln(mean)
            if (RRest < 0.): RRest = 0.
            predR.append(RRest)
            testRRM = 1. + infperiod * ln(
                gamma.ppf(0.99, a=alpha, scale=1. / beta)
            )  # these are the boundaries of the 99% confidence interval  for new cases
            if (testRRM < 0.): testRRM = 0.
            pstRRM.append(testRRM)
            testRRm = 1. + infperiod * ln(
                gamma.ppf(0.01, a=alpha, scale=1. / beta))
            if (testRRm < 0.): testRRm = 0.
            pstRRm.append(testRRm)

            if (new_cases == 0. or old_new_cases == 0.):
                pred.append(0.)
                pstdM.append(10.)
                pstdm.append(0.)
                NewCases.append(0.)

            if (new_cases > 0. and old_new_cases > 0.):
                NewCases.append(new_cases)

                # Using a negative binomial as the posterior predictor of new cases, given the old ones.
                # It takes parameters r, p, which are functions of the new alpha, beta from the Gamma.
                r, p = alpha, beta / (old_new_cases + beta)
                mean, var, skew, kurt = nbinom.stats(r, p, moments='mvsk')

                pred.append(mean)  # the expected value of new cases
                testciM = nbinom.ppf(
                    0.99, r, p
                )  # these are the boundaries of the 99% confidence interval  for new cases
                pstdM.append(testciM)
                testcim = nbinom.ppf(0.01, r, p)
                pstdm.append(testcim)

                new_p = p  # renamed from the original's `np`/`nr`, which shadowed the numpy alias
                new_r = r
                flag = 0

                while (new_cases > testciM or new_cases < testcim):
                    if (flag == 0):
                        anomalypred.append(new_cases)
                        anomalyday.append(
                            dates[i + 1])  # the first new cases are at i=2

                    # annealing: increase the variance so as to encompass the anomalous observation, letting the Bayesian code recover
                    # mean of negative binomial = r*(1-p)/p, variance = r*(1-p)/p**2
                    # preserve the mean, increase the variance --> shrink p, then rescale r to compensate
                    # test anomaly

                    annealed_p = 0.95 * new_p  # shrinking p inflates the variance (var = mean/p), which tends to be small after many Bayesian steps
                    new_r = new_r * (annealed_p / new_p) * (
                        (1. - new_p) / (1. - annealed_p)
                    )  # this assignment preserves the mean of expected cases
                    new_p = annealed_p
                    mean, var, skew, kurt = nbinom.stats(new_r,
                                                         new_p,
                                                         moments='mvsk')
                    testciM = nbinom.ppf(0.99, new_r, new_p)
                    testcim = nbinom.ppf(0.01, new_r, new_p)

                    flag = 1
                else:  # while-else: runs once the loop exits with new_cases back inside the CI
                    if (flag == 1):
                        alpha = new_r  # this updates the R distribution with the new parameters that enclose the anomaly
                        beta = new_p / (1. - new_p) * old_new_cases

                        testciM = nbinom.ppf(0.99, new_r, new_p)
                        testcim = nbinom.ppf(0.01, new_r, new_p)

                        # annealing leaves the RR mean unchanged, but we need to adjust its widened CI:
                        testRRM = 1. + infperiod * ln(
                            gamma.ppf(0.99, a=alpha, scale=1. / beta)
                        )  # these are the boundaries of the 99% confidence interval for new cases
                        if (testRRM < 0.): testRRM = 0.
                        testRRm = 1. + infperiod * ln(
                            gamma.ppf(0.01, a=alpha, scale=1. / beta))
                        if (testRRm < 0.): testRRm = 0.

                        pstRRM = pstRRM[:-1]  # remove the last element and replace it with the expanded CI for RRest
                        pstRRm = pstRRm[:-1]
                        pstRRM.append(testRRM)
                        pstRRm.append(testRRm)

        # visualization of the time evolution of R_t with confidence intervals
        x = []
        for i in range(len(predR)):
            x.append(i)
        days = dates[3:]
        xd = days
        dstr = []
        for xdd in xd:
            dstr.append(xdd.strftime("%Y-%m-%d"))

        appenddf = pd.DataFrame({
            'state': state,
            'date': days,
            'RR_pred_luis': predR,
            'RR_CI_lower_luis': pstRRm,
            'RR_CI_upper_luis': pstRRM
        })
        returndf = pd.concat([returndf, appenddf], axis=0)

    returndf.to_csv(filepath / "luis_code_estimates.csv", index=False)
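The Bayesian core of the loop above, isolated as a sketch: a Gamma(alpha, beta) prior on the growth factor is updated with the day's counts, and the posterior predictive for new cases is negative binomial with r = alpha and p = beta / (old_new_cases + beta):

from scipy.stats import nbinom

alpha, beta = 3.0, 2.0                   # prior shape and rate
new_cases, old_new_cases = 120.0, 100.0  # illustrative daily counts
alpha += new_cases
beta += old_new_cases
r, p = alpha, beta / (old_new_cases + beta)
print(nbinom.mean(r, p), nbinom.ppf(0.01, r, p), nbinom.ppf(0.99, r, p))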
Example 13
 def test_ppf(self):
     n, p = sm.distributions.zinegbin.convert_params(5, 1, 1)
     nbinom_ppf = nbinom.ppf(0.71, n, p)
     zinbinom_ppf = sm.distributions.zinegbin.ppf(0.71, 5, 1, 1, 0)
     assert_allclose(nbinom_ppf, zinbinom_ppf, rtol=1e-12, atol=1e-12)
Example 14
import numpy as np
from scipy.stats import nbinom
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

n, p = 0.4, 0.4
mean, var, skew, kurt = nbinom.stats(n, p, moments='mvsk')

# Display the probability mass function (``pmf``):

x = np.arange(nbinom.ppf(0.01, n, p), nbinom.ppf(0.99, n, p))
ax.plot(x, nbinom.pmf(x, n, p), 'bo', ms=8, label='nbinom pmf')
ax.vlines(x, 0, nbinom.pmf(x, n, p), colors='b', lw=5, alpha=0.5)

# Alternatively, the distribution object can be called (as a function)
# to fix the shape and location. This returns a "frozen" RV object holding
# the given parameters fixed.

# Freeze the distribution and display the frozen ``pmf``:

rv = nbinom(n, p)
ax.vlines(x,
          0,
          rv.pmf(x),
          colors='k',
          linestyles='-',
          lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()
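The same scipy docstring also checks the accuracy of cdf and ppf (ppf inverts cdf on the support):

prob = nbinom.cdf(x, n, p)
print(np.allclose(x, nbinom.ppf(prob, n, p)))  # True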
Example 15
def collect_and_plot_passes_nb(teams_list=None,
                               teams_dict=None,
                               plot_output='single',  # 'single' or 'all'; the original default was a list that matched neither branch below
                               teams_col_dict=None):

    team_sequences = {}
    dict_of_passing_stats = {}
    all_sequences = []

    for tm in teams_list:
        passing_stats = {}
        df = teams_dict[tm]
        list_of_dates = set(df['Date/Time'])

        date_sequences = {}
        for d in list_of_dates:
            df_filter = df[df['Date/Time'] == d]
            df_filter = df_filter[df_filter['Event Type'] != 'Cessation']
            opponent = df_filter['Opponent'].iloc[0]
            kee = str(d) + ' | ' + opponent
            date_sequences[kee] = get_sequences(df_filter)
        team_sequences[tm] = date_sequences
        counts = convert_date_sequences_to_list_and_count(date_sequences)
        all_sequences.extend(counts)

        x_values_for_barplot = [key for key, group in groupby(counts)]
        y_values_for_barplot = [
            i / sum([len(list(group)) for key, group in groupby(counts)])
            for i in [len(list(group)) for key, group in groupby(counts)]
        ]

        ## (GP) NB Estimation
        mu = sum(counts) / len(counts)
        sigma = math.sqrt(
            sum([(mu - float(i))**2
                 for i in counts]) / (len([(mu - float(i))**2
                                           for i in counts]) - 1))
        r = (mu**2) / (sigma**2 - mu)
        p = (mu) / (sigma**2)

        mean, var, skew, kurt = nbinom.stats(r, p, moments='mvsk')

        passing_stats['nb_probability'] = p
        passing_stats['nb_r'] = r
        passing_stats['avg_passes'] = mean
        passing_stats['var_passes'] = sigma**2
        passing_stats['nb_skew'] = skew
        passing_stats['nb_kurtosis'] = kurt

        dict_of_passing_stats[tm] = passing_stats

        if plot_output == 'single':
            x_values_for_nb = np.arange(nbinom.ppf(0.01, r, p),
                                        nbinom.ppf(0.9999, r, p))
            y_values_for_nb = nbinom.pmf(x_values_for_nb, r, p)

            fig = go.Figure(data=[
                go.Bar(x=x_values_for_barplot,
                       y=y_values_for_barplot,
                       marker_color=teams_col_dict[tm],
                       marker_line_color="black",
                       name="Passes Completed")
            ])

            fig.add_trace(
                go.Scatter(x=x_values_for_nb,
                           y=y_values_for_nb,
                           marker_color="black",
                           mode='lines',
                           name='Negative Binomial Approximation'))

            fig.update_layout(
                title="{}: Catch Counts, with Negative Binomial Estimation".
                format(tm),
                xaxis_title="n Number of Catches",
                yaxis_title="Frequency",
                boxmode='group',
                plot_bgcolor='rgb(220,220,220)')

            iplot(fig)

    all_sequences.sort()

    if plot_output == 'all':
        mu_a = sum(all_sequences) / len(all_sequences)
        sigma_a = math.sqrt(
            sum([(mu_a - float(i))**2 for i in all_sequences]) /
            (len([(mu_a - float(i))**2 for i in all_sequences]) - 1))
        r_a = (mu_a**2) / (sigma_a**2 - mu_a)
        p_a = (mu_a) / (sigma_a**2)

        mean_a, var_a, skew_a, kurt_a = nbinom.stats(r_a, p_a, moments='mvsk')

        x_values_for_barplot_a = [key for key, group in groupby(all_sequences)]
        y_values_for_barplot_a = [
            i /
            sum([len(list(group)) for key, group in groupby(all_sequences)])
            for i in
            [len(list(group)) for key, group in groupby(all_sequences)]
        ]

        x_values_for_nb_a = np.arange(nbinom.ppf(0.01, r_a, p_a),
                                      nbinom.ppf(0.9999, r_a, p_a))
        y_values_for_nb_a = nbinom.pmf(x_values_for_nb_a, r_a, p_a)

        fig = go.Figure(data=[
            go.Bar(x=x_values_for_barplot_a,
                   y=y_values_for_barplot_a,
                   marker_color="oldlace",
                   marker_line_color="black",
                   name="Passes Completed")
        ])

        fig.add_trace(
            go.Scatter(x=x_values_for_nb_a,
                       y=y_values_for_nb_a,
                       marker_color="black",
                       mode='lines',
                       name='Negative Binomial Approximation'))

        fig.update_layout(
            title=
            "League Wide Catch Counts Per Possession, with Negative Binomial Estimation",
            xaxis_title="n Number of Catches in a Possession",
            yaxis_title="Frequency",
            boxmode='group',
            plot_bgcolor='rgb(220,220,220)')

        iplot(fig)

    return (dict_of_passing_stats, team_sequences, all_sequences)
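The method-of-moments fit used above, isolated: for sample mean mu and variance s2 (with s2 > mu), r = mu**2 / (s2 - mu) and p = mu / s2. A quick check against a known distribution:

import numpy as np

counts = np.random.negative_binomial(4, 0.4, size=5000)
mu, s2 = counts.mean(), counts.var(ddof=1)
r, p = mu**2 / (s2 - mu), mu / s2
print(r, p)  # should land near the true (4, 0.4)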
Example 16
def qzinegbin(p, size, pstr0, prob = None, munb = None, nVariables = None):
    """   Percent point function of a zero-inflated negative binomial distribution   """ # Same nomenclature as the R function.
    # 1. Requirements:
    # size, munb, prob and pstr0 must have the same length as p.
    # Given that each species is a variable and its mean may differ from the others, the best approach is to pass a vector of means and a vector of sizes.
    # This function does NOT work with single values of size, prob, munb, and pstr0; it needs lists.
    nSpecies = p.shape[1]
    nSamples = p.shape[0]
    if isinstance(size, float) or isinstance(size, int):
        #print('Need a list with one value per variable. Providing the argument nVariables  the value will be repeated nVariables times')
        size = [size] * nVariables # Repeat the same size nVariables time
    if isinstance(prob, float) or isinstance(prob, int):
        #print('Need a list with one value per variable. Providing the argument nVariables the value will be repeated nVariables times')
        prob = [prob] * nVariables
    if isinstance(munb, float) or isinstance(munb, int):
        #print('Need a list with one value per variable. Providing the argument nVariables the value will be repeated nVariables times')
        munb = [munb] * nVariables
    if isinstance(pstr0, float) or isinstance(pstr0, int):
        #print('Need a list with one value per variable. Providing the argument nVariables the value will be repeated nVariables times')
        pstr0 = [pstr0] * nVariables                  
    # 2. Repeated munb, size, prob and pstr0 by each value of the same variable
    if len(munb):
        prob = [s/(s + m) for s, m in zip(size, munb)]
    # Number of values
    LLL = max(len(p.flatten()), len(prob), len(pstr0), len(size), len(munb))
    p = np_rep_len(p.flatten(), LLL)
    if len(pstr0) != LLL:
        pstr0 = np_rep_len(pstr0, LLL)
    if len(prob) != LLL:
        prob = np_rep_len(prob, LLL)
    if len(size) != LLL:
        size = np_rep_len(size, LLL)
    if len(munb) != LLL:
        munb = np_rep_len(munb, LLL)
    # 3. Now everything has the proper length (same values -> same distribution for each variable).
    # 3.1 Create an empty list (it must be a list to mix 'NA' strings with floats; a numpy array holds just one datatype)
    ans = list(np.repeat(float('nan'), LLL))
    prob0 = [p**s for p, s in zip(prob, size)]
    deflat_limit=[]
    for i in range(len(prob0)):
        if (1 - prob0[i]) == 0: #1- prob0[i] = 0 only when prob0[i] = 1
            deflat_limit.append(float('-inf'))
            #elif prob0[i] < 0: 
            #    deflat_limit[i] = float('inf')
        elif ((1 - prob0[i]) == 0 and prob0[i] == 0):
            deflat_limit.append(float('nan'))
        else:
            deflat_limit.append(-prob0[i] / (1 - prob0[i]))
    for i in range(len(ans)):
        if p[i] <= pstr0[i]:
            ans[i] = 0
    ind4 =  [(pstr0[i] < p[i]) and (deflat_limit[i] <= pstr0[i]) for i in range(len(p))]
    q = [(p[i] - pstr0[i]) / (1 - pstr0[i]) for i in range(len(p)) if ind4[i]]
    n = [size[i] for i in range(len(size)) if ind4[i]]
    pr = [prob[i] for i in range(len(prob)) if ind4[i]]
    j = 0
    for i in range(len(ind4)):
        if ind4[i]:
            ans[i] = nbinom.ppf(q = q[j], n = n[j], p = pr[j], loc = 0) # Not exactly equal to the R function: R returns 0 with a warning in some cases where Python returns nan.
            j = j + 1
    for i in range(len(ans)):
        if pstr0[i] < deflat_limit[i]:
            ans[i] = float('nan')
        if 1 < pstr0[i]:
            ans[i] = float('nan')
        if p[i] < 0:
            ans[i] = float('nan')
        if 1 < p[i]:
            ans[i] = float('nan')
    return(np.array(ans).reshape(nSamples, nSpecies))
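A usage sketch with a 2-sample x 2-species quantile matrix; size and munb are per-species and pstr0 is the zero-inflation weight (all values made up; assumes the module's np_rep_len helper is available):

import numpy as np

pmat = np.array([[0.5, 0.9], [0.2, 0.99]])
out = qzinegbin(pmat, size=[5, 5], pstr0=[0.1, 0.1], munb=[10, 10], nVariables=2)
print(out.shape)  # (2, 2)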
Example 17
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gamma, nbinom, poisson

plt.clf()

fig, ax = plt.subplots(1, 1)
a = 2
b = 3
mean, var, skew, kurt = gamma.stats(a, moments='mvsk')
print(mean,var,skew,kurt)
mean, var, skew, kurt = gamma.stats(a, scale=b, moments='mvsk')
print(mean,var,skew,kurt)
x = np.linspace(gamma.ppf(0.01, a), gamma.ppf(0.99, a), 100)
ax.plot(x, gamma.pdf(x, a), 'r-', lw=5, alpha=0.6, label='gamma-a=2,b=1 pdf')
ax.plot(x, gamma.pdf(x, a, scale = b), 'r-', lw=5, alpha=0.6, label='gamma-a=2,b=3 pdf')
ax.legend(loc='best', frameon=False)
plt.show()

fig, ax = plt.subplots(1, 1)
n,p=5,0.5
mean, var, skew, kurt = nbinom.stats(n,p, moments='mvsk')
print(mean,var,skew,kurt)
start = nbinom.ppf(0.01, n,p)
stop  = nbinom.ppf(0.99, n,p)
x = np.linspace(nbinom.ppf(0.01, n,p), nbinom.ppf(0.99, n,p), num = int(stop-start+1))
print(x)
ax.plot(x, nbinom.pmf(x, n, p), 'bo', ms=8, label='nbinom pmf')
ax.plot(x, poisson.pmf(x, 5), 'ro', ms=8, label='poisson pmf')
ax.legend(loc='best', frameon=False)
plt.show()



Example 18
def analytical_MPVS(
        infection_ts: pd.DataFrame, 
        smoothing: Callable,
        alpha: float = 3.0,                # shape 
        beta:  float = 2.0,                # rate
        CI:    float = 0.95,               # confidence interval 
        infectious_period: int = 5*days,   # inf period = 1/gamma,
        variance_shift: float = 0.99,      # how much to scale variance parameters by when anomaly detected 
        totals: bool = True                # are these case totals or daily new cases?
    ):
    """Estimates Rt ~ Gamma(alpha, 1/beta), and implements an analytical expression for a mean-preserving variance increase whenever case counts fall outside the CI defined by a negative binomial distribution"""
    # infection_ts = infection_ts.copy(deep = True)
    dates = infection_ts.index
    if totals:
        # daily_cases = np.diff(infection_ts.clip(lower = 0)).clip(min = 0) # infection_ts clipped because COVID19India API does weird stuff
        daily_cases = infection_ts.clip(lower = 0).diff().clip(lower = 0).iloc[1:]
    else: 
        daily_cases = infection_ts 
    total_cases = np.cumsum(smoothing(np.squeeze(daily_cases)))

    v_alpha, v_beta = [], []

    RR_pred, RR_CI_upper, RR_CI_lower = [], [], []

    T_pred, T_CI_upper, T_CI_lower = [], [], []

    new_cases_ts = []

    anomalies     = []
    anomaly_dates = []

    for i in range(2, len(total_cases)):
        new_cases     = max(0, total_cases[i]   - total_cases[i-1])
        old_new_cases = max(0, total_cases[i-1] - total_cases[i-2])

        alpha += new_cases
        beta  += old_new_cases
        v_alpha.append(alpha)
        v_beta.append(beta)

        RR_est   = max(0, 1 + infectious_period*np.log(Gamma.mean(     a = alpha, scale = 1/beta)))
        RR_upper = max(0, 1 + infectious_period*np.log(Gamma.ppf(CI,   a = alpha, scale = 1/beta)))
        RR_lower = max(0, 1 + infectious_period*np.log(Gamma.ppf(1-CI, a = alpha, scale = 1/beta)))
        RR_pred.append(RR_est)
        RR_CI_upper.append(RR_upper)
        RR_CI_lower.append(RR_lower)

        if (new_cases == 0 or old_new_cases == 0):
            if new_cases == 0:
                logger.debug("new_cases at time %s: 0", i)
            if old_new_cases == 0:
                logger.debug("old_new_cases at time %s: 0", i)
            T_pred.append(0)
            T_CI_upper.append(10) # <- where does this come from?
            T_CI_lower.append(0)
            new_cases_ts.append(0)

        if (new_cases > 0 and old_new_cases > 0):
            new_cases_ts.append(new_cases)

            r, p = alpha, beta/(old_new_cases + beta)
            T_pred.append(nbinom.mean(r, p))
            T_upper = nbinom.ppf(CI,   r, p)
            T_lower = nbinom.ppf(1-CI, r, p)
            T_CI_upper.append(T_upper)
            T_CI_lower.append(T_lower)

            _np = p
            _nr = r 
            anomaly_noted = False
            counter = 0
            while not (T_lower < new_cases < T_upper):
                if not anomaly_noted:
                    anomalies.append(new_cases)
                    anomaly_dates.append(dates[i])
                
                # logger.debug("anomaly identified at time %s: %s < %s < %s, r: %s, p: %s, annealing iteration: %s", i, T_lower, new_cases, T_upper, _nr, _np, counter+1)
                # nnp = 0.95 *_np # <- where does this come from 
                _nr = variance_shift * _nr * ((1-_np)/(1-variance_shift*_np) )
                _np = variance_shift * _np 
                T_upper = nbinom.ppf(CI,   _nr, _np)
                T_lower = nbinom.ppf(1-CI, _nr, _np)
                T_lower, T_upper = sorted((T_lower, T_upper))
                if T_lower == T_upper == 0:
                    T_upper = 1
                    logger.debug("CI collapse, setting T_upper -> 1")
                anomaly_noted = True

                counter += 1
                if counter >= 10000:
                    raise ValueError("Number of iterations exceeded")
            else:
                if anomaly_noted:
                    alpha = _nr # update distribution on R with new parameters that enclose the anomaly 
                    beta = _np/(1-_np) * old_new_cases

                    T_pred[-1] = nbinom.mean(_nr, _np)
                    T_CI_upper[-1] = nbinom.ppf(CI,   _nr, _np)  # upper gets ppf(CI), matching the loop above
                    T_CI_lower[-1] = nbinom.ppf(1-CI, _nr, _np)

                    # annealing leaves the RR mean unchanged, but we need to adjust its widened CI
                    RR_upper = max(0, 1 + infectious_period * np.log(Gamma.ppf(CI    , a = alpha, scale = 1/beta)))
                    RR_lower = max(0, 1 + infectious_period * np.log(Gamma.ppf(1 - CI, a = alpha, scale = 1/beta)))

                    # replace latest CI time series entries with adjusted CI 
                    RR_CI_upper[-1] = RR_upper
                    RR_CI_lower[-1] = RR_lower
    return (
        dates[2:], 
        RR_pred, RR_CI_upper, RR_CI_lower, 
        T_pred, T_CI_upper, T_CI_lower, 
        total_cases, new_cases_ts, 
        anomalies, anomaly_dates
    )
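A quick check of the mean-preserving variance shift used in the annealing step above: scaling p by c and r by c*(1-p)/(1-c*p) leaves the mean r*(1-p)/p unchanged while inflating the variance:

from scipy.stats import nbinom

r, p, c = 20.0, 0.4, 0.99
r2, p2 = c * r * (1 - p) / (1 - c * p), c * p
print(nbinom.mean(r, p), nbinom.mean(r2, p2))  # identical means
print(nbinom.var(r, p), nbinom.var(r2, p2))    # variance increases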
Example 19
 def ssq(obs, n, p):
     # q (the quantiles at which obs was measured) comes from the enclosing scope
     exp = nbinom.ppf(q, n, p)
     ssq = np.sum([(x - exp[i]) ** 2 for i, x in enumerate(obs)])
     return ssq
Example 20
def max_llh_given_r_param(neg_binom_r_param, count_data):
    """Log likelihood of count data under a negative binomial with fixed r,
    with p set to its maximum-likelihood value given r.

    :param neg_binom_r_param: int (could be float too), parameter of neg binom distribution
    :param count_data: np array, count values we model with negative binomial
    :return: float, log likelihood
    """
    num_counts = len(count_data)
    p = 1 - sum(count_data) / (num_counts * neg_binom_r_param + sum(count_data))

    llh = sum(nbinom.logpmf(count_data, neg_binom_r_param, p))
    return llh


# set parameters
n, p = 10, 0.4

# generate x values and get pmf
x = np.arange(nbinom.ppf(0.01, n, p), nbinom.ppf(0.99, n, p))
pmf = nbinom.pmf(x, n, p)

# plot
plt.plot(pmf)
plt.axvline(x=n)
plt.show()

# check whether peak occurs at correct value for n (r).
r_vals = np.arange(5, 20)
llh = np.zeros(len(r_vals))
counts = np.random.negative_binomial(n, p, size=100000)
for ii, r in enumerate(r_vals):
    llh[ii] = max_llh_given_r_param(r, counts)

# plot
Example 21
 def test_ppf_p2(self):
     n, p = sm.distributions.zinegbin.convert_params(100, 1, 2)
     nbinom_ppf = nbinom.ppf(0.27, n, p)
     zinbinom_ppf = sm.distributions.zinegbin.ppf(0.27, 100, 1, 2, 0)
     assert_allclose(nbinom_ppf, zinbinom_ppf, rtol=1e-12, atol=1e-12)
Example 22
 def _rvs(self, n, p):
     # inverse-CDF sampling of the zero-truncated distribution: draw u from
     # Uniform(P(X=0), 1) so the ppf can never return the zero bin
     return nbinom.ppf(uniform(low=nbinom.pmf(0, n, p)), n, p)
Example 23
 def _ppf(self, q, n, p):
     # map q from [0, 1] onto [cdf(0), 1] so quantiles skip the zero bin
     return nbinom.ppf(nbinom.sf(0, n, p) * q + nbinom.pmf(0, n, p), n, p)
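Examples 22 and 23 are two views of the same zero-truncated sampler: example 22 draws u ~ Uniform(P(X=0), 1) and inverts the cdf, while example 23 remaps the quantile q onto [cdf(0), 1] directly, so the zero bin can never be returned:

from scipy.stats import nbinom

n, p, q = 3, 0.5, 0.5
x = nbinom.ppf(nbinom.sf(0, n, p) * q + nbinom.pmf(0, n, p), n, p)
print(x >= 1)  # True for any q in (0, 1]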
Example 24
 def get_ui(self, params: List[ndarray], bounds: Tuple[float, float]) -> List[np.ndarray]:
     n = params[0]
     p = params[1]
     return [nbinom.ppf(bounds[0], n=n, p=p),
             nbinom.ppf(bounds[1], n=n, p=p)]