Ejemplo n.º 1
0
def plot_poisson():
    """Plot a Poisson pmf with mean 7.869325 and print three probabilities.

    The pmf is drawn as markers plus wide coloured vertical lines, and the
    pmf of the frozen distribution is overlaid as thin black lines.  The
    function then prints P(X <= 6), P(6 < X <= 9) and
    P(9 < X <= 10000), one per line, and shows the figure.
    """
    fig, ax = plt.subplots(1, 1)

    # This is prediction for Wawrinka in 2014
    mu = 7.869325

    x = np.arange(poisson.ppf(0.01, mu), poisson.ppf(0.999, mu))
    pmf_values = poisson.pmf(x, mu)
    ax.plot(x, pmf_values, 'wo', ms=8, label='poisson pmf')
    # One colour per vertical line: 5 blue, 3 red, 8 green (16 in total).
    ax.vlines(x, 0, pmf_values,
              colors=['b'] * 5 + ['r'] * 3 + ['g'] * 8, lw=5, alpha=0.5)

    rv = poisson(mu)
    ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1, label='frozen pmf')

    plt.title("Stanislas Wawrinka")
    plt.xlabel('# QF+ Finishes in 2014')
    plt.ylabel('Probability')

    cdf_6 = poisson.cdf(6, mu)
    cdf_9 = poisson.cdf(9, mu)
    prob0 = cdf_6
    prob123 = cdf_9 - cdf_6
    probAbove3 = poisson.cdf(10000, mu) - cdf_9
    # Single-argument print calls behave identically on Python 2 and 3.
    print(prob0)
    print(prob123)
    print(probAbove3)

    plt.show()
    def testprobabilitiespoisson(self):
        """Compare the model's probabilities with directly computed Poisson values.

        A 4x3 table is built whose columns hold P(X = 0), P(X = 1) and
        P(X > 1) for the expected value obtained from the first coefficient
        set; the test requires it to equal the model's output exactly.
        """
        rate = self.data.calculate_equation(self.coefficients[0])

        expected = zeros((4,3))
        for count in (0, 1):
            expected[:,count] = poisson.pmf(count, rate)
        # Everything above one count is lumped into the last column.
        expected[:,2] = 1 - poisson.cdf(1, rate)

        actual = self.model.calc_probabilities(self.data)
        matches = all(expected == actual)
        self.assertEqual(True, matches)
Ejemplo n.º 3
0
def lrtest(data,xvars):
    """Likelihood ratio test between the input model and the null "traffic only" model.

    Parameters
    ----------
    data : table indexable by column name
        Must provide the columns 'pedinj_count', 'pvtraf' and every name in
        ``xvars``.
    xvars : list of column names
        Predictors of the model under test.

    Returns
    -------
    TS : float
        The test statistic ``-2 * (LL_0 - LL_1)``, where LL_0 and LL_1 are
        the summed Poisson log-likelihoods of the observed counts under the
        null and the tested model.  The statistic is also printed.
    """
    pedinj_count = data['pedinj_count']
    xvars_0 = shape_xvars(data[['pvtraf']])
    xvars_1 = shape_xvars(data[xvars])
    inj_pred_0 = bilin_regr(data,['pvtraf'],plot=False).predict(xvars_0)
    inj_pred_1 = bilin_regr(data,xvars,plot=False).predict(xvars_1)
    # logpmf avoids log(0) = -inf when an individual pmf value underflows.
    LL_0 = poisson.logpmf(pedinj_count,inj_pred_0).sum()
    LL_1 = poisson.logpmf(pedinj_count,inj_pred_1).sum()
    TS = -2 * (LL_0 - LL_1)
    # %-formatting yields the same line on Python 2 and Python 3.
    print("TS, LL_1, LL_0 =  %s %s %s" % (TS, LL_1, LL_0))
    return TS
Ejemplo n.º 4
0
def _error( value ) :
  '''Construct Bayesian errors using Poisson distribution.

  Searches for a pair of Poisson means (lambda_down, lambda_up) around
  `value` and returns the tuple (value - lambda_down, lambda_up - value).
  '''
  # likelihood = P(value|lambda) using underlying Poisson assumption
  # error: lambdas with equal likelihood for which area in between is 68%
  # NOTE(review): for value == 0 the step size is 0, so the loops below appear
  # unable to move the bounds -- confirm callers never pass 0.
  lambda_up, lambda_down, step_size = 1.1*value, 0.9*value, float(value)/10
  # Five refinement passes; each pass works with a step ten times smaller.
  for i in range(5) :
    # Pull both bounds back towards `value` by the current step, then shrink the step.
    lambda_up -= step_size; lambda_down += step_size; step_size /= 10
    # Raise the upper bound until the CDF difference reaches 0.6827 (about 68%).
    while (poisson.cdf(value,lambda_down) - poisson.cdf(value,lambda_up)) < 0.6826894921370859 :
      lambda_up += step_size
      # Lower the lower bound while its likelihood is still above the upper bound's.
      while poisson.pmf(value,lambda_down) > poisson.pmf(value,lambda_up) : lambda_down -= step_size/10
  return (value-lambda_down,lambda_up-value)
Ejemplo n.º 5
0
def poisson_marginals(means, accuracy=1e-10):
    """
    Find the pmfs, cdfs and approximate supports of a set of Poisson variables.

    For each mean the candidate states are 0 .. max(ceil(5 * mean), 20).
    A state belongs to the "support" when its cdf value is at most
    ``1 - accuracy`` and its pmf value is at least ``accuracy``.

    Code from the paper: 'Generating spike-trains with specified
    correlations', Macke et al., submitted to Neural Computation

    Adapted from `<http://www.kyb.mpg.de/bethgegroup/code/efficientsampling>`_

    Parameters
    ----------
    means : sequence of float
        The means of the Poisson random variables.
    accuracy : float, optional
        Threshold used to truncate the supports (default 1e-10).

    Returns
    -------
    pmfs : numpy.ndarray
        The k-th element is the pmf of the k-th variable on its support.
    cmfs : numpy.ndarray
        The k-th element is the cdf of the k-th variable on its support.
    supps : numpy.ndarray
        The k-th element holds the integer states of the k-th support, so
        that ``P(X_k == supps[k][i]) == pmfs[k][i]``.

    Each result is a regular 2-D array when all supports have the same
    length and a 1-D object array of per-variable arrays otherwise.
    """
    from scipy.stats import poisson
    import math

    def _stack(rows):
        # np.array() on ragged input raises ValueError on NumPy >= 1.24, so
        # rows of unequal length are packed into an object array explicitly.
        if len({len(row) for row in rows}) <= 1:
            return np.array(rows)
        packed = np.empty(len(rows), dtype=object)
        for idx, row in enumerate(rows):
            packed[idx] = row
        return packed

    cmfs = []
    pmfs = []
    supps = []

    for mean in means:
        states = range(0, int(max(math.ceil(5 * mean), 20) + 1))
        cmf = poisson.cdf(states, mean)
        pmf = poisson.pmf(states, mean)
        support = np.where((cmf <= 1 - accuracy) & (pmf >= accuracy))[0]
        supps.append(support)
        cmfs.append(cmf[support])
        pmfs.append(poisson.pmf(support, mean))

    return _stack(pmfs), _stack(cmfs), _stack(supps)
Ejemplo n.º 6
0
def getExpected(mu):
    """
    Return (AUC, X-intercept, elbow) for an ideal Poisson sample of mean mu.

    The three values are derived from the Poisson cdf and from the
    normalised cumulative sum of ``pmf * count`` over the counts
    0 .. upper end of the 99.999% interval.
    """
    upper = poisson.interval(0.99999, mu=mu)[1]
    counts = np.arange(round(upper + 1))  # This will be an appropriate range
    mass = poisson.pmf(counts, mu=mu)
    cumulative = poisson.cdf(counts, mu=mu)

    # Cumulative share of the pmf-weighted counts, scaled so it ends at 1.
    weighted = np.cumsum(mass * counts)
    weighted /= max(weighted)

    first_nonzero = np.nonzero(weighted)[0][0]
    x_intercept = cumulative[first_nonzero]
    area = sum(mass * weighted)
    # The elbow is taken where the cdf is furthest above the weighted curve.
    elbow = cumulative[np.argmax(cumulative - weighted)]
    return (area, x_intercept, elbow)
Ejemplo n.º 7
0
def generate_q0_via_shape_fit(data, bin_edges, binned_model, binned_params):
    '''Return the q0 statistic obtained from a template fit to the binned data.

    The data are histogrammed, the binned model is fitted to the counts, and
    q0 is twice the difference between the Poisson negative log-likelihood
    of the fit evaluated with A=0 and that of the best fit.'''

    counts, bin_edges = np.histogram(data, bin_edges, range=(100, 180))
    bin_index = np.asarray(range(len(counts)))
    fit = binned_model.fit(counts, ix=bin_index, params=binned_params)

    def _nll(expected):
        # Poisson negative log-likelihood of the observed counts.
        return -np.sum(np.log(poisson.pmf(counts, expected)))

    background_nll = _nll(fit.eval(A=0))
    signal_nll = _nll(fit.best_fit)
    return 2*(background_nll-signal_nll)
Ejemplo n.º 8
0
def generate_q0_via_bins(data, bin_edges, true_bg_bc, true_sig_bc):
    '''Generate the q0 likelihood-ratio statistic from per-bin Poisson terms.

    The data are histogrammed with ``bin_edges``.  For every bin the observed
    count is compared with the expected background count and with the
    expected background + signal count; q0 is

        -2 * (log L_bg - log L_sig)

    where each log-likelihood is the sum of the per-bin Poisson log-pmfs.

    Parameters
    ----------
    data : array-like
        Values to histogram.
    bin_edges : sequence
        Bin edges passed to ``np.histogram``.
    true_bg_bc, true_sig_bc : sequence of float
        Expected background and signal counts, one entry per bin.

    Returns
    -------
    q0 : float
    '''

    bc, bin_edges = np.histogram(data, bin_edges, range=(100, 180))
    n_bins = len(bin_edges) - 1
    mu_bg = np.asarray(true_bg_bc, dtype=float)[:n_bins]
    mu_sig = np.asarray(true_sig_bc, dtype=float)[:n_bins]

    # Summing log-pmfs instead of multiplying pmfs: the product underflows
    # to 0 for many or poorly matched bins, which turned q0 into NaN.
    ll_bg = np.sum(poisson.logpmf(bc, mu_bg))
    ll_sig = np.sum(poisson.logpmf(bc, mu_bg + mu_sig))

    q0 = -2*(ll_bg-ll_sig)
    return q0
Ejemplo n.º 9
0
def poisson():
    """Plot the Poisson pmf for lambda = 1, 3 and 10 and save it as an SVG.

    The pmfs are evaluated at the integers 0..15 and the figure is written
    to './diagrams/poisson.svg'.
    """
    # Inside this function the name `poisson` refers to SciPy's distribution.
    from scipy.stats import poisson
    fig, ax = plt.subplots(figsize=(14,7))
    y = np.asarray(range(0, 16))

    for mu, fmt in ((1., 'r.-'), (3., 'g.-'), (10., 'b.-')):
        # Raw string: '\l' is not a valid escape sequence in a normal literal.
        label = r'$\lambda=%d$' % mu
        ax.plot(y, poisson.pmf(y, mu=mu), fmt, markersize=20, label=label, lw=3)

    ax.set_title('Poisson Distribution', fontsize=20)
    ax.set_xlabel('$y_i$', fontsize=20)
    ax.set_ylabel('$p(y_i)$', fontsize=20)
    ax.legend(fontsize=20)
    plt.savefig('./diagrams/poisson.svg')
Ejemplo n.º 10
0
def plot(dist_name, edge):
    """Scatter simulated edge counts against a Poisson pmf.

    1000 graphs are generated with ``gen_fixed_degree_graph(dist, 250)``; the
    relative frequency of each observed count (loops or parallel edges) is
    plotted next to ``poisson.pmf`` with the rate chosen for the requested
    distribution.

    Raises ValueError when ``dist_name`` is not 'binomial' / 'geometric' or
    ``edge`` is not 'loop' / 'parallel'.
    """
    if dist_name == 'binomial':
        dist, lam = Dist.binomial, 1
    elif dist_name == 'geometric':
        dist, lam = Dist.geometric, 1.5
    else:
        raise ValueError("Wrong dist argument")

    if edge == 'loop':
        count = count_loops
    elif edge == 'parallel':
        # The rate is squared for parallel edges.
        count, lam = count_parallel_edges, lam ** 2
    else:
        raise ValueError('Wrong edge argument')

    histogram = defaultdict(int)
    for _ in range(1000):
        histogram[count(gen_fixed_degree_graph(dist, 250))] += 1

    x = np.array(list(histogram))
    y = np.array(list(histogram.values()))
    y = y / np.sum(y)
    p_y = poisson.pmf(x, lam)

    plt.style.use('ggplot')
    experimental = plt.scatter(x, y)
    theoretical = plt.scatter(x, p_y, c='r', marker='+')
    plt.legend((experimental, theoretical), ('Experimental', 'Theoretical'))
    plt.title(edge + ' ' + dist_name)
    plt.xlabel('N')
    plt.ylabel('Probability')
    plt.show()
Ejemplo n.º 11
0
 def test_Na(self):
     """Check ``R._Na(n, 3)`` against the log Poisson pmf for n = 0..4."""
     dim = 3
     for count in range(5):
         rate = (3/4) * (self.r + self.c)
         observed = self.R._Na(count, dim)
         expected = log(poisson.pmf(count, rate))
         # Exact equality is required, as in the reference computation.
         assert observed == expected
def draw_degree_distribution(g, mu):
    """
    Draw the degree distribution of a graph together with a Poisson fit.

    Parameters
    ----------
    g : graph object
        Must provide ``get_degree_distribution()`` returning a mapping
        degree -> count, and a ``_node`` attribute used in the title.
    mu : float
        Mean of the Poisson distribution drawn for comparison.
    """

    from scipy.stats import poisson

    d = g.get_degree_distribution()

    # Materialise the keys: a dict view cannot be turned into a numeric array.
    degrees = list(d.keys())
    # Compute the total once, as a float so the division is never integer.
    total = float(sum(d.values()))
    v1 = [count / total for count in d.values()]

    # sorted as we need to draw the line
    sorted_d = sorted(degrees)
    v2 = poisson.pmf(sorted_d, mu)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    width = 0.35
    ax.bar(np.array(degrees) - width/2, v1, width, color='m', label='data')
    ax.plot(sorted_d, v2, 'c--', label='Poisson')

    ax.set_xlabel('degree')
    ax.set_ylabel('probability')
    ax.set_title("{0} Nodes".format(g._node))
    ax.legend()

    plt.show()
Ejemplo n.º 13
0
def get_c_constant(lamb, N):
    """Return the sum of ``k * poisson.pmf(k, lamb)`` for k = 1 .. N-1."""
    return sum(k*poisson.pmf(k, lamb) for k in range(1,N))
Ejemplo n.º 14
0
def calculateClassProbability(num_works,ngram_dict,testdata):
    """Score the test n-gram counts under each author's Poisson model.

    For every author ("Dickens", "Twain") and every n-gram of ``testdata``
    that the author's dictionary also contains, the log of the Poisson pmf of
    the observed count is added to the author's score.  Terms whose pmf is
    not positive are skipped.

    Parameters
    ----------
    num_works : mapping
        Not used by the computation; kept for interface compatibility.
    ngram_dict : mapping author -> {ngram: count}
    testdata : mapping ngram -> observed count

    Returns
    -------
    dict
        author -> summed log-probability (0.0 when nothing contributed).
    """
    prob = {"Dickens":0.0,"Twain":0.0}
    for author in prob:
        author_dict = ngram_dict[author]
        log_product = 0.0
        for ngram in testdata:
            if ngram in author_dict:
                # NOTE(review): the divisor 9 is hard-coded; it may have been
                # meant to be num_works[author] -- confirm before changing.
                lamb = author_dict[ngram]/9
                pmf = poisson.pmf(testdata[ngram],mu=lamb)
                if pmf > 0:
                    log_product += math.log(pmf)
        prob[author] = log_product
    return prob
def EPM_Poisson_countd(mu, library_size):
    '''Return the Poisson mutation-count distribution for a given library size.

    Parameters
    ----------
    mu : float
        Average number of mutations per sequence.
    library_size : int
        Number of sequences in the library.

    Returns
    -------
    probs_list : list of int
        Number of sequences carrying the matching number of mutations in
        ``mut_list``.  Counts that round to zero are omitted.
    mut_list : list of int
        Mutation counts, in increasing order.

    The counts always sum to ``library_size``: any rounding surplus or
    deficit is absorbed by the entry whose mutation count is closest to mu.
    '''

    probs_list = []
    mut_list = []
    # 1.0 keeps this a true division under Python 2 as well.
    alpha = 1 - 1.0 / (library_size * 10)
    a, b = poisson.interval(alpha, mu, loc=0)
    a = int(a)
    b = int(b)
    for k in range(a, b + 1):
        k_count = int(round(poisson.pmf(k, mu) * library_size, 0))
        if k_count != 0:
            probs_list.append(k_count)
            mut_list.append(k)

    if not mut_list:
        # Every count rounded to zero: put the whole library at the mean.
        return [library_size], [int(round(mu))]

    # Correct the rounding error on the entry nearest to the mean.  The index
    # is looked up in mut_list (which parallels probs_list), and a
    # non-integer mu is handled by taking the closest mutation count.
    dif = sum(probs_list) - library_size
    index = min(range(len(mut_list)), key=lambda i: abs(mut_list[i] - mu))
    probs_list[index] -= dif

    return probs_list, mut_list
Ejemplo n.º 16
0
def lotteryProfit(lastWin, addWin):
    """Return the relative profit of buying every possible lottery row.

    ``lastWin`` is added to the jackpot as-is; ``addWin`` is added as well and
    also determines the number of rows bought by others.  The jackpot is
    shared with k other winners with Poisson probability, for k = 0..20.
    The result is profit divided by the cost of all rows.
    """
    # Icelandic lottery parameters
    ticket_price = 130
    payout_ratio = 0.45
    first_prize_share = 0.57
    pool_size = 40
    drawn = 5
    max_split = 21

    # Probability that a single row wins
    n_combinations = comb(pool_size, drawn)
    win_probability = 1. / n_combinations

    # Number of other winners considered, and rows bought by others
    split_counts = np.arange(0, max_split)
    rows_sold = addWin / (payout_ratio * first_prize_share * ticket_price)

    # Probability of sharing the jackpot with k other winners
    split_pmf = poisson.pmf(split_counts, win_probability * rows_sold)

    # Return on buying all possible rows
    total_cost = n_combinations * ticket_price
    jackpot = lastWin + addWin + total_cost * payout_ratio * first_prize_share
    lower_prizes = (1. - first_prize_share) * payout_ratio * total_cost
    expected_jackpot = np.sum(split_pmf / (split_counts + 1.) * jackpot)
    profit = expected_jackpot + lower_prizes - total_cost
    return profit / total_cost
Ejemplo n.º 17
0
def P_number_true_obs_fast(args):
	"""Accumulate and return the quantity E_true_links.

	Reads insertion_size, readlen, soft, mean, stddev, cov and bp_ratio from
	`args`.  Every term is a product of a 1/k factor (k = 2*readlen/cov), a
	normal pdf with parameters (mean, stddev) and a Poisson pmf at 0 with
	rate bp_ratio*i.  Two diagnostic values are printed along the way.
	"""
	E_true_links = 0
	p_at_least_one_bp_at_given_position = 1- P_breakpoints_in_interval(1, args.bp_ratio, 0)
	k = 2 * args.readlen / float(args.cov)
	
	# Sum over i from (insertion_size + readlen - soft) + 1 up to, but not including, mean + 4*stddev.
	for i in range((args.insertion_size + args.readlen - args.soft)+1, args.mean + 4*args.stddev):
		# When internal breakpoint occur within mean + 4*sigma
		E_true_links += 2*(1/k) * (v(i,args.insertion_size, args.readlen, args.soft) - 1 ) * norm.pdf(i, args.mean,args.stddev)*poisson.pmf(0,args.bp_ratio*i)*p_at_least_one_bp_at_given_position**2
		# when no breakpoint occurs on one side
		E_true_links += 2*(1/k) * 1                                                        * norm.pdf(i, args.mean,args.stddev)*poisson.pmf(0,args.bp_ratio*i)*p_at_least_one_bp_at_given_position
		# when no breakpoint occurs on both sides
		E_true_links += (1/k) * 1 															* norm.pdf(i, args.mean,args.stddev)*poisson.pmf(0,args.bp_ratio*i)

		#print v(i,args.insertion_size, args.readlen, args.soft)
	# Boundary terms evaluated at i = mean + 4*stddev.
	# when no breakpoint occurs on one side
	i = args.mean + 4*args.stddev
	E_true_links += 2*(1/k)*v(i,args.insertion_size, args.readlen, args.soft)*norm.pdf(i, args.mean,args.stddev)*poisson.pmf(0,args.bp_ratio*i)*p_at_least_one_bp_at_given_position

	# when no breakpoint occurs on both sides
	i = args.mean + 4*args.stddev
	# Diagnostic output (Python 2 print statements).
	print v(i,args.insertion_size, args.readlen, args.soft)
	print 1/k
	E_true_links += (1/k)*v(i,args.insertion_size, args.readlen, args.soft)*norm.pdf(i, args.mean,args.stddev)*poisson.pmf(0,args.bp_ratio*i)

 
	return E_true_links
Ejemplo n.º 18
0
def ztp(N, lambda_):
    """Draw one zero-truncated Poisson value per rate in ``lambda_``.

    For each rate a uniform quantile is drawn between P(X = 0) and 1 and
    mapped through the Poisson ppf.  ``N`` is not used.  Returns a NumPy
    array of integers, one per rate.
    """
    zero_mass = [poisson.pmf(0, rate) for rate in lambda_]
    # Sampling the quantile above P(X = 0) excludes the zero outcome.
    quantiles = [uniform.rvs(loc=p0, scale=1-p0) for p0 in zero_mass]
    draws = [int(poisson.ppf(q, lambda_[idx])) for idx, q in enumerate(quantiles)]

    return np.array(draws)
Ejemplo n.º 19
0
def _I_ER(dd, beta, eps):
    """Combine a KL term against a Poisson(beta) pmf with mean-based corrections.

    ``dd`` is treated as weights over the indices 0 .. len(dd)-1; its
    index-weighted sum is compared with ``beta``.
    """
    size = len(dd)
    mean_index = np.dot(np.arange(size), dd)
    reference = poisson.pmf(range(size), beta)
    divergence = KL_div(dd, reference, eps)
    drift = 0.5 * (mean_index - beta)
    log_term = 0.5 * mean_index * np.log(beta)
    return divergence + drift + log_term - 0.5 * xlogx(mean_index)
def test_discreteExFam():
    """Smoke-test a discrete_family built from a Poisson(1) pmf on 0..99.

    Calls a series of methods on the family and prints some of the results;
    nothing is asserted.  Uses Python 2 print statements.
    """

    X = np.arange(100)
    pois = discrete_family(X, poisson.pmf(X, 1))
    tol = 1e-5  # not used below

    print (pois._leftCutFromRight(theta=0.4618311,rightCut=(5,.5)), pois._test2RejectsLeft(theta=2.39,observed=5,auxVar=.5))
    print pois.interval(observed=5,alpha=.05,randomize=True,auxVar=.5)

    # Distance of the summed pdf at theta = 0 from 1.
    print abs(1-sum(pois.pdf(0)))
    pois.ccdf(0, 3, .4)

    print pois.Var(np.log(2), lambda x: x)
    print pois.Cov(np.log(2), lambda x: x, lambda x: x)

    # Feed the right cut computed from a left cut back into the inverse call.
    lc = pois._rightCutFromLeft(0, (0,.01))
    print (0,0.01), pois._leftCutFromRight(0, lc)

    pois._rightCutFromLeft(-10, (0,.01))
    #[pois.test2Cutoffs(t)[1] for t in range(-10,3)]
    pois._critCovFromLeft(-10, (0,.01))

    pois._critCovFromLeft(0, (0,.01))
    pois._critCovFromRight(0, lc)

    pois._critCovFromLeft(5, (5, 1))

    pois._test2RejectsLeft(np.log(5),5)
    pois._test2RejectsRight(np.log(5),5)

    pois._test2RejectsLeft(np.log(20),5)
    pois._test2RejectsRight(np.log(.1),5)

    print pois._inter2Upper(5,auxVar=.5)
    print pois.interval(5,auxVar=.5)
Ejemplo n.º 21
0
    def coverage_probability(self,nr_obs, a, mean_lib, stddev_lib,z, coverage_mean, read_len, s_inner,s_outer, b=None, coverage_model = False):
        ''' Probability of observing nr_obs under the chosen coverage model.

            Arguments:

            nr_obs         -- observed count whose probability is evaluated
            a, b           -- the two reference sequence lengths; when b is
                              not given (or is falsy), a is split into two
                              equal halves
            z              -- passed to mean_span_coverage together with a, b
            mean_lib, stddev_lib, coverage_mean, read_len, s_inner, s_outer
                           -- forwarded to Param
            coverage_model -- 'Poisson', 'NegBin', or anything else for a
                              uniform model

            Returns the Poisson pmf, the negative binomial pmf (with p fixed
            at 0.01 and n chosen so the mean equals the expected coverage),
            or the constant 1 for the uniform model.

        '''
        if not b:
            # only one reference sequence.
            # We split the reference sequence into two equal
            # length sequences to fit the model.
            # Both halves are derived from the ORIGINAL a; assigning a first
            # and then computing b = a/2 would give b a quarter instead.
            half = a/2
            a = half
            b = half

        param = Param(mean_lib, stddev_lib, coverage_mean, read_len, s_inner,s_outer)
        lambda_ = mean_span_coverage(a, b, z, param)

        if coverage_model == 'Poisson':
            return poisson.pmf(nr_obs, lambda_, loc=0)

        elif coverage_model == 'NegBin':
            p = 0.01
            n = (p*lambda_)/(1-p)
            return nbinom.pmf(nr_obs, n, p, loc=0)
        else:
            # This is equivalent to uniform coverage
            return 1 #uniform.pdf(nr_obs, loc=lambda_- 0.3*lambda_, scale=lambda_ + 0.3*lambda_ )
def lower_bound_imperct_cdmd(intensity, epsilon, p_f, mu=0, delta=1):
    r"""
    Bound on the average transmit power in the imperfect CDMA case.

    By mathematical analysis, in case of imperfect CDMA the bound on the
    average transmit power has the form

        \mu_t \exp{1/2 (\ln(1/10) \sigma / 10)^2} E[1/(\mu g)]

    The code evaluates it as ``mu_t * CONSTANT_1 * CONSTANT_2`` with

        CONSTANT_1 = (1/N_c) * (1/MU)
                     * (RADIUS**(GAMMA+2) - RADIUS_INNER**(GAMMA+2)) * 2
                     / ((RADIUS**2 - RADIUS_INNER**2) * (GAMMA+2) * RADIUS**GAMMA)
        CONSTANT_2 = exp(0.5 * (mu + ln(0.1) * delta / 10)**2)

    and then converts the result with ``10 * log10(1000 * p_t)``.

    Parameters
    ----------
    intensity : float
        Arrival request density (Poisson mean).
    epsilon : float
        Tail probability: Poisson terms are accumulated until the running
        sum exceeds ``1 - epsilon``.
    p_f : float
        Combined with epsilon as ``(p_f - epsilon) / (1 - epsilon)``.
    mu : float, default 0
        Mean of the normal distribution.
    delta : float, default 1
        Variance of the normal distribution.

    Returns
    -------
    float
        The converted power, or the unconverted value when the logarithm
        raises a math domain error (a diagnostic is printed in that case).
    """
    p_c = (p_f-epsilon)/(1.0-epsilon)
    proba_distribution_poisson = [poisson.pmf(k, intensity) for k in range(int(intensity+200))]
    cumulative = 0.0
    n_mean = 0
    # Count how many leading Poisson terms are needed to exceed 1 - epsilon.
    while cumulative <= 1-epsilon:
        cumulative += proba_distribution_poisson[n_mean]
        n_mean += 1
    N_c = math.ceil(math.log(1+(n_mean-1)/p_c, 2))
    mu_t = pow(2, L*N_c/W/TAU_S)-1
    # %-formatting prints the same text on Python 2 and Python 3.
    print("mu_t %s N_c %s N %s Nc - mu_t*(N-1) %s" % (mu_t, N_c, n_mean, N_c-mu_t*(n_mean-1)))

    CONSTANT_1 = (1.0/N_c)*(1.0/MU)*(RADIUS**(GAMMA+2)-RADIUS_INNER**(GAMMA+2))*2/((RADIUS**2-RADIUS_INNER**2)*(GAMMA+2)*RADIUS**GAMMA)
    CONSTANT_2 = math.exp(0.5*(mu+math.log(0.1, np.e)*delta/10.0)**2)
    p_t = mu_t * CONSTANT_1*CONSTANT_2
    try:
        p_t = 10*math.log(1000*p_t, 10)
    except ValueError:
        # The original referenced an undefined name `i` here, which turned the
        # diagnostic into a NameError; the intensity is reported instead.
        print("Math domain error,p_t is:{0} when i is:{1}".format(p_t, intensity))
        print("CONSTANT 2:{0}".format(CONSTANT_2))

    logger.debug("lamdba:{0} N_bar:{1} N_C:{2} p_t:{3}".format(intensity, n_mean, N_c, p_t))
    print("CONSTANT_1,CONSTANT_2,p_t %s %s %s" % (CONSTANT_1, CONSTANT_2, p_t))
    return p_t
Ejemplo n.º 23
0
def test_montecarlo():
    """
    With a three node net, we can test Direct_Sample against numerical
    integration.

    Builds the nodes A, B and C, generates data over T = 10, runs
    Direct_Sample, and requires its result to agree to within 1% (relative)
    with the log of a numerically integrated value plus a Poisson log-pmf
    term for the entries labelled "A".
    """
    A = Node("A", ["B"], {"B": np.array([[1,0],[1,.2]])})
    B = Node("B", ["C"], {"C": np.array([[1,0],[1,.4]])})
    C = Node("C", [], {})
    net = CyberNet([A,B,C])
    T=10
    data = gen_data(T, net, {"A": "infected", "B":"normal", "C": "normal"})
    # NOTE(review): "Normal" is capitalised here but lower-case in the
    # gen_data call above -- confirm this is intended.
    dsres = Direct_Sample(net, data, 10000, 10, {"A": "infected",
        "B":"normal", "C":"Normal"})
    # Log-probability of the number of data[2] entries equal to "A" under a Poisson with mean 12.
    probfroma = np.log(poisson.pmf(np.sum(data[2]=="A"), 12))
    def integrand(zbar, T=T, data=data):
        # Entries of data[1] whose data[2] label is "B", split at zbar.
        fromb_times = data[1][data[2]=="B"]
        #total = len(fromb_times)
        numbefore = np.sum(fromb_times<=zbar)
        numafter = np.sum(fromb_times>zbar)
        # Poisson-form terms: mean zbar before the split, mean 1.4*(T - zbar) after it.
        pbefore = zbar**numbefore*np.exp(-zbar)/float(factorial([numbefore])[0])
        pafter = (1.4*(T-zbar))**numafter*np.exp(-1.4*(T-zbar))/float(factorial([numafter])[0])
        # Weighted by the density .2*exp(-.2*zbar).
        return pbefore*pafter*.2*np.exp(-.2*zbar)
    total = len(data[1][data[2]=="B"])
    # quad returns a tuple; adding the NumPy scalar offsets every element of
    # it, and only element 0 (the integral estimate) is used below.
    num_integral = integrate.quad(integrand, 0,10, epsabs=.01) + \
               np.exp(-2)*10**total*np.exp(-10)/float(factorial([total])[0])
    np.testing.assert_allclose(np.log(num_integral[0]) + probfroma, dsres, atol=0, rtol=.01) #relative test
def CalcMultivariateMultinomial(path_cvs, skip, noreplace):
    """Sum products of Poisson pmfs over combinations of table rows.

    Reads a probability table with Read_cvs(path_cvs, skip).  For every entry
    of LST_NCOINCI and every time key of DCT_TINTERVAL, all combinations of
    row labels of the requested size are built (with replacement unless
    `noreplace` is true).  For each triple of combinations (T1, T2, T3) the
    product over rows and time intervals of poisson.pmf(count of the row in
    the combination, table value) is added to the total, which is printed and
    returned.  Uses Python 2 print statements.
    """
    DF_PROB = Read_cvs(path_cvs, skip)
    print DF_PROB
    NGRB = len(DF_PROB.index)  # not used below
    dct_combis = {}
    prob_sum = 0.
    combi_time = {}

    for dct_coinci in LST_NCOINCI:
        print '===================='
        print dct_coinci
        for keyt in DCT_TINTERVAL.keys(): #preparation for next loop
            if noreplace==False:
                dct_combis[keyt] = list(itertools.combinations_with_replacement(DF_PROB.index, dct_coinci[keyt]))
            else:
                dct_combis[keyt] = list(itertools.combinations(DF_PROB.index, dct_coinci[keyt]))
            print keyt, dct_combis[keyt]
        #for combi_time['T1'], combi_time['T2'], combi_time['T3'] in itertools.product(dct_combis.values()):
        # The loop targets are dictionary entries, so combi_time always holds the current triple.
        for combi_time['T1'] in dct_combis['T1']:
            for combi_time['T2'] in dct_combis['T2']:
                for combi_time['T3'] in dct_combis['T3']:
                    print combi_time['T1'], combi_time['T2'], combi_time['T3']
                    prob = 1.
                    for grb in DF_PROB.index:
                        for keytime, strtime in DCT_TINTERVAL.items():
                            # NOTE(review): DataFrame.ix was removed in pandas 1.0.
                            prob = prob * poisson.pmf(combi_time[keytime].count(grb), DF_PROB.ix[grb][strtime])
                    print prob
                    sys.stdout.flush()
                    prob_sum += prob
    print 'Total probability:', prob_sum
    return prob_sum
Ejemplo n.º 25
0
 def _get_poisson(self, x, p):
     """Return poisson.pmf(x, p), memoised in the lookup_poisson table.

     The table is keyed by the (x, p) pair; a value is computed only the
     first time a pair is requested.
     """
     key = (x, p)
     # `in` replaces dict.has_key(), which no longer exists in Python 3.
     if key not in lookup_poisson:
         lookup_poisson[key] = poisson.pmf(x, p)
     return lookup_poisson[key]
Ejemplo n.º 26
0
def generalized_ln_poisson(data,expectation):
    """
    Return the natural log of the Poisson probability of `data` given `expectation`.

    Integer data use the discrete Poisson log-pmf.  Floating-point data use
    the continuous form

        data * ln(expectation) - expectation - ln Gamma(data + 1)

    Parameters
    ----------
    data : numpy.ndarray
        Detected counts; every element must be >= 0 and the dtype must be an
        integer or floating-point type.
    expectation : float or array-like
        Expected counts.

    Returns
    -------
    numpy.ndarray
        Element-wise log-probabilities.

    Raises
    ------
    ValueError
        If any element of `data` is negative, or if the dtype is neither
        integer nor floating point.
    """

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    if not np.all(data >= 0.0):
        raise ValueError(
            "Template must have all bins >= 0.0! Template generation bug?")

    if np.issubdtype(data.dtype, np.integer):
        # logpmf stays finite where log(pmf) would underflow to -inf.
        return poisson.logpmf(data,expectation)
    if np.issubdtype(data.dtype, np.floating):
        return (data*np.log(expectation) - expectation - multigammaln(data+1.0,1))
    # The original message referenced an undefined name here, which raised
    # NameError instead of the intended ValueError.
    raise ValueError(
        "Unknown data dtype: %s. Must be float or int!"%data.dtype)
    def calc_probabilities(self, data):
        """
        Return the probability of each count alternative for every row of data.

        For the poisson distribution, column i (for all but the last
        alternative) holds P(count = i) and the last column holds the
        remaining probability mass.  For any other distribution the table is
        returned filled with zeros.

        Inputs:
        data - DataArray object
        """
        #TODO: what are the parameters for the negative binomial distribution
        #[shape_param] = [1,]*nbinom.numargs
        mean_count = self.calc_expected_value(data)
        n_alternatives = self.specification.number_choices
        table = zeros((data.rows, n_alternatives))

        if self.distribution != 'poisson':
            #TODO: implement negative binomial probabilities
            return table

        for count in range(n_alternatives-1):
            table[:,count] = poisson.pmf(count, mean_count)

        # The last column is still zero here, so the final cumulative sum
        # equals the mass of all explicit counts.
        table[:,-1] = 1 - table.cumsum(-1)[:,-1]
        return table
Ejemplo n.º 28
0
def make_poisson():
    """Evaluate the Poisson pmf with rate `l` on 20 points in [0, 20].

    Prints the evaluation points and the pmf values, then returns the values.
    The rate `l` is read from the enclosing module.
    """
    # NOTE(review): a first assignment x = np.array(range(20)) was immediately
    # overwritten and has been dropped.  The linspace grid is not made of
    # integers, where a Poisson pmf is zero -- confirm which grid was meant.
    x = np.linspace(0, 20, 20)
    p = poisson.pmf(x, l)
    # Single-argument print calls behave identically on Python 2 and 3.
    print(x)
    print(p)
    return p
Ejemplo n.º 29
0
    def compute_ll(self):
        """Return the summed log-likelihood of the ratings and the gamma-distributed variables.

        Adds the log Poisson pmf of every rating listed in
        self.nonzero_indices (with mean thetas[user] . betas[movie]) and the
        log gammapdf of every xi, theta, eta and beta entry.  Asserts that
        each gammapdf value is positive before its log is taken.  Written for
        Python 2 (xrange, print statement).
        """
        ll = 0

        # Rating terms: Poisson with mean equal to the user/movie dot product.
        for user, movie in self.nonzero_indices:
            assert self.ratings[user, movie] > 0
            ll += np.log(poisson.pmf(self.ratings[user, movie], np.dot(self.thetas[user, :], self.betas[movie, :])))

        # User terms: xi per user, then one theta per (user, topic).
        for user in xrange(self.nusers):
            try:
                assert gammapdf(self.xis[user], self.ap, self.ap / self.b) > 0
            except AssertionError:
                # Print the offending values before re-raising.
                print self.xis[user], self.ap, self.ap / self.b, gammapdf(self.xis[user], self.ap, self.ap / self.b)
                raise
            ll += np.log(gammapdf(self.xis[user], self.ap, self.ap / self.b))
            for topic in xrange(self.ntopics):
                assert gammapdf(self.thetas[user, topic], self.a, self.xis[user]) > 0
                ll += np.log(gammapdf(self.thetas[user, topic], self.a, self.xis[user]))

        # Movie terms: eta per movie, then one beta per (movie, topic).
        for movie in xrange(self.nmovies):
            assert gammapdf(self.etas[movie], self.cp, self.cp / self.d) > 0
            ll += np.log(gammapdf(self.etas[movie], self.cp, self.cp / self.d))
            for topic in xrange(self.ntopics):
                assert gammapdf(self.betas[movie, topic], self.c, self.etas[movie]) > 0
                ll += np.log(gammapdf(self.betas[movie, topic], self.c, self.etas[movie]))

        return ll
    def poisson_distribution(self, lambda_, x):
        """Evaluate the Poisson pmf with mean ``lambda_`` at ``x``."""
        probability = poisson.pmf(x, lambda_)
        return probability
# Exponential probability density function
a = np.arange(0, 10, 0.001)
plt.plot(a, expon.pdf(a))  # pdf is the probability density function
plt.show()

# Normal distribution: histogram of 10000 random samples
mu = 5  # mean
sigma = 2  # standard deviation
values = np.random.normal(mu, sigma, 10000)
plt.hist(values, 50)
plt.show()

# Binomial probability mass function
n, p = 10, 0.5
# n is the number of times the experiment runs, p is the probability of one outcome
# A pmf is only non-zero at integers, so it is evaluated on the counts 0..n.
a = np.arange(0, n + 1)
plt.plot(a, binom.pmf(a, n, p))  # pmf is the probability mass function
plt.show()

# Poisson probability mass function
# e.g. a website gets 500 visits per day on average; what are the odds of getting 550?
mu = 500  # mean
# Integer grid: the Poisson pmf is zero at non-integer points.
a = np.arange(400, 600)
plt.plot(a, poisson.pmf(a, mu))
plt.show()

# Percentiles and moments
vals = np.random.normal(0, 0.5, 1000)
print(np.percentile(vals, 90))  # value at the 90th percentile
print(np.percentile(vals, 50))
print(np.mean(vals), np.var(vals), skew(vals), kurtosis(vals))
Ejemplo n.º 32
0
def C_R(y, e_R, h_R, b, L_R, demand_rate):
    """Return a cost made of a linear term in y and a Poisson shortfall term.

    The Poisson mean is ``(L_R + 1) * demand_rate``; the shortfall
    ``(d - y) * P(D = d)`` is summed for d from y up to (but excluding) the
    mean plus six times its square root, rounded.
    """
    mean_demand = (L_R + 1) * demand_rate
    M = round(6 * math.sqrt(mean_demand) + mean_demand)
    shortfall = sum(
        (d - y) * poisson.pmf(d, mean_demand) for d in range(y, M)
    )
    return y * e_R - h_R * (L_R + 1) * demand_rate + (h_R + b) * shortfall
Ejemplo n.º 33
0
#Henrique K. Secchi
from scipy.stats import poisson

# The average number of car accidents is 2 per day

# What is the probability of exactly 3 accidents in a day?
poisson.pmf(3, mu=2)

# What is the probability of 3 or fewer accidents in a day?
poisson.cdf(3, mu=2)

# What is the probability of more than 3 accidents in a day?
poisson.sf(3, mu=2)
Ejemplo n.º 34
0
def _accumulate_ll_terms(l, model, data):
    """Add the per-bin terms of one model/data pair to the running total l.

    Returns (l, penalty).  penalty is None unless some bin has model <= 0
    while data > 0; in that case accumulation stops and penalty is
    1e10 * (data - model) for that bin.
    """
    for j in range(len(model)):
        if model[j] > 0 and data[j] <= 0:
            l -= model[j] - data[j]
        elif model[j] <= 0 and data[j] > 0:
            return l, 1e10 * (data[j] - model[j])
        elif model[j] == 0 and data[j] == 0:
            l += 1
        else:
            l += data[j] * np.log(model[j]) - data[j] * np.log(
                data[j]) + data[j] - model[j]
    return l, None


def L(p):
    """Objective function: return minus the accumulated log-likelihood-style total.

    The parameter vector ``p`` is converted with ``p_to_rec``.  Parameters
    outside their allowed ranges give a large penalty value immediately.
    Otherwise the model is compared bin by bin with several data sets (per
    PMT: the first 500 columns of H, h_q0, a window of the spectra, and
    H_areas; plus the delay histogram) and ``-l`` is returned.  The global
    call counter is incremented on every call and progress is printed once
    every ``len(p) + 1`` calls.
    """
    rec = p_to_rec(p)
    global counter
    counter += 1

    # Range checks: negative values are rejected for these fields ...
    names = [
        'NQ', 'Ts', 'T', 'a_delay', 'q0', 'a0', 'a_pad', 'a_spe', 'a_dpe',
        'a_trpe', 'Spad', 'Spe'
    ]
    for name in names:
        if np.any(rec[name] < 0):
            return 1e10 * (1 - np.amin(rec[name]))
    # ... and values above 1 for these.
    names = ['q0', 'a0', 'Spad']
    for name in names:
        if np.any(rec[name] > 1):
            return 1e10 * (np.amax(rec[name]))
    if rec['Ts'][0] > 100:
        return 1e10 * rec['Ts'][0]
    if np.any(rec['St'][0] < 0.2):
        return 1e10 * (1 + np.abs(np.amin(rec['St'][0])))
    # if np.any(rec['Tf'][0]<1):
    #     return 1e10*(1+np.abs(np.amin(rec['Tf'][0])))

    l = 0
    P = make_P(rec['a0'][0], rec['Spad'][0], rec['Spe'][0], rec['m_pad'][0])
    m = Model(rec['NQ'][0], rec['T'][0], [0, 0], 0, 1, rec['Ts'][0],
              rec['St'][0], rec['q0'][0], P)
    m_area = model_area(areas, rec['m_pad'][0], rec['a_pad'][0],
                        rec['a_spe'][0], rec['a_dpe'][0], rec['a_trpe'][0],
                        rec['Spad'][0], rec['Spe'][0])
    for i in range(len(pmts)):
        # Model scaled to the data, first 500 columns.
        model = np.sum(H[:, 0, i]) * np.ravel(m[:, :500, i])
        if np.any(np.isnan(model)) or np.any(np.isinf(model)):
            print('model is nan or inf')
            print('NQ=', rec['NQ'][0, i], 'T=', rec['T'][0, i], 'F=',
                  rec['F'][0, i], 'Tf=', rec['Tf'][0, i], 'Ts=',
                  rec['Ts'][0, i], 'St=', rec['St'][0, i])
            plt.figure()
            plt.plot(np.mean(t.T * np.arange(np.shape(t)[0])), 'k.')
            plt.show()
            sys.exit()
        data = np.ravel(H[:, :500, i])
        l, penalty = _accumulate_ll_terms(l, model, data)
        if penalty is not None:
            return penalty

        # q0 histogram.
        model = np.sum(h_q0[i]) * q0_model(n_q0, rec['q0'][0, i])
        data = h_q0[i]
        l, penalty = _accumulate_ll_terms(l, model, data)
        if penalty is not None:
            return penalty

        # Spectrum, restricted to a window around its maximum.
        spectra_rng = np.nonzero(
            np.logical_and(PEs > PEs[np.argmax(spectra[i]) - 5],
                           PEs < PEs[np.argmax(spectra[i]) + 5]))[0]
        model = np.sum(H[:, 0, i]) * poisson.pmf(
            PEs, np.sum(m[:, :, i].T * np.arange(np.shape(H)[0])))[spectra_rng]
        data = spectra[i][spectra_rng]
        l, penalty = _accumulate_ll_terms(l, model, data)
        if penalty is not None:
            return penalty

        # Area histogram.
        model = m_area[i]
        data = H_areas[i]
        l, penalty = _accumulate_ll_terms(l, model, data)
        if penalty is not None:
            return penalty

    # Delay histogram: Gaussian in the difference of the two T values, with
    # the two St values added in quadrature.
    model = rec['a_delay'][0] * np.exp(
        -0.5 * (delays[rng_delay] - rec['T'][0, 1] + rec['T'][0, 0])**2 /
        (rec['St'][0, 0]**2 + rec['St'][0, 1]**2)) / np.sqrt(
            2 * np.pi * (rec['St'][0, 0]**2 + rec['St'][0, 1]**2))
    data = delay_h[rng_delay]
    l, penalty = _accumulate_ll_terms(l, model, data)
    if penalty is not None:
        return penalty

    if counter % (len(p) + 1) == 0:
        print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print('iteration=', int(counter / (len(p) + 1)), 'fanc=', -l)
        print('--------------------------------')
        print(rec)
    return -l
# The fitted parameter is bounded below by 0 and unbounded above.
mu_bound = (0, None)

# Minimise my_pdf starting from 1, with z passed as the extra argument.
data_0 = minimize(my_pdf, [1, ], args=(z), method='SLSQP',
                  bounds=(mu_bound, ))


print(data_0)

# Histogram bins of width 0.1 spanning the values in column 2 of data.
binwidth = 0.1
n_bins = np.arange(min(data[:, 2]), max(data[:, 2]) + binwidth, binwidth)

# Chi2 calculator
observed_values, bins, _ = plt.hist(data[:, 2], bins=n_bins)

# plt.show()
# We normalize by multiplying the length of the data with the binwidth
# NOTE(review): the bin width is not actually applied on the next line, and
# `bins` holds the bin edges (one more entry than observed_values) -- confirm.
expected_values = poisson.pmf(bins, data_0.x[0]) * len(data) 

# NOTE(review): zeros are filtered from the two arrays independently, so the
# arrays passed to chisquare are not guaranteed to line up -- confirm.
print(observed_values[observed_values!=0])
print(expected_values[expected_values!=0])
print(chisquare(observed_values[observed_values!=0], f_exp=expected_values[expected_values!=0]))
# Critical value at the 5% level for 18 degrees of freedom.
print('Threshold value ', chi2.isf(0.05, 18))


# x = np.arange(-1, 1, 0.01)
# y = f_6(x, data_0.x[0], data_0.x[1]) 
# plt.plot(x,y)
# plt.show()

Ejemplo n.º 36
0
def likelihood_one_game(goals_ht, goals_at, intercept, mu, a_ht, d_ht, a_at, d_at):
    """Return the likelihood of one match result under two Poisson counts.

    The home side's rate is ``exp(intercept + mu + a_ht + d_at)`` and the away
    side's rate is ``exp(intercept + a_at + d_ht)``. The result is the product
    of the Poisson pmf of ``goals_ht`` at the home rate and the Poisson pmf of
    ``goals_at`` at the away rate.

    Args:
        goals_ht: Goals scored by the home team.
        goals_at: Goals scored by the away team.
        intercept: Term added to both log-rates.
        mu: Extra term added to the home log-rate only.
        a_ht, d_ht: Terms for the home team (presumably attack / defence
            strengths - confirm against the caller).
        a_at, d_at: Corresponding terms for the away team.

    Returns:
        Product of the two pmf values.
    """
    home_rate = np.exp(intercept + mu + a_ht + d_at)
    away_rate = np.exp(intercept + a_at + d_ht)
    home_term = poisson.pmf(goals_ht, home_rate)
    away_term = poisson.pmf(goals_at, away_rate)
    return home_term * away_term
Ejemplo n.º 37
0
                      [(df_prediction['Team'] == my_team)
                       & (df_prediction['Oppt'] == op_team) &
                       (df_prediction['AtHome'] == False)]))
        proba_score.append(sc)
        proba_conceed.append(cc)
    # df_sample['ScoreProb'] = proba_score
    # df_sample['ConceedProb'] = proba_conceed

    ##Predicting Scores##
    score_cscc = []
    score_goal = []
    score_asst = []
    for kk in range(0, len(df_sample)):
        score_cscc.append(
            np.sum(
                poisson.pmf(np.arange(0, 7, 1), float(proba_conceed[kk][0])) *
                payout_cscc))
        score_goal.append(
            np.sum(
                poisson.pmf(np.arange(0, 7, 1), float(proba_score[kk][0])) *
                payout_goal * df_sample['GoalRatio'][kk]))
        score_asst.append(
            np.sum(
                poisson.pmf(np.arange(0, 7, 1), float(proba_score[kk][0])) *
                payout_asst * df_sample['AsstRatio'][kk]))

    ## Appending GW Score Factor ##
    score_gw_goal.append(score_goal)
    score_gw_asst.append(score_asst)
    # score_gw_save.append(score_goal)
    score_gw_cscc.append(score_cscc)
Ejemplo n.º 38
0
 def test_pmf_p2(self):
     """Check genpoisson_p.pmf(2, 2, 0, 2) against poisson.pmf(2, 2) at rtol=1e-15."""
     assert_allclose(
         poisson.pmf(2, 2),
         sm.distributions.genpoisson_p.pmf(2, 2, 0, 2),
         rtol=1e-15,
     )
Ejemplo n.º 39
0
 def test_pmf_p1(self):
     """Check genpoisson_p.pmf(1, 1, 0, 1) against poisson.pmf(1, 1) at rtol=1e-15."""
     assert_allclose(
         poisson.pmf(1, 1),
         sm.distributions.genpoisson_p.pmf(1, 1, 0, 1),
         rtol=1e-15,
     )
Ejemplo n.º 40
0
# In[14]:

# Overlay the exponential density with rate `lambda_from_mean` on the
# normalised histogram of the observed time differences.
minutes = np.arange(0, 160, 5)
rv = expon(scale=1. / lambda_from_mean)
plt.plot(minutes, rv.pdf(minutes), 'o')
# `normed` has been removed from matplotlib's hist(); `density=True` is the
# supported spelling of the same normalisation.
# (assumes timediffs.hist forwards keyword arguments to matplotlib - TODO confirm)
timediffs.hist(density=True, alpha=0.5)
plt.xlabel("minutes")
# Raw string: the LaTeX backslashes must not be read as (invalid) escapes.
plt.title(r"Normalized data and model for estimated $\hat{\lambda}$")

# In[15]:

from scipy.stats import poisson

# Poisson pmf for several rates on the same axes, one colour per rate.
k = np.arange(15)
plt.figure(figsize=(12, 8))
for i, lambda_ in enumerate([1, 2, 4, 6]):
    pmf_values = poisson.pmf(k, lambda_)
    plt.plot(k, pmf_values, '-o', label=lambda_, color=colors[i])
    plt.fill_between(k, pmf_values, color=colors[i], alpha=0.5)
# Build the legend once, after every curve has been added.
plt.legend()
plt.title("Poisson distribution")
plt.ylabel("PDF at $k$")
plt.xlabel("$k$")

# In[18]:

# Integer-divide the `minutes` column by 60 and count rows per resulting hour.
per_hour = df.minutes // 60
num_births_per_hour = df.groupby(per_hour).minutes.count()
num_births_per_hour

# In[19]:

num_births_per_hour.mean()
Ejemplo n.º 41
0
def SimulateMatch(model, homeTeam, awayTeam, maxGoals=10):
    cols = [[awayTeam + ' Goals'] * (maxGoals + 1),
            [goal for goal in range(maxGoals + 1)]]
    ind = [[homeTeam + ' Goals'] * (maxGoals + 1),
           [goal for goal in range(maxGoals + 1)]]
    tuplesCols = list(zip(*cols))
    tuplesInd = list(zip(*ind))
    columns = pd.MultiIndex.from_tuples(tuplesCols,
                                        names=['Away Team', 'Goals'])
    index = pd.MultiIndex.from_tuples(
        tuplesInd, names=['Home Team', 'Percent Probability'])
    htAvgGoals = model.predict(
        pd.DataFrame(data={
            'team': homeTeam,
            'opponent': awayTeam,
            'home': 1
        },
                     index=[1])).values[0]
    atAvgGoals = model.predict(
        pd.DataFrame(data={
            'team': awayTeam,
            'opponent': homeTeam,
            'home': 0
        },
                     index=[1])).values[0]
    FixtureCalculated = [[
        round(poisson.pmf(i, teamAvg), 1) for i in range(0, maxGoals + 1)
    ] for teamAvg in [htAvgGoals, atAvgGoals]]

    HomeVsAway = pd.DataFrame(np.outer(np.array(FixtureCalculated[0]),
                                       np.array(FixtureCalculated[1])),
                              columns=columns,
                              index=index)
    HomeVsAway = HomeVsAway.style.set_table_styles(
        [dict(selector='th', props=[('text-align', 'center')])])
    HomeVsAway.set_properties(**{'text-align': 'center'})

    HomeAwayDraw = {
        'Home':
        round(
            np.sum(
                np.tril(
                    np.outer(np.array(FixtureCalculated[0]),
                             np.array(FixtureCalculated[1])), -1)), 3),
        'Draw':
        round(
            np.sum(
                np.diag(
                    np.outer(np.array(FixtureCalculated[0]),
                             np.array(FixtureCalculated[1])))), 3),
        'Away':
        round(
            np.sum(
                np.triu(
                    np.outer(np.array(FixtureCalculated[0]),
                             np.array(FixtureCalculated[1])), 1)), 3)
    }

    returnable = [[round(htAvgGoals, 0),
                   round(atAvgGoals, 0)], HomeVsAway,
                  pd.DataFrame(HomeAwayDraw.values(),
                               index=HomeAwayDraw.keys()).T]
    return returnable
# Evaluate the Poisson probability mass function at a single point.
# (The printed message keeps its original "density function" wording.)

# Reference: http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.poisson.html
from scipy.stats import poisson

# Mean of the distribution.
mean = 10

# Integer value at which the probability is evaluated.
x = 6

# Emit a blank line, then the labelled value of poisson.pmf(x, mean).
print()
print(
    "The value of the probability density function at x =",
    x,
    "is f(x) =",
    poisson.pmf(x, mean),
)


Ejemplo n.º 43
0
 def quick_poisson_pmf(n, lam):
     """Return poisson.pmf(n, lam), memoised in the module-level POISSON_PMF_DICT.

     The cache key is the tuple ``(n, lam)``. A value is computed and stored
     the first time a key is requested; later calls return the stored value.
     """
     # The module-level dict is only mutated, never rebound, so no `global`
     # declaration is required.
     cache = POISSON_PMF_DICT
     lookup = (n, lam)
     if lookup in cache:
         return cache[lookup]
     cache[lookup] = poisson.pmf(n, lam)
     return cache[lookup]
Ejemplo n.º 44
0
 def test_pmf_p5(self):
     """Check genpoisson_p.pmf(10, 2, 1e-25, 5) against poisson.pmf(10, 2) at rtol=1e-12."""
     assert_allclose(
         poisson.pmf(10, 2),
         sm.distributions.genpoisson_p.pmf(10, 2, 1e-25, 5),
         rtol=1e-12,
     )
Ejemplo n.º 45
0
def negativelyAffectedTrips (T,N,X,m,cluster,bikeData,parLambda,nDays,A,
                            poissonArray,timesArray,ind=None,randomSeed=None,
                            nStations=329):
    """
    Counts the number of negatively affected trips.
    We divide the bike stations in m groups according to K-means algorithm.
    The bikes are distributed uniformly in each group.

    A trip is counted as negatively affected when a rider finds no bike at the
    pick-up station, or no free dock at the drop-off station (in which case
    the bike is returned at the station chosen by ``findBikeStation``).

    Args:
        T (int): Duration of the simulation in hours (it always starts at 7:00am).
        N (numpy array): Vector N(T,A_{i}).
        X (numpy array): Vector with the initial configuration of the bikes.
        m (int): Number of groups formed with the bike stations.
        cluster (List[List[List[int,float,float]]]):
                            Contains the clusters of the bike stations
                            with their ids, and geographic coordinates.
        bikeData (numpy array): Matrix with the ID, numberDocks, latitude,
                                and longitude of each bike station.
        parLambda (numpy array) : Vector with the parameters of the
                                  Poisson processes N(T,A_{i}).
        nDays: Number of different days considered in the simulation (i.e. 365).
        A (List[List[Tuple(int,int)]]): List with subsets of pair of bike stations.
        poissonArray (List[List[numpy array]]):
            List with the parameters of the Poisson processes N(T,(i,j)), where
            the jth entry holds the parameters of day j between each pair of
            bike stations. For lamb = poissonArray[j][0], the process between
            stations lamb[1,l] and lamb[2,l] has parameter lamb[0,l]. This is
            a sparse representation of the original matrix, so a pair of
            stations that does not appear has parameter zero.
        timesArray (List[List[numpy array]]):
            Similar to poissonArray, but with the mean travel times between
            the stations.
        ind (int or None): Day of the year when the simulation is run. When
            None, a day is sampled with probabilities proportional to the
            Poisson pmf of int(N[0]) under each entry of parLambda.
        randomSeed (int): Random seed.
        nStations (int): Number of bike stations.

    Returns:
        int: Overall number of negatively affected trips multiplied by -1.
    """
    if randomSeed is not None:
        randst = np.random.mtrand.RandomState(randomSeed)
    else:
        randst=np.random

    if ind is None:
        probs=poisson.pmf(int(N[0]),mu=np.array(parLambda))
        probs=probs/np.sum(probs)
        # choice(..., size=1) returns a length-1 array; unwrap it so `ind` is
        # a plain int, exactly as when the caller supplies the day.
        ind=int(randst.choice(range(nDays),size=1,p=probs)[0])

    # Expand the sparse (value, station i, station j) rows into a dense
    # nStations x nStations matrix of mean travel times.
    exponentialTimes=timesArray[ind][0]
    exponentialTimes2=np.zeros((nStations,nStations))
    nExp=len(exponentialTimes[0,:])
    for i in range(nExp):
        # Station ids are stored in a numeric array; cast before indexing.
        t1=int(exponentialTimes[1,i])
        t2=int(exponentialTimes[2,i])
        exponentialTimes2[t1,t2]=exponentialTimes[0,i]
    poissonParam=poissonArray[ind]

    unHappy=0
    # NOTE(review): from the updates below, column 0 of `state` appears to
    # count free docks and column 1 available bikes - confirm against
    # startInitialConfiguration.
    state=startInitialConfiguration(X,m,cluster,bikeData,nDays)

    nSets=1
    times=[]
    nTimes=0
    for i in range(nSets):
        temp=PoissonProcess(T,poissonParam,A[i],N[i],randst)

        nTimes+=temp[1]
        times.extend(temp[0])

    # One row per trip: arrival time, pick-up station, drop-off station.
    Times=np.zeros((nTimes,3))
    k=0
    for i in range(len(times)):
        for j in range(len(times[i][1])):
            Times[k,0]=times[i][1][j] #arrival times
            Times[k,1]=times[i][0][0] #station i
            Times[k,2]=times[i][0][1] #station j
            k+=1
    Times=Times[Times[:,0].argsort()]

    # Pending drop-offs as (time, station) tuples, kept sorted by time.
    dropTimes=[]
    for i in range(nTimes):
        currentTime=Times[i,0]
        # Settle every drop-off that happened before this trip starts.
        while (dropTimes and currentTime>dropTimes[0][0]):
            dropStation=dropTimes.pop(0)[1]
            if state[dropStation,0]>0:
                state[dropStation,0]-=1
                state[dropStation,1]+=1
            else:
                # No room at the intended station: the trip is negatively
                # affected and the bike goes to an alternative station.
                unHappy+=1
                j=findBikeStation(state,dropStation)
                state[j,0]-=1
                state[j,1]+=1
        # Times is a float array; station ids must be ints to index `state`.
        bikePickUp=int(Times[i,1])
        bikeDrop=int(Times[i,2])

        if state[bikePickUp,1]==0:
            # No bike available at the pick-up station.
            unHappy+=1
            continue
        timeUsed=randst.exponential(exponentialTimes2[bikePickUp,bikeDrop])
        dropTimes.append((currentTime+timeUsed,bikeDrop))
        dropTimes.sort(key=lambda x:x[0])
        state[bikePickUp,1]-=1
        state[bikePickUp,0]+=1
    return -unHappy
Ejemplo n.º 46
0
 def test_pmf_zero(self):
     """Check zipoisson.pmf(3, 2, 0) against poisson.pmf(3, 2) at rtol=1e-12."""
     assert_allclose(
         poisson.pmf(3, 2),
         sm.distributions.zipoisson.pmf(3, 2, 0),
         rtol=1e-12,
     )
Ejemplo n.º 47
0
 def level_distribution(self, k, mu):
     """Return the Poisson(mu) pmf at 1..k, rescaled so the entries sum to one.

     Args:
         k: Highest level included; levels run from 1 to k inclusive.
         mu: Poisson mean passed to ``poisson.pmf``.

     Returns:
         numpy array with one normalised weight per level.
     """
     weights = []
     for level in range(1, k + 1):
         weights.append(poisson.pmf(level, mu))
     weights = np.array(weights)
     total = np.sum(weights)
     return weights / total
Ejemplo n.º 48
0
 def test_pmf(self):
     """Check zipoisson.pmf(2, 2, 0.1) against poisson.pmf(2, 2) within rtol=atol=5e-2."""
     assert_allclose(
         poisson.pmf(2, 2),
         sm.distributions.zipoisson.pmf(2, 2, 0.1),
         rtol=5e-2,
         atol=5e-2,
     )
Ejemplo n.º 49
0
# Histogram of 10,000 normal samples with mean `mu` and standard deviation `sigma`.
mu = 5.0
sigma = 2.0
values = np.random.normal(mu, sigma, 10000)
plt.hist(values, 50)
plt.show()

# Exponential PDF (probability density function) / "power law"
# The third argument of np.arange is the step: a step of 10000 produced the
# single point [0], so no curve was drawn. A step of 0.001 samples [0, 10).
x = np.arange(0, 10, 0.001)
plt.plot(x, expon.pdf(x))

# Binomial probability mass function
# NOTE(review): the grid contains non-integer points, where a pmf is zero, so
# the line drops to zero between integers - confirm this look is intended.
n, p = 10, 0.5
x = np.arange(0, 10, 0.001)
plt.plot(x, binom.pmf(x, n, p))

# Poisson probability mass function
# NOTE(review): same remark as above for the half-integer grid points.
mu = 500
x = np.arange(400, 600, 0.5)
plt.plot(x, poisson.pmf(x, mu))

# Histogram of normal samples with mean 0 and standard deviation 0.5.
vals = np.random.normal(0, 0.5, 10000)
plt.hist(vals, 50)
plt.show()

# Mean, variance, skewness and kurtosis of the sample.
print(np.mean(vals))
print(np.var(vals))
print(sp.skew(vals))
print(sp.kurtosis(vals))

plt.hist(vals, 50)
plt.show()
Ejemplo n.º 50
0
    if params.detailed:
        print(
            "\nThe TERMINAL branch length is %1.3e, expecting %1.1f mutations vs an observed %d"
            % (corrected_terminal_branch_length, expected_terminal_mutations,
               terminal_mutation_count))
        print("Of these %d mutations," % terminal_mutation_count + "".join([
            '\n\t - %d occur %d times' % (n, mi)
            for mi, n in enumerate(multiplicities_terminal) if n
        ]))

    ###########################################################################
    ### Output the distribution of times mutations at particular positions are observed
    ###########################################################################
    print("\nOf the %d positions in the genome," % L + "".join([
        '\n\t - %d were hit %d times (expected %1.2f)' %
        (n, mi, L * poisson.pmf(mi, 1.0 * total_mutations / L))
        for mi, n in enumerate(multiplicities_positions) if n
    ]))

    # compare that distribution to a Poisson distribution with the same mean
    p = poisson.pmf(np.arange(10 * multiplicities_positions.max()),
                    1.0 * total_mutations / L)
    print(
        "\nlog-likelihood difference to Poisson distribution with same mean: %1.3e"
        % (-L * np.sum(p * np.log(p + 1e-100)) +
           np.sum(multiplicities_positions *
                  np.log(p[:len(multiplicities_positions)] + 1e-100))))

    ###########################################################################
    ### Output the mutations that are observed most often
    ###########################################################################
Ejemplo n.º 51
0
         linewidth=3)

ax1.legend(fontsize=15)
ax2.legend(fontsize=15)
ax2.set_xlabel('Time [ns]', fontsize='15')
fig.text(0.04,
         0.5,
         r'$N_{events}\sum_n nH_{ni}$',
         va='center',
         rotation='vertical',
         fontsize=15)

spectra_rng = np.nonzero(
    np.logical_and(PEs > PEs[np.argmax(spectra[0]) - 5],
                   PEs < PEs[np.argmax(spectra[0]) + 5]))[0]
model = np.sum(H[:, 0, 0]) * poisson.pmf(
    PEs, np.sum(m[:, :, 0].T * np.arange(np.shape(H)[0])))[spectra_rng]
ax3.plot(spectra[0], 'ko', label='spectrum - PMT7')
ax3.plot(PEs[spectra_rng], model, 'r-.')

spectra_rng = np.nonzero(
    np.logical_and(PEs > PEs[np.argmax(spectra[1]) - 5],
                   PEs < PEs[np.argmax(spectra[1]) + 5]))[0]
model = np.sum(H[:, 0, 1]) * poisson.pmf(
    PEs, np.sum(m[:, :, 1].T * np.arange(np.shape(H)[0])))[spectra_rng]
ax4.plot(spectra[1], 'ko', label='spectrum - PMT7')
ax4.plot(PEs[spectra_rng], model, 'r-.')

fig, ((ax1, ax3), (ax2, ax4)) = plt.subplots(2, 2)
ax3.plot(delays, delay_h, 'ko')
ax3.plot(delays[rng_delay],
         rec['a_delay'][0] *
Ejemplo n.º 52
0
    def _get_fit(self, per_loc, per_admit, per_cc, LOS_cc, LOS_nc, per_vent,
                 ppe_GLOVE_SURGICAL, ppe_GLOVE_EXAM_NITRILE,
                 ppe_GLOVE_GLOVE_EXAM_VINYL, ppe_MASK_FACE_PROCEDURE_ANTI_FOG,
                 ppe_MASK_PROCEDURE_FLUID_RESISTANT,
                 ppe_GOWN_ISOLATION_XLARGE_YELLOW,
                 ppe_MASK_SURGICAL_ANTI_FOG_W_FILM,
                 ppe_SHIELD_FACE_FULL_ANTI_FOG,
                 ppe_RESPIRATOR_PARTICULATE_FILTER_REG, TimeLag, PopSize,
                 ForecastDays, forecasted_y, focal_loc, fdates, new_cases,
                 model, Forecasted_cases_df_for_download,
                 Forecasted_patient_census_df_for_download,
                 Forecasted_ppe_needs_df_for_download):

        # declare figure object
        fig = plt.figure(figsize=(15, 17))

        # Declare figure axis to hold table of forecasted cases, visits, admits
        ax = plt.subplot2grid((6, 4), (0, 2), colspan=2, rowspan=2)
        # The figure will actually be a table so turn the figure axes off
        ax.axis('off')

        # shorten location name if longer than 12 characters
        loc = str(focal_loc)
        if len(loc) > 12:
            loc = loc[:12]
            loc = loc + '...'

        # declare column labels
        col_labels = ['Total cases', 'New cases', 'New visits', 'New admits']

        # row labels are the dates
        row_labels = fdates.tolist()

        # truncate forecasted_y to only the current day and days
        # in the forecast window

        # lists to hold table values
        table_vals = []
        cclr_vals = []
        rclr_vals = []

        #### Inclusion of time lag
        # time lag is modeled as a Poisson distributed
        # random variable with a mean chosen by the user (TimeLag)
        new_cases_lag = []
        x = list(range(len(forecasted_y)))
        for i in new_cases:
            lag_pop = i * poisson.pmf(x, TimeLag)
            new_cases_lag.append(lag_pop)

        # Declare a list to hold time-staggered lists
        # This will allow the time-lag effects to
        # be summed across rows (days)
        lol = []
        for i, daily_vals in enumerate(new_cases_lag):
            # number of indices to pad in front
            fi = [0] * i
            diff = len(new_cases) - len(fi)
            # number of indices to pad in back
            bi = [0] * diff
            ls = list(fi) + list(daily_vals) + list(bi)
            lol.append(np.array(ls))

        # convert the list of time-staggered lists to an array
        ar = np.array(lol)

        # get the time-lagged sum of visits across days
        ts_lag = np.sum(ar, axis=0)
        # upper truncate for the number of days in observed y values
        ts_lag = ts_lag[:len(new_cases)]
        ts_lag = ts_lag[:len(new_cases)]

        # row labels are the dates
        row_labels = fdates.tolist()
        # only show the current date and dates in the forecast window
        row_labels = row_labels[-(ForecastDays + 1):]

        # lower truncate lists for forecast window
        # that is, do not include days before present day
        new_cases = new_cases[-(ForecastDays + 1):]
        forecasted_y = forecasted_y[-(ForecastDays + 1):]
        ts_lag2 = ts_lag[-(ForecastDays + 1):]

        # Declare pandas dataframe to hold data for download
        Forecasted_cases_df_for_download = pd.DataFrame(columns=['date'] +
                                                        col_labels)

        # For each date intended for the output table

        Total = []
        New = []
        Visits = []
        Admits = []
        for i in range(len(row_labels)):

            new = new_cases[i]
            val = ts_lag2[i]

            # each cell is a row with 4 columns:
            #     Total cases,
            #     new cases,
            #     time-lagged visits to your hospital,
            #     time-lagged admits to your hospital

            cell = [
                int(np.round(forecasted_y[i])),
                int(np.round(new)),
                int(np.round(val * (per_loc * 0.01))),
                int(np.round((0.01 * per_admit) * val * (per_loc * 0.01)))
            ]

            Total.append(cell[0])
            New.append(cell[1])
            Visits.append(cell[2])
            Admits.append(cell[3])

            # Add the row to the dataframe
            df_row = [row_labels[i]]
            df_row.extend(cell)
            labs = ['date'] + col_labels
            temp = pd.DataFrame([df_row], columns=labs)
            Forecasted_cases_df_for_download = pd.concat(
                [Forecasted_cases_df_for_download, temp])

            # color the first row grey and remaining rows white
            if i == 0:
                rclr = '0.8'
                cclr = ['0.8', '0.8', '0.8', '0.8']
            else:
                rclr = 'w'
                cclr = ['w', 'w', 'w', 'w']
            table_vals.append(cell)
            cclr_vals.append(cclr)
            rclr_vals.append(rclr)

        # Generate and customize table for output
        ncol = 4
        lim = 15

        the_table = plt.table(cellText=table_vals[0:lim],
                              colWidths=[0.32, 0.32, 0.32, 0.32],
                              rowLabels=row_labels[0:lim],
                              colLabels=col_labels,
                              cellLoc='center',
                              loc='upper center',
                              cellColours=cclr_vals[0:lim],
                              rowColours=rclr_vals[0:lim])

        the_table.auto_set_font_size(True)
        the_table.scale(1, 1.32)

        # Customize table title
        titletext = 'Forecasted cases for ' + loc + '\nData beyond 14 days is available in the csv (below)'
        plt.title(titletext, fontsize=14, fontweight='bold')

        ax = plt.subplot2grid((6, 4), (0, 0), colspan=2, rowspan=2)

        #plt.plot(row_labels, Total, c='0.2', label='Total cases', linewidth=3)
        #plt.plot(row_labels, New, c='0.5', label='New cases', linewidth=3)
        plt.plot(row_labels,
                 Visits,
                 c='Crimson',
                 label='New visits',
                 linewidth=3)
        plt.plot(row_labels,
                 Admits,
                 c='Steelblue',
                 label='New admits',
                 linewidth=3)

        plt.title('Forecasted visits & admits', fontsize=16, fontweight='bold')

        # log-scale y-values to base 10 if the user has chosen
        #if log_scl == True:
        #    plt.yscale('log')

        # As before, limit dates displayed on the x-axis
        # prevents overcrowding
        ax = plt.gca()
        temp = ax.xaxis.get_ticklabels()
        temp = list(set(temp) - set(temp[::12]))
        for label in temp:
            label.set_visible(False)

        # As before, remove legend line handles and change the color of
        # the text to match the color of the line
        leg = ax.legend(handlelength=0,
                        handletextpad=0,
                        fancybox=False,
                        loc='best',
                        frameon=False,
                        fontsize=14)

        for line, text in zip(leg.get_lines(), leg.get_texts()):
            text.set_color(line.get_color())

        for item in leg.legendHandles:
            item.set_visible(False)

        plt.ylabel('COVID-19 cases', fontsize=14, fontweight='bold')
        plt.xlabel('Date', fontsize=14, fontweight='bold')

        # Generate figure for patient census
        ax = plt.subplot2grid((6, 4), (2, 0), colspan=2, rowspan=2)

        #### Construct arrays for critical care and non-critical care patients
        cc = (0.01 * per_cc) * (0.01 * per_admit) * (
            0.01 * per_loc) * np.array(ts_lag)
        cc = cc.tolist()

        nc = (1 - (0.01 * per_cc)) * (0.01 * per_admit) * (
            0.01 * per_loc) * np.array(ts_lag)
        nc = nc.tolist()

        # LOS for non critical care = 5 days
        # LOS for critical care = 10 days

        # Model length of stay (LOS) as a lognormally distributed
        # random variable

        #sigma = 0.3
        #n_cc = np.log(LOS_cc) - (sigma**2)/2
        #n_nc = np.log(LOS_nc) - (sigma**2)/2

        #x_vars = np.array(list(range(1, len(fdates)+1)))

        #p_nc = 0.5 + 0.5 * sc.special.erf((np.log(x_vars) - n_nc)/(2**0.5*sigma))
        #p_cc = 0.5 + 0.5 * sc.special.erf((np.log(x_vars) - n_cc)/(2**0.5*sigma))

        # Model length of stay (LOS) as a binomially distributed
        # random variable according to binomial parameters p and n
        #    p: used to obtain a symmetrical distribution
        #    n: (n_cc & n_nc) = 2 * LOS will produce a binomial
        #       distribution with a mean equal to the LOS

        p = 0.1
        n_cc = LOS_cc * 10
        n_nc = LOS_nc * 10

        # get the binomial random variable properties
        rv_nc = binom(n_nc, p)
        # Use the binomial cumulative distribution function
        p_nc = rv_nc.cdf(np.array(range(1, len(fdates) + 1)))

        # get the binomial random variable properties
        rv_cc = binom(n_cc, p)
        # Use the binomial cumulative distribution function
        p_cc = rv_cc.cdf(np.array(range(1, len(fdates) + 1)))

        # Initiate lists to hold numbers of critical care and non-critical care patients
        # who are expected as new admits (index 0), as 1 day patients, 2 day patients, etc.
        LOScc = np.zeros(len(fdates))
        LOScc[0] = ts_lag[0] * (0.01 * per_cc) * (0.01 * per_admit) * (0.01 *
                                                                       per_loc)
        LOSnc = np.zeros(len(fdates))
        LOSnc[0] = ts_lag[0] * (1 - (0.01 * per_cc)) * (0.01 * per_admit) * (
            0.01 * per_loc)

        total_nc = []
        total_cc = []

        # Roll up patient carry-over into lists of total critical care and total
        # non-critical patients expected
        for i, day in enumerate(fdates):
            LOScc = LOScc * (1 - p_cc)
            LOSnc = LOSnc * (1 - p_nc)

            LOScc = np.roll(LOScc, shift=1)
            LOSnc = np.roll(LOSnc, shift=1)

            LOScc[0] = ts_lag[i] * (0.01 * per_cc) * (0.01 * per_admit) * (
                0.01 * per_loc)
            LOSnc[0] = ts_lag[i] * (1 - (0.01 * per_cc)) * (
                0.01 * per_admit) * (0.01 * per_loc)

            total_nc.append(np.sum(LOSnc))
            total_cc.append(np.sum(LOScc))

        # Plot the critical care and non-critical care patient census over the
        # forecasted time frame
        plt.plot(fdates[-(ForecastDays + 1):],
                 total_cc[-(ForecastDays + 1):],
                 c='m',
                 label='Critical care',
                 linewidth=3)
        plt.plot(fdates[-(ForecastDays + 1):],
                 total_nc[-(ForecastDays + 1):],
                 c='0.4',
                 label='Non-critical care',
                 linewidth=3)
        plt.title('Forecasted census', fontsize=16, fontweight='bold')

        # log-scale y-values to base 10 if the user has chosen
        #if log_scl == True:
        #    plt.yscale('log')

        # As before, limit dates displayed on the x-axis
        # prevents overcrowding
        ax = plt.gca()
        temp = ax.xaxis.get_ticklabels()
        temp = list(set(temp) - set(temp[::12]))
        for label in temp:
            label.set_visible(False)

        # As before, remove legend line handles and change the color of
        # the text to match the color of the line
        leg = ax.legend(handlelength=0,
                        handletextpad=0,
                        fancybox=False,
                        loc='best',
                        frameon=False,
                        fontsize=14)

        for line, text in zip(leg.get_lines(), leg.get_texts()):
            text.set_color(line.get_color())

        for item in leg.legendHandles:
            item.set_visible(False)

        plt.ylabel('COVID-19 patients', fontsize=14, fontweight='bold')
        plt.xlabel('Date', fontsize=14, fontweight='bold')

        # Declare axis to be used for patient census table
        # and turn the visibility off
        ax = plt.subplot2grid((6, 4), (2, 2), colspan=2, rowspan=2)
        ax.axis('off')

        # Truncate location names if longer than 12 characters
        if len(loc) > 12:
            loc = loc[:12]
            loc = loc + '...'

        # declare table column labels
        col_labels = ['All COVID', 'Non-ICU', 'ICU', 'Vent']

        # declare row labels as dates
        row_labels = fdates.tolist()

        # truncate row labels and values to the present day
        # and days in the forecast window
        row_labels = row_labels[-(ForecastDays + 1):]
        total_nc_trunc = total_nc[-(ForecastDays + 1):]
        total_cc_trunc = total_cc[-(ForecastDays + 1):]

        # declare lists to hold table cell values and
        # row colors
        table_vals, cclr_vals, rclr_vals = [], [], []

        # declare pandas dataframe to hold patient census data for download
        Forecasted_patient_census_df_for_download = pd.DataFrame(
            columns=['date'] + col_labels)
        # For each row...
        for i in range(len(row_labels)):
            # Each cell is a row that holds:
            #    Total number of admits expected,
            #    Total number of non-critical care COVID-19 patients expected
            #    Total number of critical care COVID-19 patents expected
            #    Total number of ICU patients on ventilators expected
            cell = [
                int(np.round(total_nc_trunc[i] + total_cc_trunc[i])),
                int(np.round(total_nc_trunc[i])),
                int(np.round(total_cc_trunc[i])),
                int(np.round(total_cc_trunc[i] * (0.01 * per_vent)))
            ]

            # add the cell to the dataframe intended for csv download
            df_row = [row_labels[i]]
            df_row.extend(cell)
            labs = ['date'] + col_labels
            temp = pd.DataFrame([df_row], columns=labs)
            Forecasted_patient_census_df_for_download = pd.concat(
                [Forecasted_patient_census_df_for_download, temp])

            # set colors of rows
            if i == 0:
                rclr = '0.8'
                cclr = ['0.8', '0.8', '0.8', '0.8']
            else:
                rclr = 'w'
                cclr = ['w', 'w', 'w', 'w']

            # append cells and colors to respective lists
            table_vals.append(cell)
            cclr_vals.append(cclr)
            rclr_vals.append(rclr)

        # limit the number of displayed table rows
        ncol = 4
        lim = 15

        # declare and customize the table
        the_table = plt.table(cellText=table_vals[0:lim],
                              colWidths=[0.255, 0.255, 0.255, 0.255],
                              rowLabels=row_labels[0:lim],
                              colLabels=col_labels,
                              cellLoc='center',
                              loc='upper center',
                              cellColours=cclr_vals[0:lim],
                              rowColours=rclr_vals[0:lim])

        the_table.auto_set_font_size(True)
        the_table.scale(1, 1.32)

        # Set the plot (table) title
        titletext = 'Beds needed for COVID-19 cases' + '\nData beyond 14 days is available in the csv (below)'
        plt.title(titletext, fontsize=14, fontweight='bold')

        ####################### PPE ##################################
        ax = plt.subplot2grid((6, 4), (4, 0), colspan=2, rowspan=2)

        #### Construct arrays for critical care and non-critical care patients

        # All covid patients expected in house on each forecasted day. PUI is just a name here

        PUI_COVID = np.array(total_nc) + np.array(total_cc)
        # Preparing to add new visits, fraction of new cases visiting your hospital = 0.01 * per_loc
        new_visits_your_hospital = ts_lag * (0.01 * per_loc)
        # Add number of new visits to number of in house patients
        PUI_COVID = PUI_COVID + new_visits_your_hospital

        glove_surgical = np.round(ppe_GLOVE_SURGICAL * PUI_COVID).astype('int')
        glove_nitrile = np.round(ppe_GLOVE_EXAM_NITRILE *
                                 PUI_COVID).astype('int')
        glove_vinyl = np.round(ppe_GLOVE_GLOVE_EXAM_VINYL *
                               PUI_COVID).astype('int')
        face_mask = np.round(ppe_MASK_FACE_PROCEDURE_ANTI_FOG *
                             PUI_COVID).astype('int')
        procedure_mask = np.round(ppe_MASK_PROCEDURE_FLUID_RESISTANT *
                                  PUI_COVID).astype('int')
        isolation_gown = np.round(ppe_GOWN_ISOLATION_XLARGE_YELLOW *
                                  PUI_COVID).astype('int')
        surgical_mask = np.round(ppe_MASK_SURGICAL_ANTI_FOG_W_FILM *
                                 PUI_COVID).astype('int')
        face_shield = np.round(ppe_SHIELD_FACE_FULL_ANTI_FOG *
                               PUI_COVID).astype('int')
        respirator = np.round(ppe_RESPIRATOR_PARTICULATE_FILTER_REG *
                              PUI_COVID).astype('int')

        ppe_ls = [
            [glove_surgical, 'GLOVE SURGICAL', 'r'],
            [glove_nitrile, 'GLOVE EXAM NITRILE', 'orange'],
            [glove_vinyl, 'GLOVE EXAM VINYL', 'goldenrod'],
            [face_mask, 'MASK FACE PROCEDURE ANTI FOG', 'limegreen'],
            [procedure_mask, 'MASK PROCEDURE FLUID RESISTANT', 'green'],
            [isolation_gown, 'GOWN ISOLATION XLARGE YELLOW', 'cornflowerblue'],
            [surgical_mask, 'MASK SURGICAL ANTI FOG W/FILM', 'blue'],
            [face_shield, 'SHIELD FACE FULL ANTI FOG', 'plum'],
            [respirator, 'RESPIRATOR PARTICULATE FILTER REG', 'darkviolet']
        ]

        linestyles = [
            'dashed', 'dotted', 'dashdot', 'dashed', 'dotted', 'dashdot',
            'dotted', 'dashed', 'dashdot'
        ]

        for i, ppe in enumerate(ppe_ls):
            plt.plot(fdates[-(ForecastDays + 1):],
                     ppe[0][-(ForecastDays + 1):],
                     c=ppe[2],
                     label=ppe[1],
                     linewidth=2,
                     ls=linestyles[i])

        plt.title('Forecasted PPE needs', fontsize=16, fontweight='bold')
        #if log_scl == True:
        #    plt.yscale('log')

        ax = plt.gca()
        temp = ax.xaxis.get_ticklabels()
        temp = list(set(temp) - set(temp[::12]))
        for label in temp:
            label.set_visible(False)

        leg = ax.legend(handlelength=0,
                        handletextpad=0,
                        fancybox=True,
                        loc='best',
                        frameon=True,
                        fontsize=8)

        for line, text in zip(leg.get_lines(), leg.get_texts()):
            text.set_color(line.get_color())

        for item in leg.legendHandles:
            item.set_visible(False)

        plt.ylabel('PPE Supplies', fontsize=14, fontweight='bold')
        plt.xlabel('Date', fontsize=14, fontweight='bold')

        ax = plt.subplot2grid((6, 4), (4, 2), colspan=2, rowspan=2)
        ax.axis('off')
        #ax.axis('tight')

        #### Construct arrays for critical care and non-critical care patients
        #PUI_COVID = np.array(total_nc) + np.array(total_cc)
        PUI_COVID = PUI_COVID[-(ForecastDays + 1):]

        glove_surgical = np.round(ppe_GLOVE_SURGICAL * PUI_COVID).astype('int')
        glove_nitrile = np.round(ppe_GLOVE_EXAM_NITRILE *
                                 PUI_COVID).astype('int')
        glove_vinyl = np.round(ppe_GLOVE_GLOVE_EXAM_VINYL *
                               PUI_COVID).astype('int')
        face_mask = np.round(ppe_MASK_FACE_PROCEDURE_ANTI_FOG *
                             PUI_COVID).astype('int')
        procedure_mask = np.round(ppe_MASK_PROCEDURE_FLUID_RESISTANT *
                                  PUI_COVID).astype('int')
        isolation_gown = np.round(ppe_GOWN_ISOLATION_XLARGE_YELLOW *
                                  PUI_COVID).astype('int')
        surgical_mask = np.round(ppe_MASK_SURGICAL_ANTI_FOG_W_FILM *
                                 PUI_COVID).astype('int')
        face_shield = np.round(ppe_SHIELD_FACE_FULL_ANTI_FOG *
                               PUI_COVID).astype('int')
        respirator = np.round(ppe_RESPIRATOR_PARTICULATE_FILTER_REG *
                              PUI_COVID).astype('int')

        ppe_ls = [
            [glove_surgical, 'GLOVE SURGICAL', 'r'],
            [glove_nitrile, 'GLOVE EXAM NITRILE', 'orange'],
            [glove_vinyl, 'GLOVE EXAM VINYL', 'goldenrod'],
            [face_mask, 'MASK FACE PROCEDURE ANTI FOG', 'limegreen'],
            [procedure_mask, 'MASK PROCEDURE FLUID RESISTANT', 'green'],
            [isolation_gown, 'GOWN ISOLATION XLARGE YELLOW', 'cornflowerblue'],
            [surgical_mask, 'MASK SURGICAL ANTI FOG W/FILM', 'blue'],
            [face_shield, 'SHIELD FACE FULL ANTI FOG', 'plum'],
            [respirator, 'RESPIRATOR PARTICULATE FILTER REG', 'darkviolet']
        ]

        if len(loc) > 12:
            loc = loc[:12]
            loc = loc + '...'

        col_labels = [
            ppe_ls[0][1], ppe_ls[1][1], ppe_ls[2][1], ppe_ls[3][1],
            ppe_ls[4][1], ppe_ls[5][1], ppe_ls[6][1], ppe_ls[7][1],
            ppe_ls[8][1]
        ]

        row_labels = fdates.tolist()
        row_labels = row_labels[-(ForecastDays + 1):]

        table_vals = []
        cclr_vals = []
        rclr_vals = []

        Forecasted_ppe_needs_df_for_download = pd.DataFrame(columns=['date'] +
                                                            col_labels)
        for i in range(len(row_labels)):

            cell = [
                ppe_ls[0][0][i], ppe_ls[1][0][i], ppe_ls[2][0][i],
                ppe_ls[3][0][i], ppe_ls[4][0][i], ppe_ls[5][0][i],
                ppe_ls[6][0][i], ppe_ls[7][0][i], ppe_ls[8][0][i]
            ]

            df_row = [row_labels[i]]
            df_row.extend(cell)

            labs = ['date'] + col_labels
            temp = pd.DataFrame([df_row], columns=labs)
            Forecasted_ppe_needs_df_for_download = pd.concat(
                [Forecasted_ppe_needs_df_for_download, temp])

            if i == 0:
                rclr = '0.8'
                cclr = [
                    '0.8', '0.8', '0.8', '0.8', '0.8', '0.8', '0.8', '0.8',
                    '0.8'
                ]
            else:
                rclr = 'w'
                cclr = ['w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w']

            table_vals.append(cell)
            cclr_vals.append(cclr)
            rclr_vals.append(rclr)

        #ncol = 9
        cwp = 0.15
        lim = 15

        the_table = plt.table(cellText=table_vals[0:lim],
                              colWidths=[cwp] * 9,
                              rowLabels=row_labels[0:lim],
                              colLabels=None,
                              cellLoc='center',
                              loc='upper center',
                              cellColours=cclr_vals[0:lim],
                              rowColours=rclr_vals[0:lim])

        the_table.auto_set_font_size(True)
        the_table.scale(1, 1.32)

        for i in range(len(ppe_ls)):
            clr = ppe_ls[i][2]
            for j in range(lim):
                the_table[(j, i)].get_text().set_color(clr)

        # set values for diagonal column labels
        hoffset = -0.3  #find this number from trial and error
        voffset = 1.0  #find this number from trial and error
        col_width = [0.06, 0.09, 0.09, 0.12, 0.133, 0.138, 0.128, 0.135, 0.142]

        col_labels2 = [['GLOVE SURGICAL', 'r'],
                       ['GLOVE EXAM NITRILE', 'orange'],
                       ['GLOVE GLOVE EXAM VINYL', 'goldenrod'],
                       ['MASK FACE PROC. A-FOG', 'limegreen'],
                       ['MASK PROC. FLUID RES.', 'green'],
                       ['GOWN ISO. XL YELLOW', 'cornflowerblue'],
                       ['MASK SURG. ANTI FOG W/FILM', 'blue'],
                       ['SHIELD FACE FULL ANTI FOG', 'plum'],
                       ['RESP. PART. FILTER REG', 'darkviolet']]

        count = 0
        for i, val in enumerate(col_labels2):
            ax.annotate('  ' + val[0],
                        xy=(hoffset + count * col_width[i], voffset),
                        xycoords='axes fraction',
                        ha='left',
                        va='bottom',
                        rotation=-25,
                        size=8,
                        c=val[1])
            count += 1

        plt.subplots_adjust(left=None,
                            bottom=None,
                            right=None,
                            top=None,
                            wspace=1.1,
                            hspace=1.1)
        return Forecasted_cases_df_for_download, Forecasted_patient_census_df_for_download, Forecasted_ppe_needs_df_for_download
Ejemplo n.º 53
0
def fc_find_acceptance_interval_poisson(mu, background, x_bins, alpha):
    r"""Analytical acceptance interval for Poisson process with background.

    .. math :: \int_{x_{min}}^{x_{max}} P(x|mu)\mathrm{d}x = alpha

    Bins are added to the interval in decreasing order of the likelihood
    ratio ``P(x | mu) / P(x | mu_best)`` until the accumulated probability
    reaches ``alpha``; ``mu_best = max(0, x - background)`` is the physically
    allowed signal mean that maximises the probability of observing ``x``.

    For more information see :ref:`documentation <feldman_cousins>`.

    Parameters
    ----------
    mu : float
        Mean of the signal
    background : float
        Mean of the background
    x_bins : array-like
        Bins in x. At least two entries are required; the bin width is taken
        from the first two entries.
    alpha : float
        Desired confidence level

    Returns
    -------
    (x_min, x_max) : tuple of floats
        Acceptance interval. ``x_max`` is the upper edge of the last accepted
        bin, i.e. its value plus one bin width.

    Raises
    ------
    ValueError
        If the probabilities of all ``x_bins`` sum to less than ``alpha``.
    """
    # The docstring promises array-like input; coerce so that lists and
    # tuples work as well as ndarrays.
    x_bins = np.asarray(x_bins)
    x_bin_width = x_bins[1] - x_bins[0]

    # Probability of each bin under the tested hypothesis (signal + background).
    p = np.asarray(poisson.pmf(x_bins, mu=mu + background), dtype=float)

    if p.sum() < alpha:
        raise ValueError("X bins don't contain enough probability to reach "
                         "desired confidence level for this mu!")

    # Implementing the boundary condition at zero: the best-fit signal mean
    # cannot be negative.
    mu_best = np.maximum(0, x_bins - background)
    prob_mu_best = np.asarray(poisson.pmf(x_bins, mu=mu_best + background),
                              dtype=float)

    # Likelihood ratio; prob_mu_best should never be zero, but guard the
    # division anyway and use a ratio of 0 for such bins.
    r = np.zeros_like(p)
    np.divide(p, prob_mu_best, out=r, where=prob_mu_best != 0.0)

    # Highest ratio gets rank 1. A stable sort keeps tied bins in ascending
    # index order, matching a sort on (rank, index) pairs.
    rank = rankdata(-r, method="dense")
    order = np.argsort(rank, kind="stable")

    # Accumulate probability in rank order and stop at the first bin that
    # brings the running total up to alpha.
    cumulative = np.cumsum(p[order])
    reached = np.flatnonzero(cumulative >= alpha)
    n_accepted = reached[0] + 1 if reached.size else order.size
    accepted = order[:n_accepted]

    index_min = accepted.min()
    index_max = accepted.max()

    return x_bins[index_min], x_bins[index_max] + x_bin_width
Ejemplo n.º 54
0
    n1 = pbc(L, Lx, Ly, Lz[0], x, y, z, X13, Y13, Z13, np13)
    n2 = pbc(L, Lx, Ly, Lz[1], x, y, z + Lz[0], X14, Y14, Z14, np14)
    n3 = pbc(L, Lx, Ly, Lz[2], x, y, z + Lz[0] + Lz[1], X15, Y15, Z15, np15)
    Nhalos.append(n1 + n2 + n3)
    i += 1
#print Nhalos

# Compare the sampled counts in Nhalos with a Poisson distribution whose rate
# is the sample mean. This snippet uses Python 2 print statements.

# Evaluation grid from 0 to max(Nhalos) with max(Nhalos) + 1 points.
nhalos = np.linspace(0, np.amax(Nhalos), np.amax(Nhalos) + 1)

print "Nhalos mean", np.mean(Nhalos)
print "Standard deviation", np.std(Nhalos)
Poisson = []
print "Len nhalos", len(nhalos)
# Poisson rate parameter: the sample mean of the counts.
l = np.mean(Nhalos)
# Poisson pmf evaluated at every grid value.
for i in range(len(nhalos)):
    Poisson.append(poisson.pmf(nhalos[i], l))
# Dump the raw counts to Nhalo.dat.
# NOTE(review): no separator or newline is written between values, so the
# numbers run together in the output file -- confirm this is intended.
f = open("Nhalo.dat", "w")
for i in range(len(Nhalos)):
    f.write(str(Nhalos[i]))
f.close()

# Normalised histogram of the counts.
# NOTE(review): `bins` is a float expression and `normed` is a legacy keyword
# (later matplotlib releases use `density`) -- confirm the targeted versions.
plt.hist(Nhalos,
         range=(0, np.amax(Nhalos)),
         bins=(np.amax(Nhalos) + 3.0) / 9.0,
         normed=True)
#plt.hist(Nhalos,  bins=(np.amax(Nhalos)+3.0)/9.0, normed=True)
# Overlay the Poisson pmf in red.
plt.plot(nhalos, Poisson, c='r', linewidth='2.5')
plt.xlabel(r"$\mathrm{N}$", fontsize=25)
plt.ylabel(r"$\mathrm{P(N)}$", fontsize=25)
#plt.text(20, 0.02, r"$\mathrm{L = 1.58^8L_{\odot}}$", fontsize=20)
#plt.text(20, 0.017, r"$\mathrm{M_{halo} = 7.28^{10}M_{\odot}}$", fontsize=20)
Ejemplo n.º 55
0
 def _pmf(self, x, l, p, w):
     """Return the pmf of a two-component Poisson/geometric mixture at x.

     The Poisson component (rate ``l``) carries weight ``w``; the geometric
     component (success probability ``p``, evaluated with ``loc=-1``)
     carries the remaining weight ``1 - w``.
     """
     poisson_part = poisson.pmf(x, l)
     geom_part = geom.pmf(x, p, loc=-1)
     return w * poisson_part + (1 - w) * geom_part
Ejemplo n.º 56
0
def poisson_probability(n, lam):
    """Return the Poisson probability P(N = n) for rate lam, memoised.

    Results are stored in the module-level ``poisson_cache`` dict so each
    ``(n, lam)`` pair is evaluated by scipy only once.

    Parameters
    ----------
    n : int
        Number of events.
    lam : float
        Expected number of events (Poisson rate).

    Returns
    -------
    float
        ``poisson.pmf(n, lam)``.
    """
    # Key on the argument pair itself. The former arithmetic key
    # ``n * 10 + lam`` collided for distinct inputs (e.g. (0, 20) and
    # (1, 10)) and then returned the wrong cached probability.
    key = (n, lam)
    if key not in poisson_cache:
        poisson_cache[key] = poisson.pmf(n, lam)
    return poisson_cache[key]
Ejemplo n.º 57
0
# -*- coding: utf-8 -*-
"""
Created on Sat Apr  6 15:27:45 2019

@author: Liu Yang
"""

from scipy.stats import poisson
import matplotlib.pyplot as plt
import numpy as np
plt.style.use('ggplot')

# Poisson rates to compare, and a frozen distribution for each of them.
lamda = [2, 6, 10, 20]
ps = [poisson(rate) for rate in lamda]
x = np.arange(30)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 5))
pmf_ax, cdf_ax = ax

# One curve per rate: pmf on the left panel, cdf on the right panel.
for rate in lamda:
    curve_label = 'lamda=' + str(rate)
    pmf_ax.plot(x, poisson.pmf(x, rate), label=curve_label)
    cdf_ax.plot(x, poisson.cdf(x, rate), label=curve_label)

# Both panels share the same x label and legend placement.
for panel, y_name in zip((pmf_ax, cdf_ax), ('pmf', 'cdf')):
    panel.set_xlabel('x')
    panel.set_ylabel(y_name)
    panel.legend(loc='best')

fig.suptitle('Poisson distribution')
#fig.tight_layout()
fig.savefig(
    r'C:\Users\10245\Desktop\数学笔记\泊松分布参数lamda的检验\poisson.png',
    dpi=300)
plt.show()

Ejemplo n.º 58
0
def common_dists():
    """Show some commonly used distributions.

    Draws a 2 x 3 grid of panels: Gaussian, uniform and exponential PDFs on
    the first row; Bernoulli, binomial and Poisson PMFs on the second row.

    Returns
    -------
    numpy.ndarray
        The six matplotlib axes, flattened in row-major order.
    """
    # prep the subplots
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    axes = axes.flatten()

    # gaussian
    mu, sigma = 0, 1
    x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
    axes[0].plot(x, norm.pdf(x, mu, sigma))
    axes[0].set_title('Gaussian PDF')
    axes[0].set_ylabel('density')
    axes[0].set_xlabel('x')
    axes[0].annotate(r'$\mu$',
                     xy=(mu, 0.4),
                     xytext=(mu - 0.09, 0.3),
                     arrowprops=dict(arrowstyle='->'))
    # double-headed bar spanning mu - sigma .. mu + sigma
    axes[0].annotate('',
                     xy=(mu - sigma, 0.25),
                     xytext=(mu + sigma, 0.25),
                     arrowprops=dict(arrowstyle='|-|, widthB=0.5, widthA=0.5'))
    axes[0].annotate(r'$2\sigma$', xy=(mu - 0.15, 0.22))

    # uniform distribution defined by min (a) and max (b)
    a, b = 0, 1
    peak = 1 / (b - a)
    axes[1].plot([a, a, b, b], [0, peak, peak, 0])
    axes[1].set_title('Uniform PDF')
    axes[1].set_ylabel('density')
    axes[1].set_xlabel('x')
    axes[1].annotate('min',
                     xy=(a, peak),
                     xytext=(a + 0.2, peak - 0.2),
                     arrowprops=dict(arrowstyle='->'))
    axes[1].annotate('max',
                     xy=(b, peak),
                     xytext=(b - 0.3, peak - 0.2),
                     arrowprops=dict(arrowstyle='->'))
    axes[1].set_ylim(0, 1.5)

    # exponential (scipy parameterises by scale = 1 / lambda)
    x = np.linspace(0, 5, 100)
    axes[2].plot(x, expon.pdf(x, scale=1 / 3))
    axes[2].set_title('Exponential PDF')
    axes[2].set_ylabel('density')
    axes[2].set_xlabel('x')
    axes[2].annotate(r'$\lambda$ = 3',
                     xy=(0, 3),
                     xytext=(0.5, 2.8),
                     arrowprops=dict(arrowstyle='->'))

    # Bernoulli of coin toss
    axes[3].bar(['heads', 'tails'], bernoulli.pmf([0, 1], p=0.5))
    axes[3].set_title('Bernoulli with fair coin toss (p = 0.5)')
    axes[3].set_ylabel('probability')
    axes[3].set_xlabel('coin toss result')
    axes[3].set_ylim(0, 1)

    # Binomial of tossing a fair coin many times
    x = np.arange(0, 10)
    # The number of trials must be an integer. The previous code passed the
    # shape tuple ``x.shape`` and only worked through accidental
    # broadcasting; ``x.size`` is the same count (10) as a plain int.
    n_tosses = x.size
    axes[4].plot(x,
                 binom.pmf(x, n=n_tosses, p=0.5),
                 linestyle='--',
                 marker='o')
    axes[4].set_title('Binomial PMF - many Bernoulli trials')
    axes[4].set_ylabel('probability')
    axes[4].set_xlabel('number of heads')

    # Poisson PMF (probability mass function) because this is a discrete random variable
    x = np.arange(0, 10)
    axes[5].plot(x, poisson.pmf(x, mu=3), linestyle='--', marker='o')
    axes[5].set_title('Poisson PMF')
    axes[5].set_ylabel('mass')
    axes[5].set_xlabel('x')
    axes[5].annotate(r'$\lambda$ = 3',
                     xy=(3, 0.225),
                     xytext=(1.9, 0.2),
                     arrowprops=dict(arrowstyle='->'))

    # add a title
    plt.suptitle('Some commonly used distributions', fontsize=15, y=0.95)

    return axes
Ejemplo n.º 59
0
from scipy.stats import poisson
# 1. Find the probability that at most 5 defective fuses will be found in a
#    box of 200 fuses if experience shows that 2 per cent of such fuses are
#    defective.
print("Assignment 1")
fuses_per_box = 200
defect_rate = 0.02
print("Probability of atmost 5 Defective",
      poisson.cdf(k=5, mu=fuses_per_box * defect_rate))
print("\n")

# 2. The number of accidents in a year attributed to taxi drivers in a city
#    follows a Poisson distribution with mean equal to 3. Out of 1,000 taxi
#    drivers, find approximately the number of drivers with
#    a) No accidents in a year
#    b) More than 3 accidents in a year
print("Assignment 2")

accidents_per_year = 3
driver_count = 1000
share_no_accidents = poisson.pmf(k=0, mu=accidents_per_year)
share_more_than_three = 1 - poisson.cdf(k=3, mu=accidents_per_year)
print("No of drivers with no accidents in a year",
      share_no_accidents * driver_count)
print("No of drivers with more than 3 accidents in a year",
      share_more_than_three * driver_count)

print("\n")

# 3. From the records of 10 Indian Army corps kept over 20 years the
#    following data were obtained showing the number of deaths caused by the
#    horse. Calculate the theoretical Poisson frequencies
#    No of Deaths:   0    1    2    3    4    Total
#    Frequency:      109  65   22   3    1    200
print("Assignment 3")
observed_frequency = {0: 109, 1: 65, 2: 22, 3: 3, 4: 1}
# Sample mean of deaths per record: weighted sum of counts over 200 records.
mean_value = sum(
    deaths * freq for deaths, freq in observed_frequency.items()) / 200
print("Frequencies of Deaths")
for k in np.arange(4 + 1):
    print("No of Deaths={}, Frequency={}"\
import pandas as pd
import numpy as np
import pickle
from scipy.stats import poisson

from matplotlib import pyplot as plt

import utils

# Load the pickled table of results.
# NOTE: pickle.load can execute arbitrary code -- only use trusted files.
with open('2010-2018_patched_df.p', 'rb') as f:
    df = pickle.load(f)

# Keep only rows where both Elo_Score_Before_* values lie strictly inside
# the (low, high) window.
low = 1550
high = 1600
elo_1 = df.Elo_Score_Before_1
elo_2 = df.Elo_Score_Before_2
mask = (elo_1 > low) & (elo_1 < high) & (elo_2 > low) & (elo_2 < high)

# Pool the Score_1 and Score_2 columns of the selected rows into one sample.
selected = df[mask]
scores = np.concatenate(
    (selected['Score_1'].values, selected['Score_2'].values))

mean = np.average(scores)
print(mean)
# plt.hist(df.Elo_Score_Before_1)
# plt.show()

# Observed fraction of each score 0..9 next to the Poisson pmf that uses the
# sample mean as its rate.
for i in range(10):
    observed_fraction = np.sum(scores == i) / len(scores)
    print(i, observed_fraction, poisson.pmf(i, mean))