예제 #1
0
파일: pvalue.py 프로젝트: carushi/reactIDR
 def plot_fitting_poisson(self, head):
     """Save a histogram of ``self.data`` overlaid with the fitted Poisson masses.

     The data are histogrammed into 20 bins whose heights sum to one.  For
     each non-negative bin edge whose rounded value exceeds the previously
     used one, the fitted probability of the counts between the two rounded
     edges is computed with ``self.params["poisson"][0]`` as the mean and
     drawn as a black line plus markers.  The figure is written to
     ``head + "_poisson.png"`` and then closed.

     :param head: prefix of the output file name.
     """
     mu = self.params["poisson"][0]
     print([mu])
     weights = np.ones_like(self.data) / len(self.data)
     _, bins, _ = plt.hist(self.data,
                           20,
                           weights=weights,
                           facecolor='green',
                           alpha=0.75)
     # Record every x position together with its probability.  Building the
     # x list with a separate filter (all non-negative edges) gives more
     # points than probabilities as soon as two edges round to the same
     # integer, and matplotlib then refuses to plot the mismatched arrays.
     xs, probs, prev = [], [], -1
     for edge in bins:
         rounded = np.round(edge)
         if edge >= 0 and rounded > prev:
             xs.append(rounded)
             probs.append(poisson.cdf(rounded, mu) - poisson.cdf(prev, mu))
             prev = rounded
     plt.plot(xs, probs, label='poisson pmf', color='black')
     plt.scatter(xs, probs, label='poisson pmf', color='black')
     plt.savefig(head + "_poisson.png")
     plt.close()
예제 #2
0
    def auto_threshold(self):
        """
        Set the flank and burst thresholds from a Poisson background model.

        The Poisson mean is the ``time_axis`` value at the maximum of the PCH
        data.  Each threshold is the smallest count whose upper-tail
        probability ``1 - cdf`` does not exceed the allowed ratio: the flank
        threshold uses the percentage read from ``false_negative_sv``, the
        burst threshold uses that ratio divided by 1000.  Both values are
        stored on ``pch_graph`` and written to their string variables, then
        the PCH is re-plotted.

        TODO should be elsewhere in the core part of the software
        :return:
        """
        def smallest_count(tail_limit, rate):
            # Walk upwards until the tail above `count` is small enough.
            count = 0
            while 1 - poisson.cdf(count, rate) > tail_limit:
                count += 1
            return count

        flank_ratio = float(self.false_negative_sv.get()) / 100.0
        background = self.PCH.time_axis[np.argmax(self.PCH.data)]
        flank = smallest_count(flank_ratio, background)

        self.pch_graph.threshold = flank
        self.threshold_flank_sv.set(str(flank))

        # NB this value is hardcoded !
        burst_ratio = flank_ratio / 1000.0

        background = self.PCH.time_axis[np.argmax(self.PCH.data)]
        burst = smallest_count(burst_ratio, background)

        self.pch_graph.threshold_burst = burst
        self.threshold_burst_sv.set(str(burst))

        self.pch_graph.plot(self.PCH)
예제 #3
0
def probTable(homeAtt, homeDef, awayAtt, awayDef, homeFactor):
    """Return the 8x8 joint score table of two independent Poisson counts.

    The first rate is ``exp(homeAtt - awayDef + homeFactor)`` and the second
    ``exp(awayAtt - homeDef)``.  Each side gets eight probabilities: exactly
    0..6, plus "more than 6".  The result is a flat list of 64 products,
    ordered with the first side's index varying slowest.
    """
    rate_a = exp(homeAtt - awayDef + homeFactor)
    rate_b = exp(awayAtt - homeDef)

    def bucket_probabilities(rate):
        # Differences of consecutive CDF values give P(X == k) for k = 0..6;
        # whatever is left above the last CDF value is the open-ended tail.
        buckets = []
        below = 0
        for k in range(7):
            upto = poisson.cdf(k, rate)
            buckets.append(upto - below)
            below = upto
        buckets.append(1 - below)
        return buckets

    side_a = bucket_probabilities(rate_a)
    side_b = bucket_probabilities(rate_b)
    return [pa * pb for pa in side_a for pb in side_b]
예제 #4
0
파일: pvalue.py 프로젝트: carushi/reactIDR
 def plot_fitting_zip(self, head):
     """Save a histogram of ``self.data`` overlaid with the fitted ZIP masses.

     ``self.params["zip"]`` supplies ``mu``, ``psi`` and ``zero``; all three
     are printed, but only ``mu`` and ``psi`` enter the curve.  The data are
     histogrammed into 20 bins whose heights sum to one.  For each
     non-negative bin edge whose rounded value exceeds the previously used
     one, a probability is computed: the first point is
     ``psi * cdf + (1 - psi)``, later points are ``psi`` times the CDF
     increment since the previous point.  The figure is written to
     ``head + "_zip.png"`` and then closed.

     :param head: prefix of the output file name.
     """
     mu, psi, zero = self.params["zip"][0], self.params["zip"][
         1], self.params["zip"][2]
     print([mu, psi, zero])
     weights = np.ones_like(self.data) / len(self.data)
     _, bins, _ = plt.hist(self.data,
                           20,
                           weights=weights,
                           facecolor='green',
                           alpha=0.75)
     # Record every x position together with its probability.  Building the
     # x list with a separate filter (all non-negative edges) gives more
     # points than probabilities as soon as two edges round to the same
     # integer, and matplotlib then refuses to plot the mismatched arrays.
     xs, probs, prev = [], [], -1
     for edge in bins:
         rounded = np.round(edge)
         if edge >= 0 and rounded > prev:
             upto = psi * poisson.cdf(rounded, mu)
             if prev < 0:
                 # First point also carries the extra weight (1 - psi).
                 probs.append(upto + (1.0 - psi))
             else:
                 probs.append(upto - psi * poisson.cdf(prev, mu))
             xs.append(int(edge))
             prev = rounded
     plt.plot(xs, probs, label='zip', color='black')
     plt.scatter(xs, probs, label='zip', color='black')
     plt.savefig(head + "_zip.png")
     plt.close()
예제 #5
0
    def ln_post(self,pars):
        '''
        Log-posterior for the MCMC fitter; this variant does not model CRs.

        Returns ``-inf`` if any parameter is negative.  Otherwise sums one
        term per read: the first read is a Gaussian around ``pars[1]``; every
        later read marginalises a Gaussian (centred on the difference to the
        first read) over a window of Poisson counts with mean
        ``pars[0] * group_time``.
        '''
        if np.any(pars< 0):
            return -np.inf

        half_width_sigmas = 2
        counts = self.RM.noisy_counts
        read_noise = self.RM.RON_adu

        total = 0.
        for idx in range(self.x_hat.size):
            if idx:
                mean_counts = pars[0] * self.RM.RTS.group_times[idx]
                spread = half_width_sigmas*np.sqrt(mean_counts)

                # Integer window of roughly +/- 2 sigma around the mean,
                # clipped at zero and at least one count wide.
                first = np.max([0,np.round(mean_counts-spread).astype(np.int_)])
                last = np.max([first+1,(mean_counts+spread).astype(np.int_)])
                window = np.arange(first,last)

                # NOTE(review): the window covers first..last-1 while the
                # normalisation is cdf(last) - cdf(first), i.e. first+1..last;
                # confirm whether that offset is intended.
                norm_const = poisson.cdf(last,mu=mean_counts)-poisson.cdf(first,mu=mean_counts)
                count_weights = poisson.pmf(window,mu=mean_counts)/norm_const
                noise_density = norm.pdf(window,loc=counts[idx]-counts[0],scale=read_noise)
                term = np.log(np.sum(noise_density * count_weights))
            else:
                term = np.log(norm.pdf(counts[idx],loc=pars[1],scale=read_noise))

            total = total + term

        return total
 def meet_ifr(self):
     """Return one reorder point per item.

     For item ``k`` the allowance is ``(1 - ifr[k]) * oq[k]``.  The reorder
     point is the first stock level ``s`` at which
     ``mdolt[k] * (1 - cdf(s - 1)) - s * (1 - cdf(s))`` (Poisson CDF with mean
     ``mdolt[k]``) no longer exceeds that allowance.  A doubling search finds
     a power of two that satisfies the condition, then a linear scan locates
     the first satisfying level below it.
     """
     def shortfall(level, mean):
         below = poisson.cdf(level - 1, mean)
         upto = poisson.cdf(level, mean)
         return (mean * (1 - below)) - (level * (1 - upto))

     allowances = [(1 - tifr) * toq for tifr, toq in zip(self.ifr, self.oq)]
     doubling_limit = int(pow(sys.maxsize, 0.5))  # theoretical infinity
     reorder_points = []
     for index in range(min(len(allowances), len(self.mdolt))):
         allowance = allowances[index]
         mean = self.mdolt[index]

         # Doubling phase: stop at the first power of two that is enough.
         for exponent in range(doubling_limit):
             if (allowance - shortfall(2 ** exponent, mean)) >= 0:
                 break

         # Up to 2^10 the scan starts from zero; above that it starts from
         # the previous power of two.
         low_exponent = exponent - 1 if exponent > 10 else 0

         # Linear phase over the bracket found above.
         for level in range(2 ** low_exponent - 1, 2 ** exponent + 1):
             if (allowance - shortfall(level, mean)) >= 0:
                 break
         reorder_points.append(level)
     return reorder_points
예제 #7
0
def poisson_test_random(x, lmd):
    """Randomized upper-tail Poisson p-values for the counts in ``x``.

    For each count a uniform draw interpolates between P(X > x) and
    P(X >= x) under mean ``lmd``, capped at 1.  Entries where ``x`` equals 0
    return the uniform draw itself.  Uses the global NumPy random state and
    draws ``x.shape[0]`` values.
    """
    strictly_above = 1 - poisson.cdf(x, lmd)
    at_or_above = strictly_above + poisson.pmf(x, lmd)
    U = np.random.rand(x.shape[0])
    blended = np.minimum(strictly_above + (at_or_above - strictly_above) * U, 1)
    nonzero = (x != 0)
    zero = (x == 0)
    return blended * nonzero + U * zero
예제 #8
0
파일: stats.py 프로젝트: nhmc/LAE
def poisson_min_max_limits(conf, nevents):
    """ Calculate the minimum and maximum mean Poisson value mu
    consistent with seeing nevents at a given confidence level.

    conf: float
      Confidence level in percent: 95, 90, 68.3 or similar.
    nevents: int
      The number of events observed.

    Returns
    -------
    mulo, muhi : floats
      Both limits come from calling ``_bisect`` with the target
      probability ``1 - conf/100``: mulo for the probability of seeing
      nevents or more events, muhi for the probability of seeing nevents
      or fewer.  mulo is 0 when nevents is 0.  Both are NaN when either
      input is NaN.
    """
    from scipy.stats import poisson
    conf = float(conf)

    # The NaN test has to come before the integer conversion: int(nan)
    # raises ValueError, which made the NaN branch unreachable for nevents.
    if np.isnan(conf) or np.isnan(float(nevents)):
        return np.nan, np.nan
    nevents = int(nevents)

    target = 1 - conf / 100.
    if nevents == 0:
        mulo = 0
    else:
        mulo = _bisect(lambda mu: 1 - poisson.cdf(nevents - 1, mu), target)

    muhi = _bisect(lambda mu: poisson.cdf(nevents, mu), target)

    return mulo, muhi
예제 #9
0
파일: stats.py 프로젝트: nhmc/H2
def poisson_min_max_limits(conf, nevents):
    """ Calculate the minimum and maximum mean Poisson value mu
    consistent with seeing nevents at a given confidence level.

    conf: float
      Confidence level in percent: 95, 90, 68.3 or similar.
    nevents: int
      The number of events observed.

    Returns
    -------
    mulo, muhi : floats
      Both limits come from calling ``_bisect`` with the target
      probability ``1 - conf/100``: mulo for the probability of seeing
      nevents or more events, muhi for the probability of seeing nevents
      or fewer.  mulo is 0 when nevents is 0.  Both are NaN when either
      input is NaN.
    """
    from scipy.stats import poisson
    conf = float(conf)

    # The NaN test has to come before the integer conversion: int(nan)
    # raises ValueError, which made the NaN branch unreachable for nevents.
    if np.isnan(conf) or np.isnan(float(nevents)):
        return np.nan, np.nan
    nevents = int(nevents)

    target = 1 - conf/100.
    if nevents == 0:
        mulo = 0
    else:
        mulo = _bisect(lambda mu: 1 - poisson.cdf(nevents-1, mu), target)

    muhi = _bisect(lambda mu: poisson.cdf(nevents, mu), target)

    return mulo, muhi
예제 #10
0
def plot_poisson():
    """Plot a Poisson pmf for a hard-coded mean and print three probabilities.

    The pmf is drawn between the 1% and 99.9% quantiles as markers, coloured
    bars and a thin black "frozen" overlay.  The printed values are
    P(X <= 6), P(6 < X <= 9) and the mass above 9.  The figure is shown
    interactively.
    """
    fig, ax = plt.subplots(1, 1)

    # This is prediction for Wawrinka in 2014
    mu = 7.869325

    x = np.arange(poisson.ppf(0.01, mu), poisson.ppf(0.999, mu))
    ax.plot(x, poisson.pmf(x, mu), 'wo', ms=8, label='poisson pmf')
    # NOTE(review): the colour list has a fixed 16 entries; presumably that
    # matches the number of points in x for this mu -- confirm if mu changes.
    ax.vlines(x, 0, poisson.pmf(x, mu),
              colors=['b', 'b', 'b', 'b', 'b', 'r', 'r', 'r', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g'], lw=5, alpha=0.5)

    rv = poisson(mu)
    ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1, label='frozen pmf')

    plt.title("Stanislas Wawrinka")
    plt.xlabel('# QF+ Finishes in 2014')
    plt.ylabel('Probability')

    prob0 = poisson.cdf(6, mu)
    prob123 = poisson.cdf(9, mu) - poisson.cdf(6, mu)
    probAbove3 = poisson.cdf(10000, mu) - poisson.cdf(9, mu)
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(prob0)
    print(prob123)
    print(probAbove3)

    plt.show()
예제 #11
0
    def ln_post_CR(self,pars):
        '''
        Log-posterior for the MCMC fitter; this variant accounts for CRs.

        Returns ``-inf`` unless every parameter is strictly positive.
        Otherwise sums one term per pair of consecutive reads: a Gaussian
        centred on the difference between the two reads (scale
        ``sqrt(2) * RON_adu``) is marginalised over Poisson counts 0..99 with
        mean ``pars[0]`` times the time between the reads.
        '''
        if np.any(pars<= 0):
            return -np.inf

        nsig = 2
        first_count, last_count = 0, 100   # fixed window; the adaptive one is disabled
        window = np.arange(first_count,last_count)
        reads = self.RM.noisy_counts
        times = self.RM.RTS.group_times

        total = 0.
        for idx in range(1,self.x_hat.size,1):
            mean_counts = pars[0] * (times[idx]-times[idx-1])
            # Only needed by the disabled adaptive window; kept so the
            # evaluation matches the previous implementation step for step.
            sigmaintv = nsig*np.sqrt(mean_counts+np.square(self.RM.RON_adu))

            # NOTE(review): the window includes count 0 while the
            # normalisation cdf(100) - cdf(0) excludes it; confirm intent.
            norm_const = poisson.cdf(last_count,mu=mean_counts)-poisson.cdf(first_count,mu=mean_counts)
            count_weights = poisson.pmf(window,mu=mean_counts)/norm_const
            noise_density = norm.pdf(window,loc=reads[idx]-reads[idx-1],scale=np.sqrt(2)*self.RM.RON_adu)

            total = total + np.log(np.sum((noise_density * count_weights)))

        return total
예제 #12
0
 def get_mapping(self) -> StateActionMapping[State, Action]:
     """Build the state -> action -> outcome-distribution table of the MDP.

     Actions are all pairs ``Action(i, j)`` with ``i + j <= H``; states are
     ``State(1)`` .. ``State(W)``.  Both lists are also stored on ``self``.
     For a state with wage ``w`` and an action with fields ``s`` and ``l``
     the reward is ``w * (H - l - s)``; the next wage mixes a Poisson count
     with mean ``alpha * l`` and an offer probability ``beta * s / H``, with
     all mass beyond ``W`` collected on ``State(W)``.
     """
     # Every (i, j) with i + j <= H, in the same nested-loop order.
     all_actions: List[Action] = [
         Action(i, j)
         for i in range(self.H + 1)
         for j in range(self.H - i + 1)
     ]
     self.list_actions: List[Action] = all_actions
     all_states: List[State] = [State(w) for w in range(1, self.W + 1)]
     self.list_states: List[State] = all_states

     mapping: StateActionMapping[State, Action] = {}
     for state in all_states:
         per_action: ActionMapping[Action, StateReward[State]] = {}
         for action in all_actions:
             s: int = action.s
             l: int = action.l
             reward: float = state.wage * (self.H - l - s)
             pois_mean: float = self.alpha * l
             proba_offer: float = self.beta * s / self.H
             no_offer = 1 - proba_offer

             if state.wage == self.W:
                 # Top wage: the state never changes, only the reward
                 # depends on the action.
                 outcome = Constant((state, reward))
             elif state.wage == self.W - 1:
                 # One below the top: either stay or move to W.
                 stay = poisson.pmf(0, pois_mean) * no_offer
                 rise = proba_offer + no_offer * \
                     (1 - poisson.pmf(0, pois_mean))
                 outcome = Categorical({
                     (state, reward): stay,
                     (State(self.W), reward): rise,
                 })
             else:
                 # Anywhere lower: any wage from the current one up to W.
                 distribution = {
                     (state, reward): poisson.pmf(0, pois_mean) * no_offer,
                 }
                 distribution[(State(state.wage + 1), reward)] = (
                     proba_offer * poisson.cdf(1, pois_mean)
                     + no_offer * poisson.pmf(1, pois_mean))
                 for k in range(2, self.W - state.wage):
                     distribution[(State(state.wage + k),
                                   reward)] = poisson.pmf(k, pois_mean)
                 distribution[(State(self.W), reward)] = 1 - poisson.cdf(
                     self.W - state.wage - 1, pois_mean)
                 outcome = Categorical(distribution)
             per_action[action] = outcome
         mapping[state] = per_action
     return mapping
예제 #13
0
def poisson_approximate(s1, s2, s_both, sLen):
    """Return the expected overlap and a Poisson tail probability for it.

    The expected overlap is ``min(s1, s2) * (max(s1, s2) / sLen)``.  The
    probability is P(X = 0) when ``s_both`` is 0, P(X <= s_both) when
    ``s_both`` is below the expectation, and P(X > s_both) otherwise.

    :returns: ``(expected overlap rounded to 3 decimals, probability)``
    """
    if s1 >= s2:
        larger, smaller = s1, s2
    else:
        larger, smaller = s2, s1
    expected = smaller * (larger / float(sLen))
    shown = round(expected, 3)

    if s_both == 0.0:
        return shown, PSN.pmf(0, expected)
    if s_both < expected:
        return shown, PSN.cdf(s_both, expected)
    return shown, 1 - PSN.cdf(s_both, expected)
예제 #14
0
def getOdds55(offForm1, defForm1, offForm2, defForm2):
    """Return P(X <= 4), P(X <= 5) and P(X <= 6) for a Poisson total score.

    The Poisson mean is the sum of half of each of the four form values.
    """
    expected_total = offForm1 / 2 + offForm2 / 2 + defForm1 / 2 + defForm2 / 2
    return [poisson.cdf(limit, expected_total) for limit in (4, 5, 6)]
예제 #15
0
def _lids_pmf(x_lids, mu_lids):
    r'''Probability mass function of the LIDS

    Both arguments are mapped through ``_lids_inverse_func``; the result is
    the Poisson CDF at the mapped ``x_lids`` minus the Poisson CDF at the
    mapped ``x_lids + 1``, using the mapped ``mu_lids`` as the mean.
    '''
    # Expected number of counts
    expected_counts = _lids_inverse_func(mu_lids)

    counts_here = _lids_inverse_func(x_lids)
    counts_next = _lids_inverse_func(x_lids + 1)

    upto_here = poisson.cdf(k=counts_here, mu=expected_counts)
    upto_next = poisson.cdf(k=counts_next, mu=expected_counts)
    return upto_here - upto_next
예제 #16
0
def predict_n_rel(des_prob, n_docs, mu):
    """Return the smallest count whose Poisson CDF reaches ``des_prob``.

    The search starts at 0 and never goes above ``n_docs``.
    """
    count = 0
    while (count < n_docs) and (poisson.cdf(count, mu) < des_prob):
        count += 1
    return count
예제 #17
0
def altCLs(Nbkg, Nobs, CL):
    Nsig=0
    while (True):
        clb = poisson.cdf(Nobs, Nbkg)
        clbs = poisson.cdf(Nobs, Nbkg+Nsig)
        cls = clbs/clb
        if (cls < (1 - CL)):
            break
        Nsig += 1
    return Nsig
예제 #18
0
def get_probs(lamda, max):
    """Return the outer product of two truncated Poisson distributions.

    For each of the two means ``lamda[0]`` and ``lamda[1]`` a vector of
    length ``max`` is built from the pmf at 0..max-1, and its last entry is
    replaced by the upper tail ``1 - cdf(max - 1)``.  Entry ``[i, j]`` of
    the result is the product of the first vector at ``i`` and the second
    at ``j``.
    """
    # NOTE(review): the last entry becomes P(X > max - 1), so the pmf of
    # max - 1 itself is dropped and a vector does not sum to one; confirm
    # whether 1 - cdf(max - 2) was intended.
    v1 = poisson.pmf(range(max), lamda[0])
    v1[-1] = 1 - poisson.cdf(max - 1, lamda[0])

    v2 = poisson.pmf(range(max), lamda[1])
    # The second tail belongs to v2; it used to overwrite v1[-1].
    v2[-1] = 1 - poisson.cdf(max - 1, lamda[1])

    M = v1[:, np.newaxis] * v2

    return M
예제 #19
0
def get_infection_type_frequencies(bacteria_count, viral_counts_dict):
    """Return the frequency of each infection pattern under a Poisson model.

    Each virus infects a cell with a Poisson count whose mean is its entry
    in ``viral_counts_dict`` divided by ``bacteria_count``.  A virus is
    absent with probability ``cdf(0)`` and present with probability
    ``cdf(MAX_INFECTIONS_PER_CELL) - cdf(0)``.  The returned dict holds:
    ``'none'``; one entry per virus key (only that virus present); one
    entry per virus keyed by the other keys joined with ``'_'`` (only that
    virus absent); and ``'all_virus_types'``.
    """
    def absent(virus):
        return poisson.cdf(0, viral_counts_dict[virus] / bacteria_count)

    def present(virus):
        return (poisson.cdf(MAX_INFECTIONS_PER_CELL,
                            viral_counts_dict[virus] / bacteria_count) -
                poisson.cdf(0, viral_counts_dict[virus] / bacteria_count))

    frequencies = {}

    # no infections
    freq = 1
    for virus in viral_counts_dict:
        freq *= absent(virus)
    frequencies['none'] = freq

    # exactly one virus type present
    for lone in viral_counts_dict:
        freq = present(lone)
        for other in viral_counts_dict:
            if other != lone:
                freq *= absent(other)
        frequencies[lone] = freq

    # exactly one virus type missing
    for missing in viral_counts_dict:
        freq = absent(missing)
        for other in viral_counts_dict:
            if other != missing:
                freq *= present(other)
        label = '_'.join([k for k in viral_counts_dict if k != missing])
        frequencies[label] = freq

    # all virus types
    freq = 1
    for virus in viral_counts_dict:
        freq *= present(virus)
    frequencies['all_virus_types'] = freq
    return frequencies
예제 #20
0
def _error( value ) :
  '''Construct Bayesian errors using Poisson distribution

  Searches for a lower and an upper Poisson mean around ``value`` and
  returns the pair ``(value - lambda_down, lambda_up - value)``.

  NOTE(review): for ``value == 0`` every step size is 0, so the outer
  ``while`` loop looks like it cannot make progress -- confirm callers
  never pass 0.
  '''
  # likelihood = P(value|lambda) using underlying Poisson assumption
  # error: lambdas with equal likelihood for which area in between is 68%
  # Start 10% either side of value, with a step of a tenth of value.
  lambda_up, lambda_down, step_size = 1.1*value, 0.9*value, float(value)/10
  # Five refinement passes; each one first steps both bounds back towards
  # value by the current step and then continues with a ten times finer step.
  for i in range(5) :
    lambda_up -= step_size; lambda_down += step_size; step_size /= 10
    # Widen until cdf(value; lambda_down) - cdf(value; lambda_up) reaches
    # the one-sigma fraction 0.6827.
    while (poisson.cdf(value,lambda_down) - poisson.cdf(value,lambda_up)) < 0.6826894921370859 :
      lambda_up += step_size
      # After each upward step, lower lambda_down until its pmf at value no
      # longer exceeds the pmf at lambda_up.
      while poisson.pmf(value,lambda_down) > poisson.pmf(value,lambda_up) : lambda_down -= step_size/10
  return (value-lambda_down,lambda_up-value)
예제 #21
0
def plot_pval(counts, Lam, seed=123, title="", outfile="", save=True):
    """Plot a histogram of randomized Poisson p-values for a count matrix.

    For every entry a uniform draw ``C`` mixes ``cdf(count - 1)`` and
    ``cdf(count)`` under mean ``Lam``.  The values are histogrammed over
    ``np.linspace(0, 1, 100)`` and the figure is closed afterwards.

    :param counts: 2-D array of counts (``counts.shape`` must unpack to two).
    :param Lam: Poisson mean(s) passed straight to ``poisson.cdf``.
    :param seed: seed for the global NumPy random state.
    :param title: plot title.
    :param outfile: path handed to ``plt.savefig`` when ``save`` is true.
    :param save: whether to write the figure to ``outfile``.
    """
    # Honour the caller's seed; it used to be ignored in favour of a
    # hard-coded 123 (still the default, so default calls are unchanged).
    np.random.seed(seed)
    (n, p) = counts.shape
    C = np.random.uniform(size=(n, p))
    pval = C * poisson.cdf(counts - 1, Lam) + (1 - C) * poisson.cdf(
        counts, Lam)
    plt.hist(pval.flatten(), bins=np.linspace(0, 1, 100))
    plt.title(title)
    if save:
        plt.savefig(outfile)
    plt.close()
예제 #22
0
파일: mopg.py 프로젝트: TrNi/covid-pgmorl
def evaluate_policy(new_policy, env, sample):
    """Advance ``env`` with ``new_policy`` and return two objective values.

    The two values start from the mean ``X_I`` returned by
    ``env.sample_stochastic()`` and from ``new_policy`` itself.  Each then
    accumulates ``0.97 * probI * probR`` times an entry of the matching
    value table on ``sample`` over a grid of (i, j) transitions.

    :param new_policy: passed to ``env.time_step`` and used as the starting
        value of the second objective.
    :param env: provides ``time_step``, ``sample_stochastic``, ``get_error``
        and the attributes ``M``, ``beta`` and ``gamma``.
    :param sample: provides ``X_I``, ``X_S`` and the tables ``val_I`` and
        ``val_L``, which are indexed with two integers.
    :return: ``[val_I, val_L]``
    """
    env.time_step(new_policy)

    X_I, X_S = sample.X_I, sample.X_S
    currentV_I = sample.val_I
    currentV_L = sample.val_L

    meanX_S, meanX_I, meanX_R = env.sample_stochastic()
    errX_S, errX_I, errX_R = env.get_error()

    val_I = meanX_I
    val_L = new_policy

    # Rounded mean -/+ error band for X_S, clipped to [0, env.M].
    lowXS = max(round(meanX_S - errX_S, 0), 0)
    uppXS = min(round(meanX_S + errX_S, 0), env.M)

    # lowXI=max(round(meanX_I-errX_I,0),0)
    # uppXI=min(round(meanX_I+errX_I,0),env.M)

    # Same band for X_R.
    lowXR = max(round(meanX_R - errX_R, 0), 0)
    uppXR = min(round(meanX_R + errX_R, 0), env.M)

    # Range of i implied by the X_S band.
    lowI = max(X_S - uppXS, 0)
    uppI = min(X_S, X_S - lowXS)

    # Range of j implied by the X_R band and the current env.M - X_I - X_S.
    lowR = max(lowXR - (env.M - X_I - X_S), 0)
    uppR = min(X_I, uppXR - (env.M - X_I - X_S))

    # NOTE(review): round(x, 0) returns a float for float input and range()
    # only accepts integers -- presumably the inputs are integers; confirm.
    for i in range(lowI, uppI):
        for j in range(lowR, uppR):

            probI = poisson.pmf(i, env.beta)
            probR = binom.pmf(j, uppR, env.gamma)

            # The lowest i takes the whole lower tail.
            if i == lowI:
                probI = poisson.cdf(i, env.beta)

            # NOTE(review): range() excludes uppI, so this branch never
            # runs; confirm whether the loop should include the upper bound.
            if i == uppI:
                probI = 1 - poisson.cdf(i - 1, env.beta)

            # The lowest j takes the whole lower tail.
            if j == lowR:
                probR = binom.cdf(j, uppR, env.gamma)
            # NOTE(review): unreachable for the same reason as above.
            if j == uppR:
                probR = 1 - binom.cdf(j - 1, uppR, env.gamma)

            # 0.97 is hard-coded (presumably a discount factor -- confirm).
            val_I += 0.97 * probI * probR * currentV_I[X_I + i - j - 1,
                                                       X_S - i - 1]
            val_L += 0.97 * probI * probR * currentV_L[X_I + i - j - 1,
                                                       X_S - i - 1]

    objs = [val_I, val_L]
    return objs
예제 #23
0
def BuildSingleSurprise(psth, rate0, binsize, surplength,t_before):
    """Return the Poisson "surprise" of windowed spike counts for two rows.

    For each of the ``surplength`` positions and each of the two rows of
    ``psth``, ``binsize`` consecutive samples are summed and compared with
    a Poisson distribution of mean ``rate0 * binsize``.  The surprise is
    ``-log10`` of the smaller of the CDF and ``1 - CDF`` at that count.

    :param psth: indexable with ``psth[d][i:j]`` for ``d`` in (0, 1).
    :param rate0: baseline rate per sample.
    :param binsize: number of samples summed per window.
    :param surplength: number of window positions evaluated.
    :param t_before: offset of the first window; position ``j`` starts at
        ``j + t_before - binsize + 1``.
    :return: array of shape ``(2, surplength)``.
    """
    # np.longdouble is the portable spelling of the widest float type;
    # np.float128 does not exist on every platform.
    eps=np.finfo(np.longdouble).eps/1000000000   #this epsilon is used not to get inf when taking log10
    mu = rate0*binsize       # this is the mean value for the poisson blank, and corresponding binsize
    counts = np.zeros([2,surplength])
    Surprise = np.zeros([2,surplength])

    for j in np.arange(surplength):
        i = j + t_before - binsize + 1  #this will make every surprise start at 0-1 ms bin
        for d in [0,1]:
            counts[d][j] = sum(psth[d][i:i+binsize])
            below = poisson.cdf(counts[d][j],mu)
            Surprise[d][j] = -np.log10(min(below-eps,1-below+eps))

    return Surprise
예제 #24
0
def pdcdf(x, lbd):
    '''
    Look up the Poisson cumulative distribution function in the
    pre-calculated table, computing it on demand when the lookup fails.

    :param lbd: demand distribution expected value (lambda)
    :param x: point to calculate the cdf (integer)

    :returns: P(X <= x)
    :rtype: float

    :raises Warning: if indexing the table with (x, lbd) raises IndexError; the return value is then calculated on-the-fly using :mod:`scipy:scipy.stats`

    :examples:

    >>> round(pdcdf(1, 25), 4) == round(poisson.cdf(1, 25), 4)
    True
    >>> round(pdcdf(1e6, 25), 4) == round(poisson.cdf(1e6, 25), 4)
    True
    '''
    try:
        tabulated = pdtable[x, lbd]
    except IndexError:
        warnings.warn("Input too big for the pre-calculated table: %d %d" % (x, lbd))
        return poisson.cdf(x, lbd)
    return tabulated
예제 #25
0
def error (event, numlines, rate, P):
    """Return True when ``event.count`` is greater than 3.

    ``numlines``, ``rate`` and ``P`` are currently unused: an unconditional
    return precedes the Poisson tail test they were written for, so that
    test never runs.
    """
    if not event.count>3:
        return False
    return True
    # Disabled test, kept for reference (it was already unreachable):
    #   (1.0 - poisson.cdf(event.count-1,
    #                      float(rate*numlines)*(event.stop-event.start))) < P
예제 #26
0
def ks_1sample(mme_sample_data, last_week):
    """Return the largest ECDF-to-model gap for three fitted distributions.

    Geometric, binomial and Poisson parameters are estimated by the method
    of moments from ``mme_sample_data``.  The first seven values of
    ``last_week`` are then compared with each fitted CDF: value number ``i``
    is compared against the steps ``i/7`` and ``(i+1)/7``.

    NOTE(review): the steps are only an empirical CDF if ``last_week`` is
    sorted ascending -- confirm against the caller.

    :returns: ``(geometric, binomial, poisson)`` maximum absolute gaps.
    """
    variance = np.var(mme_sample_data, ddof=1)
    mean = np.mean(mme_sample_data)

    # Method-of-moments estimates.
    binom_p = 1-(variance/mean)
    binom_n = (mean*mean)/(mean - variance)
    poisson_rate = mean
    geom_p = 1/mean

    gaps = {"geom": [], "binom": [], "poisson": []}
    for rank in range(7):
        observed = last_week[rank]
        steps = (rank/7, (rank+1)/7)
        fitted = {
            "geom": geom.cdf(observed, geom_p),
            "poisson": poisson.cdf(observed, poisson_rate),
            "binom": binom.cdf(observed, binom_n, binom_p),
        }
        for name, model_value in fitted.items():
            gaps[name].extend(abs(step - model_value) for step in steps)

    return np.max(gaps["geom"]), np.max(gaps["binom"]), np.max(gaps["poisson"])
예제 #27
0
 def make(self, key):
     """Compute a drift metric for ``key`` and insert it.

     The metric is the fraction of smoothed, down-sampled per-trial spike
     rates whose Poisson CDF value (mean = mean trial spike rate) lies
     above 0.95 or below 0.05.  One row is inserted into this table and one
     into ``self.DriftMetric``.
     """
     # -- get trial-spikes - use only trials in ProbeInsertionQuality.GoodTrial
     trial_spikes, trial_durations = (
         Unit.TrialSpikes *
         (experiment.TrialEvent & 'trial_event_type = "trialend"')
         & ProbeInsertionQuality.GoodTrial
         & key).fetch('spike_times', 'trial_event_time', order_by='trial')
     # -- compute trial spike-rates
     # spike count per trial divided by the fetched 'trial_event_time'
     trial_spike_rates = [len(s) for s in trial_spikes
                          ] / trial_durations.astype(float)  # spikes/sec
     mean_spike_rate = np.mean(trial_spike_rates)
     # -- moving-average
     window_size = 6  # sample
     kernel = np.ones(window_size) / window_size
     processed_trial_spike_rates = np.convolve(trial_spike_rates, kernel,
                                               'same')
     # -- down-sample
     # keep every 6th value, i.e. one per moving-average window length
     ds_factor = 6
     processed_trial_spike_rates = processed_trial_spike_rates[::ds_factor]
     # -- compute drift_qc from poisson distribution
     # NOTE(review): the CDF is evaluated on rates (spikes/sec), not on
     # integer counts -- confirm this is intended.
     poisson_cdf = poisson.cdf(processed_trial_spike_rates, mean_spike_rate)
     instability = np.logical_or(
         poisson_cdf > 0.95, poisson_cdf < 0.05).sum() / len(poisson_cdf)
     # -- insert
     self.insert1(key)
     self.DriftMetric.insert1({**key, 'drift_metric': instability})
예제 #28
0
def KS1Sample(data_, true_distro_):
    """Print the outcome of a one-sample Kolmogorov-Smirnov style comparison.

    The parameters of ``true_distro_`` are estimated from the first column
    of ``data_`` with the matching ``MME*`` helper.  The empirical CDF of the
    second column is then compared with the fitted CDF at every distinct
    observed value, and the largest gap is checked against a fixed 0.05.

    :param data_: table whose first two columns hold the two samples
        (``.columns`` and column indexing are used).
    :param true_distro_: ``'poisson'``, ``'geometric'`` or ``'binomial'``.
    :return: always ``None``; for any other distribution name nothing is
        printed.
    """
    state1_ = data_.columns[0]
    state2_ = data_.columns[1]

    if true_distro_ == 'poisson':
        MME = MMEPoisson(data_[state1_])
    elif true_distro_ == 'geometric':
        MME = MMEGeometric(data_[state1_])
    elif true_distro_ == 'binomial':
        MME = MMEBinomial(data_[state1_])
    else:
        return None

    print("The MME values : ", MME)

    uniq_val_state2, freq_val_state2 = np.unique(data_[state2_],
                                                 return_counts=True)
    cdf_state2 = np.cumsum(freq_val_state2)
    cdf_state2 = cdf_state2 / cdf_state2[-1]

    max_diff = -1
    for i, value_ in enumerate(uniq_val_state2):
        # The ECDF jumps at each distinct value: just left of it the ECDF
        # is the previous cumulative fraction (0 before the first value),
        # at the value it is the current one (1 at the last value).
        left_ecdf = cdf_state2[i - 1] if i > 0 else 0
        right_ecdf = cdf_state2[i]

        if true_distro_ == 'poisson':
            true_cdf = poisson.cdf(value_, MME[0])
        elif true_distro_ == 'geometric':
            true_cdf = geom.cdf(value_, MME[0])
        else:
            true_cdf = binom.cdf(value_, MME[0], MME[1])

        diff_ = max(abs(left_ecdf - true_cdf), abs(right_ecdf - true_cdf))
        if diff_ > max_diff:
            max_diff = diff_

    # Both messages have four placeholders; the "accept" branch used to
    # pass only max_diff, which raised TypeError instead of printing.
    details = (max_diff, state2_, state1_, true_distro_)
    if max_diff > 0.05:
        print(
            "Since Max distance(%f) > 0.05, we reject Null Hypothesis (%s has same distribution as %s having true distribution of %s)\n\n"
            % details)
    else:
        print(
            "Since Max distance(%f) <= 0.05, we accept Null Hypothesis (%s has same distribution as %s having true distribution of %s)\n\n"
            % details)
def calcPvalUponPoissondist(Count, Mean):
    """Return twice the one-sided Poisson tail probability of ``Count``.

    When ``Count`` is at least ``Mean`` the tail is the survival function
    P(X > Count); otherwise it is the CDF P(X <= Count).

    NOTE(review): the upper tail excludes ``Count`` itself and the doubled
    value can exceed 1 -- confirm both are intended.
    """
    from scipy.stats import poisson
    one_sided = poisson.sf if Count >= Mean else poisson.cdf
    return one_sided(Count, Mean) * 2
def call_peaks(genome,
               unit_length=200,
               small_length=1000,
               medium_length=5000,
               large_length=10000):
    """Call peaks per contig using a Poisson test against the largest local rate.

    For every contig with at least one read, window counts are compared
    with the maximum of a contig-wide background rate and three local rates
    (small, medium and large neighbourhoods).  The resulting p-values are
    converted with the R ``qvalue`` package, and windows with a q-value
    below 0.01 are turned into peaks.

    :param genome: mapping from contig name to a per-position read sequence
        (it is iterated, indexed by contig, summed and measured with len).
    :param unit_length: window size passed to the helper functions.
    :param small_length: size of the small local neighbourhood.
    :param medium_length: size of the medium local neighbourhood.
    :param large_length: size of the large local neighbourhood.
    :return: list of ``[contig, peak[0], peak[1]]`` entries.
    """

    peaks_out = []
    for contig in genome:

        total_reads = sum(genome[contig])
        contig_length = len(genome[contig])

        # Nothing to test on an empty contig.
        if total_reads == 0:
            continue

        window_counts = window_read_counts(genome[contig], unit_length)
        window_sum = window_counts.sum(axis=1)

        small_bin_counts = calculate_bins(window_counts, small_length,
                                          unit_length)
        medium_bin_counts = calculate_bins(window_counts, medium_length,
                                           unit_length)
        large_bin_counts = calculate_bins(window_counts, large_length,
                                          unit_length)

        # One column per neighbourhood size.
        local_bin_sums = np.hstack(
            (small_bin_counts.sum(axis=1), medium_bin_counts.sum(axis=1),
             large_bin_counts.sum(axis=1)))

        # Scale each neighbourhood sum to a rate per unit_length window.
        local_lambdas = (local_bin_sums / np.array(
            [small_length, medium_length, large_length])) * unit_length
        # Contig-wide background rate per window.
        lambda_bg = np.ones(
            window_sum.shape) * (total_reads / contig_length) * unit_length

        # Use the largest of the four rates for each window.
        all_lambdas = np.hstack((lambda_bg, local_lambdas))
        max_lambdas = np.amax(all_lambdas, axis=1)

        # NOTE(review): 1 - cdf loses precision for very small tails;
        # poisson.sf computes the same quantity directly -- consider it.
        p_vals = 1 - poisson.cdf(window_sum.astype(int),
                                 mu=max_lambdas.astype(float))
        p_vals = np.transpose(p_vals.astype(np.longdouble))[0]

        # Element [2] of the R result is presumably the q-values -- confirm
        # against the qvalue package documentation.
        qvalue = importr('qvalue')
        q_vals = np.array(qvalue.qvalue(FloatVector(p_vals))[2])
        # Pair every q-value with its 1-based window position.
        q_vals = np.hstack(
            (np.transpose(np.matrix(list(range(1,
                                               len(q_vals) + 1)))),
             np.transpose(np.matrix(q_vals))))
        qv_df = pd.DataFrame(q_vals)
        qv_df.columns = ['Position', 'qvalue']

        peak_indices = np.array(
            qv_df.query('qvalue < 0.01')['Position'].tolist()).astype(int)

        peaks = indices_to_peaks(peak_indices)
        peaks = correct_peaks(peaks, unit_length)

        for peak in peaks:
            peaks_out.append([contig, peak[0], peak[1]])

    return peaks_out
예제 #31
0
 def check_significance(self, num_pairs, critValue, cut_off=0.10):
     """
     Check whether the number of cointegrated pairs found exceeds chance.

     With ``n = len(self.portfolio)`` there are ``n * (n - 1) / 2``
     comparisons; ``critValue`` times that, rounded, is the number of pairs
     expected by chance.  The probability of seeing at least ``num_pairs``
     under a Poisson distribution with that mean is compared with
     ``cut_off``, and a summary line is printed.

     :param num_pairs: The number of calculated pairs
     :param critValue: The critical value that was used
     :param cut_off: Probability below which the result counts as significant
     :return: 1 if the probability is below ``cut_off``, otherwise 0
     """
     portfolio_size = len(self.portfolio)
     comparisons = (portfolio_size) * (portfolio_size - 1) / 2
     expected = round(critValue * comparisons)
     # P(X >= num_pairs) for X ~ Poisson(expected)
     tail = 1 - poisson.cdf(k=num_pairs - 1, mu=expected)

     if tail < cut_off:
         headline = "These results seem significant: \n Expected: {} | Pairs: {} | p: {}"
         verdict = 1
     else:
         headline = "These results may be insignificant:\n Expected: {} | Pairs: {} | p: {}"
         verdict = 0
     print(headline.format(expected, num_pairs, tail))
     return verdict
예제 #32
0
def ppois(q,mu):
    """
    Return the Poisson cumulative probability P(X <= q) for mean ``mu``.
    """
    from scipy.stats import poisson
    return poisson.cdf(k=q,mu=mu)
def poisson_safety(segs, injTable, livetime):
    """
    Return the number of vetoed injections, the number expected by chance,
    and the Poisson probability of vetoing at least that many.

    The expected number is the table length scaled by the ratio of the
    absolute value of ``segs`` to ``livetime``.

    Arguments:

      segs : glue.segments.segmentlist
        list of segments to be tested
      injTable : glue.ligolw.table.Table
        table of injections
      livetime : [ float ]
        livetime of search
    """
    time_of = def_get_time(injTable.tableName)

    vetoed = 0
    for injection in injTable:
        if time_of(injection) in segs:
            vetoed += 1

    dead_fraction = float(segs.__abs__()) / float(livetime)
    expected = len(injTable) * dead_fraction

    # P(X >= vetoed) for X ~ Poisson(expected)
    chance = 1 - poisson.cdf(vetoed - 1, expected)

    return vetoed, expected, chance
예제 #34
0
def calc_KS_1_sample_test(x_points, parameters_list, data, distribution_name, column_type , column_name):
    """Run a one-sample KS comparison, print the verdict, return the arg-max x.

    For every point in ``x_points`` the CDF of the named distribution is
    compared with the values returned by ``get_left_cdf`` and
    ``get_right_cdf`` for the ``'DC_eCDF'`` column of ``data``.  The largest
    absolute difference (rounded to 4 decimals) is the KS statistic, which
    is checked against a fixed threshold of 0.05.

    :param x_points: points at which the CDFs are compared.
    :param parameters_list: distribution parameters; binomial uses the
        first two entries, Poisson and geometric the first.
    :param data: table handed to ``get_left_cdf`` / ``get_right_cdf``.
    :param distribution_name: ``'binomial'``, ``'poisson'`` or ``'geometric'``.
    :param column_type: label used in the printed verdict
        (``'confirmed'`` is spelled out as "confirmed positive cases").
    :param column_name: column name handed to the two eCDF helpers.
    :return: list with the x value(s) at which the statistic is attained.
    :raises ValueError: if ``distribution_name`` is not one of the three.
    """
    columns = ['x', 'F_cap_x', 'F_cap_DC_left', 'F_cap_DC_right', 'left_diff_abs', 'right_diff_abs']

    # Resolve the model CDF once; an unknown name used to leave F_cap_x
    # unbound inside the loop.
    if distribution_name == 'binomial':
        def model_cdf(x):
            return binom.cdf(x, parameters_list[0], parameters_list[1])
    elif distribution_name == 'poisson':
        def model_cdf(x):
            return poisson.cdf(x, parameters_list[0])
    elif distribution_name == 'geometric':
        def model_cdf(x):
            return geom.cdf(x, parameters_list[0])
    else:
        raise ValueError("unknown distribution_name: {0}".format(distribution_name))

    row_list = []
    for x in x_points:
        F_cap_x = model_cdf(x)
        # eCDF just left and just right of x in the DC dataset
        F_cap_DC_left = get_left_cdf(data, column_name, x, 'DC_eCDF')
        F_cap_DC_right = get_right_cdf(data, column_name, x, 'DC_eCDF')
        left_diff_abs = round(abs(F_cap_x - F_cap_DC_left), 4)
        right_diff_abs = round(abs(F_cap_x - F_cap_DC_right), 4)
        row = [x, F_cap_x, F_cap_DC_left, F_cap_DC_right, left_diff_abs, right_diff_abs]
        row_list.append(dict(zip(columns, row)))

    # Build KS Test Table (represented as a dataframe)
    table = pd.DataFrame(row_list, columns=columns)

    # Calculate KS statistic value.  Series.idxmax takes no axis=1 on
    # current pandas; the table has a default index, so the returned label
    # selects the row directly.
    max_diff_x = []
    d_right = table.loc[table['right_diff_abs'].idxmax(), ['x', 'right_diff_abs']]
    d_left = table.loc[table['left_diff_abs'].idxmax(), ['x', 'left_diff_abs']]
    if d_right['right_diff_abs'] == d_left['left_diff_abs']:
        print("KS Statistic is {0} at x = {1} and {2}".format(d_right['right_diff_abs'], d_left['x'], d_right['x']))
        max_diff_x.append(d_right['x'])
        max_diff_x.append(d_left['x'])
    elif d_right['right_diff_abs'] > d_left['left_diff_abs']:
        print("KS Statistic is {0} at x = {1}".format(d_right['right_diff_abs'], d_right['x']))
        max_diff_x.append(d_right['x'])
    else:
        print("KS Statistic is {0} at x = {1}".format(d_left['left_diff_abs'], d_left['x']))
        max_diff_x.append(d_left['x'])

    # Reject/Accept Null Hypothesis based on calculated KS Statistic d and given threshold=0.05
    d = max(d_right['right_diff_abs'], d_left['left_diff_abs'])
    critical_value = 0.05
    hypothesis_type = 'confirmed positive cases' if column_type == 'confirmed' else column_type

    if d > critical_value:
        print("Rejected Null Hypothesis: We reject the hypothesis that the distribution of daily {0} in DC is {3}, as KS Statistic d = {1} exceeds threshold {2}".format(hypothesis_type, d, critical_value, distribution_name))
        print()
    else:
        print("Failed to reject Null Hypothesis: We accept the hypothesis that the distribution of daily {0} is same in both CT and DC, as KS Statistic d = {1} does not exceed threshold {2}".format(hypothesis_type, d, critical_value))
        print()

    return max_diff_x
예제 #35
0
def calc_f3(qbar, L):
    """Return a Poisson-CDF weighted sum normalised by ``L**2``.

    The sum starts at ``e**(-qbar)`` and adds
    ``(2*k + 1) * poisson.cdf(k, qbar)`` for every integer ``k`` in
    ``[1, L)``.

    :param qbar: mean of the Poisson distribution passed to ``poisson.cdf``.
    :param L: exclusive upper bound of the summation index (an integer).
    :return: the accumulated sum divided by ``L**2``.
    """
    acc = np.e ** (-qbar)
    for k in range(1, L):
        # (2*k - 1) + 2 collapses to 2*k + 1.
        weight = 2 * k + 1
        acc = acc + weight * poisson.cdf(k, qbar)
    return acc / (L * L)
예제 #36
0
def clean_bkg(img, bkg):
    """
    Randomly zero positive pixels that are compatible with background.

    Both the image and a constant-background map are passed through
    ``generic_filter`` with the ``heaviside`` callback over a 10x10 footprint.
    Every positive pixel is then set to zero when a uniform random draw falls
    below ``1 - poisson.cdf(filtered_image, filtered_background)``.

    The input array is modified in place and also returned.

    :param img: Input image (unpacked as two index grids, so 2-D)
    :type img: class:`numpy.ndarray`
    :param bkg: Background map (multiplied into an array of ones shaped like
        ``img``)
    :type bkg: class:`numpy.ndarray`
    :return: The same ``img`` array with the selected pixels set to 0
    :rtype: class:`numpy.ndarray`
    """
    positive = np.where(img > 0.0)
    row_grid, col_grid = np.indices(img.shape)
    rows = row_grid[positive]
    cols = col_grid[positive]
    n_positive = len(img[positive])

    box = 10
    footprint = np.ones((box, box))
    filtered_img = generic_filter(img,
                                  heaviside,
                                  footprint=footprint,
                                  mode='constant')
    filtered_bkg = generic_filter(np.ones(img.shape) * bkg,
                                  heaviside,
                                  footprint=footprint,
                                  mode='constant',
                                  cval=0,
                                  origin=0)

    # Upper-tail probability of the filtered image value under a Poisson
    # whose mean is the filtered background value.
    prob = 1. - poisson.cdf(filtered_img[positive], filtered_bkg[positive])
    draws = np.random.rand(n_positive)
    drop = np.where(draws < prob)
    img[rows[drop], cols[drop]] = 0
    return img
예제 #37
0
def poisson_safety(segs, injTable, livetime):

  """
    Return a tuple containing the number of vetoed injections, the number
    expected by chance, and the Poisson probability of vetoing at least that
    many injections given the expectation.

    Arguments:

      segs : glue.segments.segmentlist
        list of segments to be tested
      injTable : glue.ligolw.table.Table
        table of injections
      livetime : [ float ]
        livetime of search
  """

  dead = abs(segs)

  # Accessor that extracts the time of one row of this kind of table.
  time_of = def_get_time(injTable.tableName)
  n_vetoed = sum(1 for inj in injTable if time_of(inj) in segs)

  # Chance expectation: injections scaled by the vetoed fraction of livetime.
  n_expected = len(injTable)*float(dead) / float(livetime)

  # P(X >= n_vetoed) for X ~ Poisson(n_expected).
  tail = 1 - poisson.cdf(n_vetoed-1, n_expected)

  return n_vetoed, n_expected, tail
def myKStest(loadFile='optSxJKtext32.txt', nSims=10000):
    """
    Kolmogorov-Smirnov test of recovered integer counts against a Poisson CDF.

    The input is read with ``np.loadtxt(..., unpack=True)`` and must provide
    exactly three columns; the first row of each column is dropped. P-values
    are converted back to integer counts via ``rint(p * nSims)``. Only rows
    whose second-column value is positive are tested, against a Poisson
    distribution whose mean is the rounded mean of those counts.

    :param loadFile: anything ``np.loadtxt`` accepts (file name, file object,
        or list of text lines).
    :param nSims: number of simulations the p-values were derived from.
    :return: the result object of ``scipy.stats.kstest``.
    """
    from scipy.stats import poisson
    from scipy.stats import kstest

    # load data; unpacking into three names enforces the three-column layout
    PvalMinima, XvalMinima, _ = np.loadtxt(loadFile, unpack=True)
    Pvals = PvalMinima[1:]  # ditch SMICA
    Xvals = XvalMinima[1:]  # ditch SMICA

    # indices of the rows with positive x values
    gt = np.where(Xvals > 0)

    # recover integer values that created p-values
    pInts = np.rint(Pvals * nSims)
    meanGt = np.rint(np.mean(pInts[gt]))

    def cdf(k):
        return poisson.cdf(k, meanGt)

    # Single pre-formatted string: identical output on Python 2 and 3
    # (the Python 2 print statement inserted a second space after the colon).
    print('closest integer to mean:  %s' % meanGt)
    return kstest(pInts[gt], cdf)
예제 #39
0
def Pvalue(AS, NumBases):
    """Poisson upper-tail probability for the mismatch count of an alignment.

    The expected number of mismatches is ``0.01 * NumBases``. The observed
    number is ``ceil(AS / -6)`` (presumably each mismatch costs 6
    alignment-score points; confirm against the aligner settings).

    :param AS: alignment score.
    :param NumBases: number of aligned bases.
    :return: ``"-"`` when ``NumBases`` is zero, otherwise
        ``P(X > observed)`` for ``X ~ Poisson(expected)``.
    """
    if NumBases == 0.0:
        return "-"
    ExpectedMMrate = 0.01
    ExpectedMM = ExpectedMMrate * float(NumBases)
    ActualMM = int(ceil(float(AS) / (-6.0)))
    # sf is the survival function 1 - cdf, but keeps precision in the far
    # tail where ``1 - cdf`` rounds to exactly 0.
    return poisson.sf(ActualMM, ExpectedMM)
예제 #40
0
	def estimate_revenue(self, st_time, pk_zone, dp_zone, th, car_type="UberX", zero_threshold=0.1):
		"""Expected revenue of a trip, weighted by its Poisson demand tail.

		The record ``self.G[time index][pk_zone][dp_zone]`` supplies a count
		(element 0, divided by ``self.data_date_range`` to form the Poisson
		mean) and two values forwarded to ``self.compute_fare`` (elements 1
		and 2).

		Returns 0 when ``P(X > th)`` is NaN or below ``zero_threshold``,
		otherwise that probability multiplied by the fare.
		"""
		slot = self.time_to_index(st_time)
		record = self.G[slot][pk_zone][dp_zone]
		rate = float(record[0]) / self.data_date_range
		tail = 1.0 - poisson.cdf(th, rate)
		negligible = math.isnan(tail) or tail < zero_threshold
		if negligible:
			return 0
		return tail * self.compute_fare(record[1], record[2], car_type)
예제 #41
0
def local_coverage_score(covlist, window_size=11):
    ''' Find local coverage dips.

        For every position the local mean is taken over
        ``covlist[max(1, i - window_size):i + window_size + 1]`` and the
        position is scored as ``-10 * log10(P(X <= coverage))`` for
        ``X ~ Poisson(local mean)``. A probability of exactly 0 is scored
        as 0.

        NOTE(review): the window never includes index 0 and the first
        position is always given probability 1 (score 0); presumably index 0
        is a placeholder -- confirm with callers.

        Returns a tuple ``(scores, localmeans)``, each a list with one entry
        per position in covlist. Empty input yields ``([], [])``.
    '''
    # numpy replaces the deprecated ``scipy.mean`` / ``scipy.log10`` aliases.
    import numpy as np

    n = len(covlist)
    if n == 0:
        return [], []
    localmeans = [np.mean(covlist[max(1, i - window_size):min(i + window_size + 1, n)])
                  for i in range(n)]
    pvals = [poisson.cdf(cov, mu) for cov, mu in zip(covlist, localmeans)]
    pvals[0] = 1
    scores = [(-10 * np.log10(pv)) if pv != 0 else 0 for pv in pvals]
    return scores, localmeans
예제 #42
0
파일: Statistics.py 프로젝트: cschoi/Books
def poissonTailTest(counts, eventRate, oneSided=True):
  """Poisson tail probability for counts deviating from an event rate.

  Each count's absolute deviation from ``eventRate`` is mirrored below the
  rate and the Poisson CDF (with mean ``eventRate``) is evaluated there.
  The result is doubled when ``oneSided`` is false.

  counts    -- observed counts (converted with ``array``)
  eventRate -- mean of the Poisson distribution
  oneSided  -- return the single lower-tail value when true
  """
  deviations = abs(array(counts) - eventRate)
  lowerTail = poisson.cdf(eventRate - deviations, eventRate)
  return lowerTail if oneSided else lowerTail * 2
    def testprobabilitiespoisson(self):
        """Compare the model's probabilities with scipy's Poisson values.

        The expected 4x3 table holds P(0), P(1) and P(>1) for the mean
        obtained from ``self.data.calculate_equation(self.coefficients[0])``
        and must equal ``self.model.calc_probabilities(self.data)``
        element for element.
        """
        rate = self.data.calculate_equation(self.coefficients[0])
        columns = (
            poisson.pmf(0, rate),
            poisson.pmf(1, rate),
            1 - poisson.cdf(1, rate),
        )
        expected = zeros((4, 3))
        for col, values in enumerate(columns):
            expected[:, col] = values

        actual = self.model.calc_probabilities(self.data)
        matches = all(expected == actual)
        self.assertEqual(True, matches)
예제 #44
0
def main():
    """Print the smallest per-document count that is unlikely anywhere.

    With a term frequency of 1020 over 31254 documents, occurrences per
    document are modelled as Poisson with mean ``freq / num_docs``. The
    loop finds the first ``i`` for which the probability that some document
    holds the term more than ``i`` times drops below 0.01, then prints
    ``i + 1``.
    """
    term_freq = 1020
    collection_size = 31254
    rate = term_freq/collection_size
    i = 0
    # poisson.cdf(i, rate) is P(<= i occurrences in one document); raised to
    # the collection size it is the probability that no document exceeds i.
    while not (1 - poisson.cdf(i, rate)**collection_size < 0.01):
        i += 1
    print(i+1)
예제 #45
0
파일: filterFDR.py 프로젝트: KateK/KNIFE
def getPval(alignedReads):
    """Poisson upper-tail p-value for the mismatches of a set of reads.

    The observed mismatch count is the summed ``readStat`` of all reads
    divided by -6 and rounded up to an integer. The expectation is the
    module-level ``globalDecoyMMrate`` times the total number of bases,
    counting each read's ``juncRead`` and, when present, its ``useMate``.

    :param alignedReads: sequence of read objects exposing ``readStat``,
        ``juncRead.readLen`` and ``useMate``.
    :return: ``"-"`` for an empty sequence, otherwise
        ``P(X > observed)`` for ``X ~ Poisson(expected)``.
    """
    if len(alignedReads) == 0:
        return "-"

    useMMrate = globalDecoyMMrate

    # total number of mismatches observed for all reads aligning to this
    # junction, rounded up because the Poisson functions need an integer
    num_mm = int(ceil(sum(x.readStat for x in alignedReads) / -6.0))
    num_bases = (sum(x.juncRead.readLen for x in alignedReads)
                 + sum(x.useMate.readLen for x in alignedReads if x.useMate is not None))

    # sf equals 1 - cdf but does not round tiny tail probabilities to 0.
    return poisson.sf(num_mm, useMMrate * num_bases)
예제 #46
0
def _error( value ) :
  '''Construct frequentist errors using Poisson distribution'''
  # up error: smallest lambda for which P(n<=nobs|lambda) < (1-0.68268...)/2 = 0.15865...
  # down error: largest lambda for which P(n>=nobs|lambda) < (1-0.68268...)/2 = 0.15865...
  lambda_up, lambda_down, step_size = 1.1*value, 0.9*value, float(value)/10
  if value == 0 : return (0,1.8410216450100005) # save time with precomputed value
  if value < 1 : lambda_up, lambda_down, step_size =  1.8, 0.0, 0.1
  for i in range(5) :
    lambda_up -= step_size; lambda_down += step_size; step_size /= 10
    while poisson.cdf( value, lambda_up ) > 0.15865525393145705 : lambda_up += step_size
    while poisson.sf( value-1, lambda_down ) > 0.15865525393145705 : lambda_down -= step_size
  return (value-lambda_down,lambda_up-value)
예제 #47
0
def add_gc_bias(meancoverages,targetcoverage):
	"""Map one random Poisson quantile onto several mean coverages.

	A single value is drawn from Poisson(targetcoverage) and converted to
	its cumulative probability. For every entry of ``meancoverages`` the
	integer at that same quantile of Poisson(entry) is returned; entries
	equal to 0 map to 0.
	"""
	draw = poisson.rvs(targetcoverage)
	quantile = poisson.cdf(draw, targetcoverage)
	return [0 if cov == 0 else int(poisson.ppf(quantile, cov)) for cov in meancoverages]
예제 #48
0
def poisson_marginals(means, accuracy=1e-10):
    """
    Find the probability mass functions (pmfs), cumulative mass functions
    (cmfs) and approximate supports of a set of Poisson random variables.

    For each mean the candidate states are ``0 .. max(ceil(5 * mean), 20)``.
    A state belongs to the support when its pmf is at least ``accuracy`` and
    its cmf is at most ``1 - accuracy``.

    Code from the paper: 'Generating spike-trains with specified
    correlations', Macke et al., submitted to Neural Computation

    Adapted from `<http://www.kyb.mpg.de/bethgegroup/code/efficientsampling>`_

    Parameters
    ----------
    means : sequence of float
        The means of the Poisson random variables.
    accuracy : float, optional
        Probability cut-off that defines the support (default 1e-10).

    Returns
    -------
    pmfs : ndarray
        k-th entry: pmf of the k-th variable evaluated on its support.
    cmfs : ndarray
        k-th entry: cmf of the k-th variable evaluated on its support.
    supps : ndarray
        k-th entry: integer states forming the support of the k-th variable.

    Each result is a regular 2-D array when all supports have the same
    length, otherwise a 1-D object array holding one array per variable.
    """
    from scipy.stats import poisson
    import math

    def _stack(items):
        # Equal-length entries stack exactly as ``np.array`` always did.
        # Ragged entries need an explicit object array, because recent
        # NumPy versions refuse to build one implicitly.
        if len(set(len(item) for item in items)) <= 1:
            return np.array(items)
        out = np.empty(len(items), dtype=object)
        for idx, item in enumerate(items):
            out[idx] = item
        return out

    cmfs = []
    pmfs = []
    supps = []

    for mean in means:
        states = range(0, int(max(math.ceil(5 * mean), 20) + 1))
        cmf = poisson.cdf(states, mean)
        pmf = poisson.pmf(states, mean)
        supp = np.where((cmf <= 1 - accuracy) & (pmf >= accuracy))[0]
        supps.append(supp)
        cmfs.append(cmf[supp])
        # States start at 0, so the support values index the pmf directly.
        pmfs.append(pmf[supp])

    return _stack(pmfs), _stack(cmfs), _stack(supps)
예제 #49
0
파일: uber_max.py 프로젝트: ecsark/ubermax
	def estimate_revenue2(self, st_time_index, pk_zone, dp_zone, th, car_type="UberX", zero_threshold=0.1):
		"""Expected revenue of a trip, with the Poisson tail memoised.

		The record ``self.G[st_time_index][pk_zone][dp_zone]`` supplies a
		count (element 0, divided by ``self.data_date_range`` to form the
		Poisson mean) and two values forwarded to ``self.compute_fare``
		(elements 1 and 2). ``P(X > th)`` is stored in ``self.cached_cdf``
		under the key ``(th, mean)``.

		Returns 0 when the probability is NaN or below ``zero_threshold``,
		otherwise the probability multiplied by the fare.
		"""
		info = self.G[st_time_index][pk_zone][dp_zone]
		mu = float(info[0]) / self.data_date_range
		key_tuple = (th, mu)
		if key_tuple not in self.cached_cdf:
			self.cached_cdf[key_tuple] = 1.0 - poisson.cdf(*key_tuple)
		prob = self.cached_cdf[key_tuple]
		if math.isnan(prob) or prob < zero_threshold:
			return 0
		revenue = self.compute_fare(info[1], info[2], car_type)
		if revenue > 300:
			# Report unusually large fares. A single pre-formatted string
			# prints identically on Python 2 and Python 3.
			print("%s %s %s" % (st_time_index, pk_zone, dp_zone))
		return prob * revenue
def prior_calculations(lbda, maxlen, eta, maxlhs):
    """Precompute Poisson prior terms.

    Returns a tuple ``(beta_Z, logalpha_pmf, logbeta_pmf)``:

    * ``beta_Z`` -- ``poisson.cdf(maxlhs, eta) - poisson.pmf(0, eta)``, the
      Poisson(eta) mass on ``1 .. maxlhs``.
    * ``logalpha_pmf`` -- dict mapping ``0 .. maxlen`` to the Poisson(lbda)
      log-pmf.
    * ``logbeta_pmf`` -- dict mapping ``1 .. maxlhs`` to the Poisson(eta)
      log-pmf.
    """
    # Normalisation constant for beta.
    beta_Z = poisson.cdf(maxlhs, eta) - poisson.pmf(0, eta)

    # Un-normalised log-pmfs.
    logalpha_pmf = {}
    for length in range(maxlen + 1):
        try:
            value = poisson.logpmf(length, lbda)
        except RuntimeWarning:
            # Only reachable when warnings are configured to raise.
            value = -inf
        logalpha_pmf[length] = value

    logbeta_pmf = dict((size, poisson.logpmf(size, eta)) for size in range(1, maxlhs + 1))
    return beta_Z, logalpha_pmf, logbeta_pmf
예제 #51
0
def getExpected(mu):
    """
    Given a mean coverage mu, determine the AUC, X-intercept, and elbow point
    of a Poisson-distributed perfectly behaved input sample with the same coverage

    Returns the tuple ``(AUC, XInt, elbow)``.
    """
    # Support reaching just past the upper end of the 99.999% interval.
    upper = round(poisson.interval(0.99999, mu=mu)[1] + 1)
    support = np.arange(upper)
    mass = poisson.pmf(support, mu=mu)
    cumulative = poisson.cdf(support, mu=mu)

    # Cumulative value-weighted mass, scaled so that it ends at 1.
    weighted = np.cumsum(mass * support)
    weighted /= max(weighted)

    first_positive = np.nonzero(weighted)[0][0]
    x_intercept = cumulative[first_positive]
    auc = sum(mass * weighted)
    # Point where the cdf is furthest above the weighted curve.
    elbow = cumulative[np.argmax(cumulative - weighted)]
    return (auc, x_intercept, elbow)
예제 #52
0
 def pvalue(lo, hi):
     """Compute p value in window [lo, hi)

     ``data`` and ``mc`` come from the enclosing scope and are summed over
     the window to give the observed count d and the prediction m.
     Returns 1 when the prediction is zero (asserting that the data is zero
     too) or when the data falls below the prediction; otherwise returns
     P(X >= d) for X ~ Poisson(m).
     """
     d, m = data[lo:hi].sum(), mc[lo:hi].sum()
     if m == 0:
         # MC prediction is zero. Not sure what then..
         assert d == 0, "Data = {0} where the prediction is zero..".format(d)
         return 1
     if d < m:
         return 1  # "Dips" get ignored.

     # P(X >= d). sf(d - 1) equals 1 - cdf(d - 1) but does not round tiny
     # tails to exactly 0, which would make -log(p) below fail.
     p = poisson.sf(d - 1, m)

     if verbose and edges:
         # Parenthesised single argument prints identically on Python 2 and 3.
         print("{0:2} {1:2} [{2:8.3f}, {3:8.3f}] {4:7.0f} {5:7.3f} {6:.5f} {7:.2f}".format(
             lo, hi, edges[lo], edges[hi], d, m, p, -log(p)))

     return p
예제 #53
0
파일: FancyPlot.py 프로젝트: sgravel/tracts
def find_bounds(mean, alpha):
    """ Find both the lower and upper bounds of a central interval of a
        poisson distribution with the given mean.

        ``lower`` is the first non-negative integer whose cdf exceeds
        ``alpha / 2`` and ``upper`` the first whose cdf exceeds
        ``1 - alpha / 2``. The scan only stops once both are found.
        Returns the tuple ``(lower, upper)``.
    """
    lower_level = alpha / 2.0
    upper_level = 1 - alpha / 2.0

    lower = upper = None
    i = 0
    while lower is None or upper is None:
        mass = poisson.cdf(i, mean)
        if upper is None and mass > upper_level:
            upper = i
        if lower is None and mass > lower_level:
            lower = i
        i += 1
    return lower, upper
예제 #54
0
def test_poisson():
    """Check ``lk.Poisson`` against scipy's Poisson log-pmf and cdf."""

    # Test we can match the Poisson distribution from scipy

    mu = 2
    dist = lk.Poisson()

    x = np.random.randint(low=0, high=5, size=(10,))

    p1 = poisson.logpmf(x, mu)
    p2 = dist.loglike(x, mu)

    # Without the assert the comparison result was discarded and the test
    # could never fail.
    assert np.allclose(p1, p2)

    p1 = poisson.cdf(x, mu)
    p2 = dist.cdf(x, mu)

    assert np.allclose(p1, p2)
예제 #55
0
    def cdf(self, y, f):
        r"""
        Cumulative distribution function of the Poisson likelihood.

        The latent function is mapped to the Poisson mean with ``exp`` when
        ``self.tranfcn == 'exp'`` and with ``softplus`` otherwise.

        Parameters
        ----------
        y: ndarray
            query quantiles, i.e.\  :math:`P(Y \leq y)`.
        f: ndarray
            latent function from the GLM prior (:math:`\mathbf{f} =
            \boldsymbol\Phi \mathbf{w}`)

        Returns
        -------
        cdf: ndarray
            Cumulative distribution function evaluated at y.
        """
        if self.tranfcn == 'exp':
            rate = np.exp(f)
        else:
            rate = softplus(f)
        return poisson.cdf(y, mu=rate)
def poisson_threshold(dataset):
    """
    Return a score threshold derived from a Poisson alert budget.

    First a Poisson mean is searched, by repeated halving of the step, such
    that the probability of more than ``Conf.alerts_per_day`` events is
    close to ``Conf.alert_confidence``. The threshold is then the average,
    over all days, of the score found at index ``int(mean)`` of each day;
    days with too few scores contribute 0 to the sum.

    :param dataset: iterable with one entry per day, each a sorted numpy
        array of alert scores
    :type dataset: iterable of numpy arrays
    :returns: float -- calculated threshold (0.0 for an empty dataset)
    """
    # First, find the target parameter
    rate = Conf.alerts_per_day
    step = rate
    gap = 1
    attempts = 0

    while gap > Conf.alert_confidence and attempts < 100:
        # Keep trying until we get closer
        attempts += 1
        step = float(step) / 2
        tail = 1 - poisson.cdf(Conf.alerts_per_day, rate)
        gap = math.fabs(Conf.alert_confidence - tail)
        # Tail above the target: keep decreasing lambda; otherwise we
        # overshot and step back up.
        rate = rate - step if tail > Conf.alert_confidence else rate + step

    # Now, figure out the threshold so that the average number of
    # investigations per day is the rate found above: take the score at that
    # index for each day, then average those.
    cutoff = int(rate)
    day_count = 0
    score_total = 0
    for day_scores in dataset:
        # Days without enough alerts add nothing to the sum, i.e. all of
        # their alerts should be seen.
        if len(day_scores) > cutoff:
            score_total += day_scores[cutoff]
        day_count += 1

    if day_count == 0:
        return 0.0
    return float(score_total) / day_count
예제 #57
0
def getFourthSNP(ipDir, prefix):
    """Write the '_4' SNP file by prepending a Poisson tail value to '_3'.

    Each tab-separated line of ``<prefix>_3.<FileExts.SNP>`` is copied to
    ``<prefix>_4.<FileExts.SNP>`` with a new first column:
    ``1 - poisson.cdf(variant reads - 1, 0.01 * depth)``. The depth is the
    first ``DP=`` entry of the ';'-separated second column and the variant
    read count is the eighth column.
    """
    src_path = os.path.join(ipDir, prefix + '_3' + '.' + FileExts.SNP)
    dst_path = os.path.join(ipDir, prefix + '_4' + '.' + FileExts.SNP)
    with open(src_path, 'r') as src, open(dst_path, 'w') as dst:
        for raw in src:
            fields = raw.strip().split('\t')

            # The depth stays '' when no DP= entry exists, as before.
            depth = next(
                (float(entry[3:]) for entry in fields[1].split(';') if entry.startswith('DP=')),
                '')

            n_variant = float(fields[7])
            tail = 1 - poisson.cdf(n_variant - 1, depth * 0.01)
            fields.insert(0, str(tail))
            dst.write('\t'.join(fields) + '\n')
예제 #58
0
def poisson_pvalue(scores, window):
    """Poisson upper-tail p-values with the mean taken from the modal bin.

    The scores are histogrammed into bins of width ``window`` starting at 0.
    The mean of the scores inside the first most-populated bin becomes the
    Poisson mean, and every score ``s`` gets the p-value ``P(X > s)``.

    :param scores: non-empty sequence of integer scores.
    :param window: integer bin width.
    :return: tuple ``(pvalues, mean)`` with one p-value per score.
    """
    import numpy
    from scipy.stats import poisson

    ## histogram
    edges = list(range(0, max(scores) + window, window))
    hist, bin_edges = numpy.histogram(scores, edges)

    # argmax returns the first most-populated bin, like the original scan.
    peak = int(numpy.argmax(hist))
    start, end = bin_edges[peak], bin_edges[peak + 1]

    # numpy.histogram closes the last bin on the right, so scores equal to
    # that edge were counted in it and must be included in the mean too.
    last_bin = peak == len(hist) - 1
    in_peak = [s for s in scores
               if start <= s and (s < end or (last_bin and s == end))]
    mean = sum(in_peak) / float(len(in_peak))

    # sf equals 1 - cdf but keeps precision for very small tails.
    pvalues = [poisson.sf(s, mean) for s in scores]
    return pvalues, mean
예제 #59
0
def P_breakpoints_in_interval(I, q, n):
	"""
		param:
		q		- breakpoint ratio
		I 		- Interval of length I
		n 		- number of break points

		Calculates:
		q * I	- Number of expected breakpoints within the interval

		returns:
		the Poisson cumulative probability of at most n breakpoints in I,
		i.e. poisson.cdf(n, q * I)
	"""
	return poisson.cdf(n, q * I)
예제 #60
0
def safety(segments, injections, threshold=SAFETY_THRESHOLD):
    """The safety of these segments with respect to vetoing GW signals

    The number of injections that intersect the veto segments is compared
    with the number expected by chance (injection count scaled by the ratio
    of active to known time). The Poisson probability of seeing at least
    that many coincidences is then compared with ``threshold``.

    NOTE(review): the function returns `True` when that probability is
    below ``threshold``; confirm that this matches the intended meaning of
    "safe".

    Parameters
    ----------
    segments : `~gwpy.segments.DataQualityFlag`, `~glue.segments.segmentlist`
        the set of segments to test
    injections : `~glue.segments.segmentlist`
        the set of injections against which to compare; wrapped in a
        `DataQualityFlag` when it is not one already
    threshold : `float`, optional, default: 5e-3
        the Poisson probability the result is compared against

    Returns
    -------
    safe : `bool`
        whether the Poisson probability is below ``threshold``
    """
    if not isinstance(injections, DataQualityFlag):
        injections = DataQualityFlag(active=injections)

    # segment info
    dead = float(abs(segments.active))
    live = float(abs(segments.known))

    # injection coincidence
    n_coincident = sum(1 for inj in injections.active
                       if inj.intersects(segments))
    n_expected = len(injections) * dead / live

    # statistical significance: P(X >= n_coincident) under the expectation
    return 1 - poisson.cdf(n_coincident - 1, n_expected) < threshold