Example #1
def getSequence(data, ff):
    print "Generating observed sequence..."

    num_reads_p = len(data) * ff / 2  # Paternal reads are 1/2 of the fetus'
    num_reads_m = len(data) - num_reads_p  # Maternal reads are the rest
    expected_coverage_p = num_reads_p * READ_LEN * BUCKET_SIZE / CHR_LEN
    expected_coverage_m = num_reads_m * READ_LEN * BUCKET_SIZE / CHR_LEN

    # Generate two distributions, one for the father, and one for the mother
    low_p, high_p = poisson.interval(0.333, expected_coverage_p)
    low_m, high_m = poisson.interval(0.333, expected_coverage_m)

    # Count the number of times a read comes from m or p in each bucket
    coverage_p = defaultdict(lambda: 0)
    coverage_m = defaultdict(lambda: 0)
    for read in data:
        pos = int(read[0])
        read_len = len(read[1])
        for i in range(read_len):
            bucket = (pos + i) // BUCKET_SIZE  # integer bucket index
            if read[3] == "-":
                coverage_p[bucket] += 1
                coverage_m[bucket] += 1
            elif read[3] == "p":
                coverage_p[bucket] += 1
            else:
                coverage_m[bucket] += 1

    # Decide if the number of reads represents a low, normal, or high distribution
    coverage = {}
    keys = list(coverage_p.keys()) + list(
        set(coverage_m.keys()) - set(coverage_p.keys()))  # union of observed buckets (not used below)

    for key in range(CHR_LEN // BUCKET_SIZE + 1):
        if coverage_p[key] < low_p:
            p_val = "L"
        elif coverage_p[key] < high_p:
            p_val = "N"
        else:
            p_val = "H"
        if coverage_m[key] < low_m:
            m_val = "L"
        elif coverage_m[key] < high_m:
            m_val = "N"
        else:
            m_val = "H"
        val = (p_val, m_val)
        coverage[key] = val

    # Sort it by position
    observed_seq = []
    for key in sorted(coverage):
        observed_seq.append(coverage[key])

    print "Done."
    return observed_seq
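
A minimal usage sketch for getSequence above; the constants, fetal fraction, and read tuples are invented for illustration and are not from the original project:

from collections import defaultdict
from scipy.stats import poisson

READ_LEN = 50       # assumed read length
BUCKET_SIZE = 1000  # assumed bucket width in bases
CHR_LEN = 10000     # assumed chromosome length

# Each read is (position, sequence, _, origin); the third field is unused here,
# and origin is "p" (paternal), "m" (maternal), or "-" (shared).
data = [("1200", "A" * READ_LEN, None, "p"),
        ("1230", "A" * READ_LEN, None, "m"),
        ("5400", "A" * READ_LEN, None, "-")]

observed = getSequence(data, ff=0.1)  # ff = assumed fetal fraction
print(observed)  # one (paternal, maternal) L/N/H label pair per bucket
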
def poisson_mode_and_alpha(expected, alpha):
    """
    Return the mode of the expected number of substitutions and the lower and
    upper limits within which `alpha` fraction of occurrences fall, for a
    Poisson distribution with lambda=`expected`.

    Parameters
    ----------
    expected : float
        Expected substitutions.
    alpha : float
        Fraction of the Poisson distribution to be contained within the
        returned interval.

    Returns
    -------
    mode : int
        Typical (most probable) number of substitutions expected.
    lower_limit : int
        Lower bound of the interval containing `alpha` fraction of the
        Poisson distribution.
    upper_limit : int
        Upper bound of the interval containing `alpha` fraction of the
        Poisson distribution.
    """
    mean, var = poisson.stats(expected, moments='mv')
    mode = math.floor(mean)  # round down to closest integer
    lower_limit, upper_limit = poisson.interval(alpha, expected, loc=0)

    return mode, lower_limit, upper_limit
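
An illustrative call of the helper above (the numbers are made up):

import math
from scipy.stats import poisson

mode, lower, upper = poisson_mode_and_alpha(expected=3.7, alpha=0.95)
print(mode, lower, upper)  # mode is floor(3.7) = 3; lower/upper bracket ~95% of the mass
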
def EPM_Poisson_countd(mu, library_size):
    '''Return the Poisson mutation rate distribution for a given library size.

    The average rate is set by mu; library_size is the number of sequences in
    the library. Returns two lists: probs_list contains the number of
    sequences with the corresponding number of mutations in mut_list.
    '''

    probs_list = []
    mut_list = []
    alpha = 1-1/(library_size*10)
    a,b = poisson.interval(alpha, mu, loc=0)
    a = int(a)
    b = int(b)
    for k in range(a,b+1):
        k_count = int(round(poisson.pmf(k,mu)*library_size,0))
        if k_count != 0:
            probs_list.append(k_count)
            mut_list.append(k)

    # If, due to rounding, the total library size differs from the expected
    # size, adjust the count at the mean (mu).

    dif = sum(probs_list) - library_size
    mutation_list = list(range(a, b + 1))
    index = mutation_list.index(mu)  # assumes mu matches one of the integer mutation counts
    probs_list[index] -= dif

    return probs_list, mut_list
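
A quick usage sketch for EPM_Poisson_countd above; the adjustment step indexes the count at mu, so mu is assumed to be an integer here:

from scipy.stats import poisson

counts, mutations = EPM_Poisson_countd(mu=3, library_size=1000)
print(sum(counts))                   # adds back up to the library size
print(list(zip(mutations, counts)))  # e.g. [(0, 50), (1, 149), ...]
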
Example #4
def Tukey_outliers(set_of_means, FDR=0.005, supporting_interval=0.5, verbose=False):
    """
    Performs Tukey quintile test for outliers from a normal distribution with defined false discovery rate
    :param set_of_means:
    :param FDR:
    :return:
    """
    # false discovery rate v.s. expected falses v.s. power
    q1_q3 = norm.interval(supporting_interval)
    FDR_q1_q3 = norm.interval(1 - FDR)  # TODO: this is not necessary: we can perfectly well fit it with proper params to FDR
    multiplier = (FDR_q1_q3[1] - q1_q3[1]) / (q1_q3[1] - q1_q3[0])
    l_means = len(set_of_means)

    q1 = np.percentile(set_of_means, 50*(1-supporting_interval))
    q3 = np.percentile(set_of_means, 50*(1+supporting_interval))
    high_fence = q3 + multiplier*(q3 - q1)
    low_fence = q1 - multiplier*(q3 - q1)

    if verbose:
        print('FDR:', FDR)
        print('q1_q3', q1_q3)
        print('FDRq1_q3', FDR_q1_q3)
        print('q1, q3', q1, q3)
        print('fences', high_fence, low_fence)

    if verbose:
        print("FDR: %s %%, expected outliers: %s, outlier 95%% confidence interval: %s" % (
            FDR * 100, FDR * l_means, poisson.interval(0.95, FDR * l_means)))

    ho = (set_of_means < low_fence).nonzero()[0]
    lo = (set_of_means > high_fence).nonzero()[0]

    return lo, ho
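
An illustrative run of Tukey_outliers above on synthetic data with two planted outliers:

import numpy as np
from scipy.stats import norm, poisson

rng = np.random.default_rng(0)
values = np.concatenate([rng.normal(0.0, 1.0, 1000), [15.0, -12.0]])
high_idx, low_idx = Tukey_outliers(values, FDR=0.005, verbose=True)
print(high_idx, low_idx)  # indices of points above / below the Tukey fences
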
Example #6
def double_poisson_ci(freq, alpha=0.99):
    """ Assuming two Poisson processes (1 for the event rate and 1 for randomization), calculate the confidence interval
    for the true rate

    Parameters
    ----------
    freq: float - co-occurrence frequency
    alpha: float - desired confidence. range: [0, 1]

    Returns
    -------
    (lower bound, upper bound)
    """
    # Adjust the interval for each individual Poisson to achieve the overall confidence level
    alpha_adjusted = 1 - (1 - alpha)**0.5
    return (poisson.interval(alpha_adjusted,
                             poisson.interval(alpha_adjusted, freq)[0])[0],
            poisson.interval(alpha_adjusted,
                             poisson.interval(alpha_adjusted, freq)[1])[1])
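
An illustrative call of double_poisson_ci above:

from scipy.stats import poisson

lower, upper = double_poisson_ci(freq=25, alpha=0.99)
print(lower, upper)  # conservative 99% bounds around the observed co-occurrence frequency
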
Example #7
    def confidence_intervals(self, alpha=0.99):
        """ Returns confidence intevals of counts

        Parameters
        ----------
        alpha

        Returns
        -------
        List of tuples with confidence intervals
        """
        return [poisson.interval(alpha, x) for x in self.counts]
Example #8
def freq():
    plt.clf()
    for n in ntry:
        r = poisson.interval(.68, n)
        plt.plot(r, (n, n), color='black', linewidth=2)

    nmax = root(lambda x: poisson.interval(.68, x)[1] - nobs, nobs * .8).x[0]
    plt.plot(poisson.interval(.68, nmax), (nmax, nmax),
             color='red',
             label=r"$N_{{low}} = {:6.3f}$".format(nmax),
             linewidth=2)
    nmin = root(lambda x: poisson.interval(.68, x)[0] - nobs, nobs * 1.2).x[0]

    plt.plot(poisson.interval(.68, nmin), (nmin, nmin),
             color='red',
             label=r"$N_{{high}} = {:6.3f}$".format(nmin),
             linewidth=2)

    plt.plot(poisson.interval(.68, nobs), (nobs, nobs),
             color='blue',
             label=r"$N_{{max}} = {}$".format(nobs),
             linewidth=2)

    plt.axvline(x=nobs,
                label=r"$N_{{o}}={}$".format(nobs),
                ls=':',
                linewidth=2)
    plt.xlabel(r'68%  $N_o$ range')
    plt.ylabel(r'$N$')
    plt.legend(loc=0)
    plt.title(r'frequentist')
    plt.savefig('freq.pdf')
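
The plot above inverts the 68% Poisson interval to find the range of rates compatible with an observed count; a simple grid-based sketch of that idea, swapping the root-finding for a scan and assuming nobs = 10 (nobs and ntry are globals not defined in the snippet):

import numpy as np
from scipy.stats import poisson

nobs = 10  # assumed observed count
rates = np.linspace(1, 30, 2901)
lo, hi = poisson.interval(.68, rates)
compatible = rates[(lo <= nobs) & (nobs <= hi)]
print(compatible.min(), compatible.max())  # rates whose 68% interval contains nobs
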
Example #10
def getExpected(mu):
    """
    Given a mean coverage mu, determine the AUC, X-intercept, and elbow point
    of a Poisson-distributed perfectly behaved input sample with the same coverage
    """
    x = np.arange(round(poisson.interval(0.99999, mu=mu)[1] + 1))  # This will be an appropriate range
    pmf = poisson.pmf(x, mu=mu)
    cdf = poisson.cdf(x, mu=mu)
    cs = np.cumsum(pmf * x)
    cs /= max(cs)
    XInt = cdf[np.nonzero(cs)[0][0]]
    AUC = sum(poisson.pmf(x, mu=mu) * cs)
    elbow = cdf[np.argmax(cdf - cs)]
    return (AUC, XInt, elbow)
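
A quick call of getExpected above:

import numpy as np
from scipy.stats import poisson

AUC, XInt, elbow = getExpected(10)
print(AUC, XInt, elbow)  # all three are fractions in [0, 1] for this ideal sample
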
Example #11
    def __init__(self, lam):
        '''
        Set up lambda for the Poisson distribution.
        Calculate lower and upper bounds.
        '''
        self.lam = lam

        # Calculate lower and upper bounds for the given confidence interval
        self.lower, self.upper = poisson.interval(self.CONFIDENCE_INTERVALL,
                                                  self.lam)
        self.lower = int(self.lower)
        self.upper = int(self.upper)

        # Calculate probabilities within the given bounds (note: range() excludes self.upper)
        self.probs = [
            poisson.pmf(k, self.lam) for k in range(self.lower, self.upper)
        ]
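
A self-contained sketch of how a class around this constructor might look; the class name TruncatedPoisson and the CONFIDENCE_INTERVALL value of 0.95 are assumptions for illustration only:

from scipy.stats import poisson

class TruncatedPoisson:
    # Hypothetical wrapper; the real class and its CONFIDENCE_INTERVALL are not shown above.
    CONFIDENCE_INTERVALL = 0.95

    def __init__(self, lam):
        self.lam = lam
        self.lower, self.upper = poisson.interval(self.CONFIDENCE_INTERVALL, self.lam)
        self.lower = int(self.lower)
        self.upper = int(self.upper)
        self.probs = [poisson.pmf(k, self.lam) for k in range(self.lower, self.upper)]

dist = TruncatedPoisson(4.0)
print(dist.lower, dist.upper, sum(dist.probs))  # bounds and pmf values between them (range() excludes self.upper)
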
def EPM_Poisson_countd(mu, library_size):
    #returns the Poisson mutation rate distribution for a given library size

    probs_list = []
    mut_list = []
    alpha = 1-1/(library_size*10)
    a,b = poisson.interval(alpha, mu, loc=0)
    a = int(a)
    b = int(b)
    for k in range(a,b+1):
        k_count = int(round(poisson.pmf(k,mu)*library_size,0))
        if k_count != 0:
            probs_list.append(k_count)
            mut_list.append(k)
    dif = sum(probs_list) - library_size
    mutation_list = [i for i in range(a,b+1)]
    index = mutation_list.index(mu)
    probs_list[index] -= dif

    return probs_list, mut_list
Example #13
    def process_densities(self):
        """Determine errors on the data"""

        # Error due to PMT linearity and ADC/mV resolution
        #         self.lin_out_i = zeros((len(self.lin_bins), 4))
        #         for i in range(4):
        #             self.lin_out_i[:, i] = get_out_for_in(self.lin_bins, self.ref_in_i[:, i], self.ref_out)
        #         self.lin_out = get_out_for_in(self.lin_bins, self.ref_in, self.ref_out)
        #         self.dvindvout = (diff(self.lin_bins) / diff(self.lin_out_i[:,1])).tolist()  # dVin/dVout
        #         self.dvindvout.extend([self.dvindvout[-1]])
        #         self.dvindvout = array(self.dvindvout)
        #         self.sigma_Vout = 0.57 / 2.  # Error on Vout
        #         self.sigma_Vin = self.sigma_Vout * self.dvindvout

        # Resolution of the detector
        sigma_res = 0.7
        r_lower, r_upper = norm.interval(0.68, self.lin_bins,
                                         sqrt(self.lin_bins) * sigma_res)
        self.response_lower = r_lower
        self.response_upper = r_upper
        self.response_lower_pmt = get_out_for_in(r_lower, self.ref_in,
                                                 self.ref_out)
        self.response_upper_pmt = get_out_for_in(r_upper, self.ref_in,
                                                 self.ref_out)

        # Poisson error: 68% interval (one sigma)
        # Note: the Poisson error is smaller for the average because of the larger area.
        # Calculate the std of the expected given x.

        p_lower, p_upper = poisson.interval(0.68, self.lin_bins)
        self.poisson_lower = p_lower
        self.poisson_upper = p_upper
        self.poisson_lower_pmt = get_out_for_in(p_lower, self.ref_in,
                                                self.ref_out)
        self.poisson_upper_pmt = get_out_for_in(p_upper, self.ref_in,
                                                self.ref_out)
Example #14
frb_rate = 6.e3  #bursts per sky per day (Champion+15)

#PAF properties
fwhm = 14  #arcmin
fov_one_beam = pi * (fwhm / (2. * 60))**2  #degrees
fov_all_deg = 27 * fov_one_beam
fov_all = fov_all_deg / 41252.9

#Scale factor for relative sensitivities
#Note, it does *not* take cosmology into account,
# which you should do well before a redshift of 5
Tsys_factor = (50. / 25.)**-1.5

#Time request
Ttot = 800. / 24  #Total time in days

#Rate per observing program
mu = frb_rate * Tsys_factor * fov_all * Ttot
print(mu)
#Number observed dummy array
#(note: poisson.pmf is nonzero only at integer k, so non-integer entries evaluate to 0)
k = np.linspace(0.0, 10.0, num=200)

#Probability mass function
frb_pmf = poisson.pmf(k, mu)

#frb_cdf=poisson.cdf(k, mu)

#Calculate the interval that contains 68% of the distribution
print(poisson.interval(0.68, mu))
print(poisson.interval(0.95, 6000))
def LnRatioConfInt(freq, ln_ratio, interval=0.99):
    # Convert ln_ratio back to ratio and calculate confidence intervals for the ratios
    return np.log(
        np.array(poisson.interval(interval, freq)) * np.exp(ln_ratio) / freq)
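
An illustrative call of LnRatioConfInt above:

import numpy as np
from scipy.stats import poisson

low, high = LnRatioConfInt(freq=50, ln_ratio=0.2, interval=0.99)
print(low, high)  # 99% bounds on the log-ratio, driven by Poisson noise in freq
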
Example #16
            elif cl[i - 1] == '--pvalue-combination-livetime':
                pvalue_livetimes.add(float(arg))
            elif cl[i - 1] == '--ifar-double-followup-threshold':
                dfuts.add(float(arg))

ifars = np.sort(np.array(ifars))
count = np.arange(len(ifars))[::-1] + 1
time = time / lal.YRJUL_SI
rate = count / time

pl.step(ifars, rate, label='Observation')

ifars2 = np.logspace(np.log10(ifars.min()), np.log10(ifars.max()), 1000)
label = 'Expectation'
for prob in [0.6827, 0.9545, 0.9973]:
    a, b = poisson.interval(prob, time / ifars2)
    pl.fill_between(ifars2,
                    a / time,
                    b / time,
                    alpha=0.3,
                    edgecolor='none',
                    facecolor='C1',
                    label=label)
    label = None

for ut in upload_thresholds:
    pl.axvline(ut, color='r', ls='--', label='--ifar-upload-threshold')

for pvlt in pvalue_livetimes:
    pl.axvline(pvlt, color='b', ls=':', label='--pvalue-combination-livetime')
Example #17
    def count_blue_stars_in_contour(self,
                                    completeness,
                                    blue_cut=1.3,
                                    kupperlim=15.,
                                    klowerlim=12.,
                                    ph_qual=False,
                                    plot=False,
                                    catalog=None,
                                    survey=None):
        """
        Determine which of the stars inside
        the contour are blue. 
        
        Estimate a 0.95 confidence interval
        for the number of stars present given the detection
        completeness. Approximate by binning on magnitude
        and using the binomial distribution for each of these
        bins (with a fixed detection completeness).
        
        Numerically convolve resulting pdfs to get the pdf
        for the sum of individual magnitude bins
        """

        print("Reached Count stage")

        f = interp1d(completeness[..., 0], completeness[..., 1], kind='linear')

        print(kupperlim)
        print(klowerlim)
        print(catalog['KMag'])
        print(
            catalog
        )  #Apparently this print statement is necessary to make the selection of good rows
        #in the next few lines work. That is super broken and bad of astropy.table

        good_rows = np.logical_and(catalog['KMag'] < kupperlim,
                                   catalog['KMag'] > klowerlim)
        print(good_rows)

        good = catalog[good_rows]
        #good = catalog[(catalog['KMag'] < kupperlim) & (catalog['KMag'] > klowerlim)]
        in_contour = good[good['CloudMask'] == 1]
        JminK = in_contour['JMag'] - in_contour['KMag']
        blue_in_contour = in_contour[JminK < blue_cut]
        self.plot_color_histogram(
            in_contour['JMag'] - in_contour['KMag'],
            blue_in_contour['JMag'] - blue_in_contour['KMag'], kupperlim)

        compfactor = f(blue_in_contour['KMag'])

        from scipy.stats import poisson
        ns = np.ones_like(compfactor)
        lam = np.dot(ns, 1. / compfactor)
        mmean = lam
        mmin, mmax = poisson.interval(0.99, lam)

        print("Number of blue stars in contour")
        print(len(ns))
        #print("Observed in each bin:")
        #print(ns)
        #print("Completeness in each bin")
        #print(compfactor)
        print("Estimated nnumber of blue stars in contour")
        print(mmean)
        print(mmin)
        print(mmax)

        return (mmin, mmean, mmax)
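
The final estimate above boils down to a completeness-weighted Poisson count; a standalone sketch with made-up completeness values:

import numpy as np
from scipy.stats import poisson

compfactor = np.array([0.9, 0.8, 0.95, 0.7])  # assumed detection completeness per blue star
ns = np.ones_like(compfactor)                 # one detected star per entry
lam = np.dot(ns, 1. / compfactor)             # completeness-corrected total
mmin, mmax = poisson.interval(0.99, lam)
print(lam, mmin, mmax)
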
Example #18
def confidence_width(count):
    ci_low, ci_upp = poisson.interval(0.95, count)
    #print(ci_low, ci_upp)
    return ci_upp - ci_low
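
A quick check of confidence_width above:

from scipy.stats import poisson

print(confidence_width(100))  # roughly 2 * 1.96 * sqrt(100) ≈ 39 for a 95% interval
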
Example #19
    subprocess.run("wget https://coinmetrics.io/newdata/btc.csv",
                   shell=True,
                   check=True)

    cm = pd.read_csv('btc.csv')

    cm = utils.get_extra_datetime_cols(cm, 'date')

    cm['BlkSizeByte'] = cm['BlkCnt'] * cm['BlkSizeMeanByte']

    cm['HashRateL7DInc'] = cm['HashRate'].rolling(7).mean()
    cm['HashRateL7D'] = cm['HashRateL7DInc'].shift() / 1000000

    alpha = 0.025
    cm['BlkCntLower'] = [
        poisson.interval(1 - alpha, x)[0] for x in cm['BlkCnt']
    ]
    cm['BlkCntUpper'] = [
        poisson.interval(1 - alpha, x)[1] for x in cm['BlkCnt']
    ]
    cm['HashRateLower'] = [
        (x / 144) * y * (((2**32) / (10**12)) / (600 * 1000000))
        for x, y in zip(cm['BlkCntLower'], cm['DiffMean'])
    ]
    cm['HashRateUpper'] = [
        (x / 144) * y * (((2**32) / (10**12)) / (600 * 1000000))
        for x, y in zip(cm['BlkCntUpper'], cm['DiffMean'])
    ]

    dfs = {}
    median_metrics = [