def _coverage_level(count, low, high):
    """Classify a bucket's read count as "L" (below low), "N" (below high) or "H"."""
    if count < low:
        return "L"
    if count < high:
        return "N"
    return "H"


def getSequence(data, ff):
    """Build the observed (paternal, maternal) coverage sequence per bucket.

    Parameters
    ----------
    data : sequence of reads
        Each read is indexable: ``read[0]`` is the start position (converted
        with ``int``), ``read[1]`` is the read sequence (only its length is
        used) and ``read[3]`` is the origin tag.  A tag of ``"-"`` is counted
        for both parents, ``"p"`` for the paternal side only, and anything
        else for the maternal side only.
    ff : float
        Fetal fraction; half of it is taken as the paternal share of reads.

    Returns
    -------
    list of (str, str)
        One ``(p_val, m_val)`` tuple per bucket, for buckets
        ``0 .. CHR_LEN // BUCKET_SIZE`` in position order.  Each value is
        "L", "N" or "H" relative to the central 33.3% Poisson interval of
        the expected coverage.

    Notes
    -----
    Relies on the module-level constants READ_LEN, BUCKET_SIZE and CHR_LEN.
    """
    print("Generating observed sequence...")

    num_reads_p = len(data) * ff / 2  # Paternal reads are 1/2 of the fetus'
    num_reads_m = len(data) - num_reads_p  # Maternal reads are the rest

    expected_coverage_p = num_reads_p * READ_LEN * BUCKET_SIZE / CHR_LEN
    expected_coverage_m = num_reads_m * READ_LEN * BUCKET_SIZE / CHR_LEN

    # Thresholds separating low / normal / high coverage for each parent
    low_p, high_p = poisson.interval(0.333, expected_coverage_p)
    low_m, high_m = poisson.interval(0.333, expected_coverage_m)

    # Count, base by base, how often each bucket is covered by p or m reads
    coverage_p = defaultdict(int)
    coverage_m = defaultdict(int)
    for read in data:
        pos = int(read[0])
        origin = read[3]
        for i in range(len(read[1])):
            # Floor division keeps bucket ids integral on Python 2 and 3, so
            # they match the integer keys looked up below.
            bucket = (pos + i) // BUCKET_SIZE
            if origin == "-":
                coverage_p[bucket] += 1
                coverage_m[bucket] += 1
            elif origin == "p":
                coverage_p[bucket] += 1
            else:
                coverage_m[bucket] += 1

    # Buckets are visited in increasing order, so the result is already
    # sorted by position; buckets without reads count as zero coverage.
    observed_seq = [
        (_coverage_level(coverage_p[key], low_p, high_p),
         _coverage_level(coverage_m[key], low_m, high_m))
        for key in range(CHR_LEN // BUCKET_SIZE + 1)
    ]

    print("Done.")
    return observed_seq
def poisson_mode_and_alpha(expected, alpha):
    """
    Returns the mode of a Poisson distribution with lambda=`expected` and
    the lower & upper limits between which `alpha` fraction of the
    occurrences falls.

    Parameters
    ----------
    expected : float
        Expected substitutions (the Poisson mean).
    alpha : float
        Probability mass that the returned interval must contain.

    Returns
    -------
    mode :
        Typical number of substitutions expected: the mean rounded down
        with ``math.floor``.
    lower_limit :
        Lower end of the central interval containing `alpha` fraction of
        the Poisson distribution.
    upper_limit :
        Upper end of the central interval containing `alpha` fraction of
        the Poisson distribution.
    """
    # Only the mean is needed; the variance was previously computed and discarded.
    mean = poisson.stats(expected, moments='m')
    mode = math.floor(mean)  # round down to closest integer
    lower_limit, upper_limit = poisson.interval(alpha, expected, loc=0)
    return mode, lower_limit, upper_limit
def EPM_Poisson_countd(mu, library_size):
    '''Returns the Poisson mutation rate distribution for a given library size

    Average rate is set by mu, library size is the number of sequences in the
    library.

    Returns two lists: probs_list contains the number of sequences with the
    corresponding number of mutations in mut_list.  Mutation counts whose
    expected number of sequences rounds to zero are omitted from both lists.
    The counts always sum to library_size.
    '''
    probs_list = []
    mut_list = []

    # 1.0 forces float division; with integer division (Python 2) alpha
    # becomes exactly 1 and the interval is unbounded.
    alpha = 1 - 1.0 / (library_size * 10)
    a, b = poisson.interval(alpha, mu, loc=0)
    a = int(a)
    b = int(b)

    for k in range(a, b + 1):
        k_count = int(round(poisson.pmf(k, mu) * library_size, 0))
        if k_count != 0:
            probs_list.append(k_count)
            mut_list.append(k)

    # If, due to rounding, the total differs from the library size, apply the
    # difference to the bin closest to the mean (mu).
    dif = sum(probs_list) - library_size
    if not mut_list:
        # Every bin rounded to zero: create the bin at the mean to correct.
        mut_list.append(int(round(mu)))
        probs_list.append(0)
    # Look the bin up in mut_list itself: positions in range(a, b + 1) do not
    # line up with probs_list once zero-count bins have been dropped, and a
    # non-integer mu is never an exact member of the list.
    index = min(range(len(mut_list)), key=lambda i: abs(mut_list[i] - mu))
    probs_list[index] -= dif

    return probs_list, mut_list
def Tukey_outliers(set_of_means, FDR=0.005, supporting_interval=0.5, verbose=False):
    """
    Performs Tukey quartile-fence test for outliers from a normal
    distribution with a defined false discovery rate.

    The fence multiplier is derived from the standard normal distribution so
    that a fraction `FDR` of normally distributed points is expected to fall
    outside the fences.

    :param set_of_means: 1-D array-like of values to test
    :param FDR: expected fraction of points outside the fences under normality
    :param supporting_interval: central probability mass between the lower and
        upper reference percentiles (0.5 gives the classic Q1/Q3 quartiles)
    :param verbose: if True, print the intermediate quantities
    :return: tuple ``(above, below)`` of index arrays: positions of the values
        above the high fence, then positions of the values below the low fence
    """
    # Accept plain sequences as well as arrays; the fence comparisons below
    # need element-wise semantics.
    set_of_means = np.asarray(set_of_means)

    # false discovery rate v.s. expected falses v.s. power
    q1_q3 = norm.interval(supporting_interval)
    FDR_q1_q3 = norm.interval(1 - FDR)
    # TODO: this is not necessary: we can perfectly well fit it with proper params to FDR
    multiplier = (FDR_q1_q3[1] - q1_q3[1]) / (q1_q3[1] - q1_q3[0])
    l_means = len(set_of_means)

    q1 = np.percentile(set_of_means, 50 * (1 - supporting_interval))
    q3 = np.percentile(set_of_means, 50 * (1 + supporting_interval))
    high_fence = q3 + multiplier * (q3 - q1)
    low_fence = q1 - multiplier * (q3 - q1)

    if verbose:
        # Single-string prints behave the same on Python 2 and Python 3.
        print('FDR: %s' % (FDR,))
        print('q1_q3 %s' % (q1_q3,))
        print('FDRq1_q3 %s' % (FDR_q1_q3,))
        print('q1, q3 %s %s' % (q1, q3))
        print('fences %s %s' % (high_fence, low_fence))
        print("FDR: %s %%, expected outliers: %s, outlier 95%% confidence interval: %s"
              % (FDR * 100, FDR * l_means, poisson.interval(0.95, FDR * l_means)))

    above = (set_of_means > high_fence).nonzero()[0]
    below = (set_of_means < low_fence).nonzero()[0]
    return above, below
def count_blue_stars_in_contour(self,completeness,blue_cut=1.3,kupperlim = 15.,klowerlim = 12.,ph_qual = False,plot=False,catalog=None,survey=None):
    """
    Determine which of the stars inside the contour are blue and estimate
    how many are really present given the detection completeness.

    Rows of `catalog` with klowerlim < KMag < kupperlim and CloudMask == 1
    are kept; of those, stars with JMag - KMag < blue_cut count as blue.
    Each blue star is weighted by 1/completeness, with the completeness
    linearly interpolated at its K magnitude from `completeness`
    (`completeness[..., 0]` is the x axis, `completeness[..., 1]` the
    value).  The sum of these weights is used as the mean of a Poisson
    distribution, and its central 0.99 interval is returned.

    NOTE(review): an earlier version of this docstring described a 0.95
    interval built from per-magnitude-bin binomial distributions; the code
    below uses a single Poisson interval at 0.99 instead.

    `ph_qual`, `plot` and `survey` are not used in this method body.

    Returns the tuple (mmin, mmean, mmax): the lower interval bound, the
    completeness-corrected count, and the upper interval bound.
    """
    print("Reached Count stage")
    # Linear interpolator: K magnitude -> detection completeness
    f = interp1d(completeness[...,0],completeness[...,1],kind='linear')
    print(kupperlim)
    print(klowerlim)
    print(catalog['KMag'])
    print(catalog)
    #Apparently this print statement is necessary to make the selection of good rows
    #in the next few lines work. That is super broken and bad of astropy.table
    # Keep only rows inside the open K-magnitude window
    good_rows = np.logical_and(catalog['KMag'] < kupperlim,catalog['KMag'] > klowerlim)
    print(good_rows)
    good = catalog[good_rows]
    #good = catalog[(catalog['KMag'] < kupperlim) & (catalog['KMag'] > klowerlim)]
    in_contour = good[good['CloudMask'] == 1]
    JminK = in_contour['JMag'] - in_contour['KMag']
    blue_in_contour = in_contour[JminK < blue_cut]
    # Colour histogram of all in-contour stars versus the blue subset
    self.plot_color_histogram(in_contour['JMag']-in_contour['KMag'], blue_in_contour['JMag']-blue_in_contour['KMag'],kupperlim)
    compfactor = f(blue_in_contour['KMag'])
    from scipy.stats import poisson
    # One count per blue star; the dot product below is sum(1/completeness)
    ns = np.ones_like(compfactor)
    lam = np.dot(ns,1./compfactor)
    mmean = lam
    mmin,mmax = poisson.interval(0.99, lam)
    print("Number of blue stars in contour")
    print(len(ns))
    #print("Observed in each bin:")
    #print(ns)
    #print("Completeness in each bin")
    #print(compfactor)
    print("Estimated nnumber of blue stars in contour")
    print(mmean)
    print(mmin)
    print(mmax)
    return(mmin,mmean,mmax)
def double_poisson_ci(freq, alpha=0.99):
    """
    Confidence interval for the true rate when two Poisson processes are
    stacked (one for the event rate, one for randomization).

    Parameters
    ----------
    freq: float - co-occurrence frequency
    alpha: float - desired overall confidence. range: [0, 1]

    Returns
    -------
    (lower bound, upper bound)
    """
    # Each of the two stages gets a wider share so that both together reach
    # the requested overall confidence.
    per_stage = 1 - (1 - alpha)**0.5

    # First stage: interval around the observed frequency.
    inner_low, inner_high = poisson.interval(per_stage, freq)

    # Second stage: push each end outwards with another Poisson interval.
    outer_low = poisson.interval(per_stage, inner_low)[0]
    outer_high = poisson.interval(per_stage, inner_high)[1]
    return (outer_low, outer_high)
def confidence_intervals(self, alpha=0.99):
    """
    Poisson confidence interval for every entry of ``self.counts``.

    Parameters
    ----------
    alpha
        Probability mass each interval must contain.

    Returns
    -------
    List of (lower, upper) tuples, one per count, in the order of
    ``self.counts``.
    """
    intervals = []
    for count in self.counts:
        lower, upper = poisson.interval(alpha, count)
        intervals.append((lower, upper))
    return intervals
def freq():
    """Draw the frequentist 68% construction and save it as 'freq.pdf'.

    For every trial mean in the module-level ``ntry`` a black horizontal
    segment spans the 68% Poisson interval of observed counts.  Red segments
    mark the means whose interval edge coincides with the module-level
    ``nobs`` (found numerically with ``root``), and a blue segment marks the
    interval for a mean equal to ``nobs`` itself.
    """
    def band(mean):
        # 68% central Poisson interval for the given mean
        return poisson.interval(.68, mean)

    def upper_edge_gap(mean):
        return band(mean)[1] - nobs

    def lower_edge_gap(mean):
        return band(mean)[0] - nobs

    plt.clf()

    for trial in ntry:
        plt.plot(band(trial), (trial, trial), color='black', linewidth=2)

    # Mean whose interval's upper edge sits at nobs (search starts below nobs)
    nmax = root(upper_edge_gap, nobs * .8).x[0]
    plt.plot(band(nmax), (nmax, nmax), color='red',
             label=r"$N_{{low}} = {:6.3f}$".format(nmax), linewidth=2)

    # Mean whose interval's lower edge sits at nobs (search starts above nobs)
    nmin = root(lower_edge_gap, nobs * 1.2).x[0]
    plt.plot(band(nmin), (nmin, nmin), color='red',
             label=r"$N_{{high}} = {:6.3f}$".format(nmin), linewidth=2)

    plt.plot(band(nobs), (nobs, nobs), color='blue',
             label=r"$N_{{max}} = {}$".format(nobs), linewidth=2)
    plt.axvline(x=nobs, label=r"$N_{{o}}={}$".format(nobs), ls=':', linewidth=2)

    plt.xlabel(r'68% $N_o$ range')
    plt.ylabel(r'$N$')
    plt.legend(loc=0)
    plt.title(r'frequentist')
    plt.savefig('freq.pdf')
def getExpected(mu):
    """
    For a Poisson-distributed, perfectly behaved input sample with mean
    coverage mu, return the tuple (AUC, XInt, elbow): the area under the
    curve, the X-intercept and the elbow point.
    """
    # Evaluate coverages 0 .. just past the upper end of the central
    # 99.999% interval; this is an appropriate range.
    upper_bound = poisson.interval(0.99999, mu=mu)[1]
    depths = np.arange(round(upper_bound + 1))

    mass = poisson.pmf(depths, mu=mu)
    cumulative = poisson.cdf(depths, mu=mu)

    # Cumulative share of coverage-weighted mass, normalised by its maximum
    weighted = np.cumsum(mass * depths)
    weighted /= max(weighted)

    first_nonzero = np.nonzero(weighted)[0][0]
    XInt = cumulative[first_nonzero]
    AUC = sum(mass * weighted)
    # The elbow is where the CDF is furthest above the weighted curve
    elbow = cumulative[np.argmax(cumulative - weighted)]
    return (AUC, XInt, elbow)
def __init__(self, lam):
    '''
    Store lambda for the Poisson distribution and precompute its bounds
    and probabilities.

    The bounds come from the Poisson interval at self.CONFIDENCE_INTERVALL,
    truncated to ints.  self.probs holds the pmf for every k in
    range(self.lower, self.upper), i.e. the upper bound itself is excluded.
    '''
    self.lam = lam

    # Lower and upper bounds for the configured confidence interval
    bounds = poisson.interval(self.CONFIDENCE_INTERVALL, self.lam)
    self.lower = int(bounds[0])
    self.upper = int(bounds[1])

    # Probabilities within the bounds
    probs = []
    for k in range(self.lower, self.upper):
        probs.append(poisson.pmf(k, self.lam))
    self.probs = probs
def EPM_Poisson_countd(mu, library_size):
    """Return the Poisson mutation rate distribution for a given library size.

    mu is the average mutation rate and library_size the number of sequences.
    Returns (probs_list, mut_list): probs_list[i] is the number of sequences
    carrying mut_list[i] mutations.  Mutation counts whose expected number of
    sequences rounds to zero are left out, and the counts always sum to
    library_size.
    """
    probs_list = []
    mut_list = []

    # 1.0 forces float division; integer division (Python 2) would make
    # alpha exactly 1 and the interval unbounded.
    alpha = 1 - 1.0 / (library_size * 10)
    a, b = poisson.interval(alpha, mu, loc=0)
    a = int(a)
    b = int(b)

    for k in range(a, b + 1):
        k_count = int(round(poisson.pmf(k, mu) * library_size, 0))
        if k_count != 0:
            probs_list.append(k_count)
            mut_list.append(k)

    # Rounding can leave the total off by a few sequences; apply the
    # difference to the bin closest to the mean.
    dif = sum(probs_list) - library_size
    if not mut_list:
        # Every bin rounded to zero: create the bin at the mean to correct.
        mut_list.append(int(round(mu)))
        probs_list.append(0)
    # Search mut_list itself: positions in range(a, b + 1) stop matching
    # probs_list once zero-count bins are dropped, and a non-integer mu is
    # never an exact member of the list.
    index = min(range(len(mut_list)), key=lambda i: abs(mut_list[i] - mu))
    probs_list[index] -= dif

    return probs_list, mut_list
def process_densities(self):
    """Compute the error bands on the data.

    Stores, for every value in ``self.lin_bins``, the 68% band due to the
    detector resolution and the 68% Poisson band, each both as-is and mapped
    through ``get_out_for_in`` with the reference curves ``self.ref_in`` /
    ``self.ref_out`` (the ``*_pmt`` attributes).
    """
    # Disabled: error due to PMT linearity and ADC/mV resolution
    # self.lin_out_i = zeros((len(self.lin_bins), 4))
    # for i in range(4):
    #     self.lin_out_i[:, i] = get_out_for_in(self.lin_bins, self.ref_in_i[:, i], self.ref_out)
    # self.lin_out = get_out_for_in(self.lin_bins, self.ref_in, self.ref_out)
    # self.dvindvout = (diff(self.lin_bins) / diff(self.lin_out_i[:,1])).tolist()  # dVin/dVout
    # self.dvindvout.extend([self.dvindvout[-1]])
    # self.dvindvout = array(self.dvindvout)
    # self.sigma_Vout = 0.57 / 2.  # Error on Vout
    # self.sigma_Vin = self.sigma_Vout * self.dvindvout

    bins = self.lin_bins

    # Detector resolution: normal band centred on each bin with a width of
    # sigma_res * sqrt(bin)
    sigma_res = 0.7
    self.response_lower, self.response_upper = norm.interval(
        0.68, bins, sqrt(bins) * sigma_res)
    self.response_lower_pmt = get_out_for_in(self.response_lower,
                                             self.ref_in, self.ref_out)
    self.response_upper_pmt = get_out_for_in(self.response_upper,
                                             self.ref_in, self.ref_out)

    # Poisson error, 68% interval (one sigma).
    # Note: the Poisson error is less for the average because of larger area.
    self.poisson_lower, self.poisson_upper = poisson.interval(0.68, bins)
    self.poisson_lower_pmt = get_out_for_in(self.poisson_lower,
                                            self.ref_in, self.ref_out)
    self.poisson_upper_pmt = get_out_for_in(self.poisson_upper,
                                            self.ref_in, self.ref_out)
# Expected number of FRB detections for an observing program, and the
# Poisson intervals around it.
frb_rate = 6.e3  # bursts per sky per day (Champion+15)

# PAF properties
fwhm = 14  # arcmin
fov_one_beam = pi * (fwhm / (2. * 60))**2  # square degrees (circle of diameter fwhm)
fov_all_deg = 27 * fov_one_beam  # presumably 27 beams - confirm against the instrument
# 41252.9 is approximately the area of the full sky in square degrees, so
# this is the fraction of the sky covered.
fov_all = fov_all_deg / 41252.9

# Scale factor for relative sensitivities.
# Note: it does *not* take cosmology into account,
# which you should well before a redshift of 5.
Tsys_factor = (50. / 25.)**-1.5

# Time request
Ttot = 800. / 24  # Total time in days

# Rate per observing program
mu = frb_rate * Tsys_factor * fov_all * Ttot
# print(...) with a single argument prints the same on Python 2 and Python 3.
print(mu)

# Number observed dummy array
k = np.linspace(0.0, 10.0, num=200)

# Probability mass function
frb_pmf = poisson.pmf(k, mu)
#frb_cdf=poisson.cdf(k, mu)

# Calculate the interval that contains 68% of the distribution
print(poisson.interval(0.68, mu))
print(poisson.interval(0.95, 6000))
def LnRatioConfInt(freq, ln_ratio, interval=0.99):
    """Return the confidence bounds of a log-ratio as a 2-element array.

    The Poisson interval of `freq` at confidence `interval` is rescaled by
    exp(ln_ratio) / freq, i.e. the log-ratio is converted back to a ratio
    and scaled with the relative count bounds, and the natural log of the
    result is returned as [lower, upper].
    """
    count_bounds = np.array(poisson.interval(interval, freq))
    ratio_bounds = count_bounds * np.exp(ln_ratio) / freq
    return np.log(ratio_bounds)
elif cl[i - 1] == '--pvalue-combination-livetime': pvalue_livetimes.add(float(arg)) elif cl[i - 1] == '--ifar-double-followup-threshold': dfuts.add(float(arg)) ifars = np.sort(np.array(ifars)) count = np.arange(len(ifars))[::-1] + 1 time = time / lal.YRJUL_SI rate = count / time pl.step(ifars, rate, label='Observation') ifars2 = np.logspace(np.log10(ifars.min()), np.log10(ifars.max()), 1000) label = 'Expectation' for prob in [0.6827, 0.9545, 0.9973]: a, b = poisson.interval(prob, time / ifars2) pl.fill_between(ifars2, a / time, b / time, alpha=0.3, edgecolor='none', facecolor='C1', label=label) label = None for ut in upload_thresholds: pl.axvline(ut, color='r', ls='--', label='--ifar-upload-threshold') for pvlt in pvalue_livetimes: pl.axvline(pvlt, color='b', ls=':', label='--pvalue-combination-livetime')
def count_blue_stars_in_contour(self, completeness, blue_cut=1.3, kupperlim=15., klowerlim=12., ph_qual=False, plot=False, catalog=None, survey=None):
    """
    Determine which of the stars inside the contour are blue and estimate
    how many are really present given the detection completeness.

    Rows of `catalog` with klowerlim < KMag < kupperlim and CloudMask == 1
    are kept; of those, stars with JMag - KMag < blue_cut count as blue.
    Each blue star is weighted by 1/completeness, with the completeness
    linearly interpolated at its K magnitude from `completeness`
    (`completeness[..., 0]` is the x axis, `completeness[..., 1]` the
    value).  The sum of these weights is used as the mean of a Poisson
    distribution, and its central 0.99 interval is returned.

    NOTE(review): an earlier version of this docstring described a 0.95
    interval built from per-magnitude-bin binomial distributions; the code
    below uses a single Poisson interval at 0.99 instead.

    `ph_qual`, `plot` and `survey` are not used in this method body.

    Returns the tuple (mmin, mmean, mmax): the lower interval bound, the
    completeness-corrected count, and the upper interval bound.
    """
    print("Reached Count stage")
    # Linear interpolator: K magnitude -> detection completeness
    f = interp1d(completeness[..., 0], completeness[..., 1], kind='linear')
    print(kupperlim)
    print(klowerlim)
    print(catalog['KMag'])
    print(
        catalog
    )
    #Apparently this print statement is necessary to make the selection of good rows
    #in the next few lines work. That is super broken and bad of astropy.table
    # Keep only rows inside the open K-magnitude window
    good_rows = np.logical_and(catalog['KMag'] < kupperlim, catalog['KMag'] > klowerlim)
    print(good_rows)
    good = catalog[good_rows]
    #good = catalog[(catalog['KMag'] < kupperlim) & (catalog['KMag'] > klowerlim)]
    in_contour = good[good['CloudMask'] == 1]
    JminK = in_contour['JMag'] - in_contour['KMag']
    blue_in_contour = in_contour[JminK < blue_cut]
    # Colour histogram of all in-contour stars versus the blue subset
    self.plot_color_histogram(
        in_contour['JMag'] - in_contour['KMag'],
        blue_in_contour['JMag'] - blue_in_contour['KMag'], kupperlim)
    compfactor = f(blue_in_contour['KMag'])
    from scipy.stats import poisson
    # One count per blue star; the dot product below is sum(1/completeness)
    ns = np.ones_like(compfactor)
    lam = np.dot(ns, 1. / compfactor)
    mmean = lam
    mmin, mmax = poisson.interval(0.99, lam)
    print("Number of blue stars in contour")
    print(len(ns))
    #print("Observed in each bin:")
    #print(ns)
    #print("Completeness in each bin")
    #print(compfactor)
    print("Estimated nnumber of blue stars in contour")
    print(mmean)
    print(mmin)
    print(mmax)
    return (mmin, mmean, mmax)
def confidence_width(count):
    """Return the width (upper minus lower) of the 95% Poisson interval for `count`."""
    bounds = poisson.interval(0.95, count)
    return bounds[1] - bounds[0]
subprocess.run("wget https://coinmetrics.io/newdata/btc.csv", shell=True, check=True) cm = pd.read_csv('btc.csv') cm = utils.get_extra_datetime_cols(cm, 'date') cm['BlkSizeByte'] = cm['BlkCnt'] * cm['BlkSizeMeanByte'] cm['HashRateL7DInc'] = cm['HashRate'].rolling(7).mean() cm['HashRateL7D'] = cm['HashRateL7DInc'].shift() / 1000000 alpha = 0.025 cm['BlkCntLower'] = [ poisson.interval(1 - alpha, x)[0] for x in cm['BlkCnt'] ] cm['BlkCntUpper'] = [ poisson.interval(1 - alpha, x)[1] for x in cm['BlkCnt'] ] cm['HashRateLower'] = [ (x / 144) * y * (((2**32) / (10**12)) / (600 * 1000000)) for x, y in zip(cm['BlkCntLower'], cm['DiffMean']) ] cm['HashRateUpper'] = [ (x / 144) * y * (((2**32) / (10**12)) / (600 * 1000000)) for x, y in zip(cm['BlkCntUpper'], cm['DiffMean']) ] dfs = {} median_metrics = [