def binomCheck(ntpos):
    """Decide whether the minor variant at position ``ntpos`` is supported on both strands.

    Reads the module-level ``FORWARD_DICT[ntpos]`` and ``REVERSE_DICT[ntpos]``
    mappings (presumably nucleotide -> read count; verify against the code that
    fills them) and takes the two keys with the largest values on each strand
    as the major and minor variant.

    Returns False when either mapping is empty, when either strand has only
    one key, or when the strands disagree on the major/minor keys; the last
    two cases also print a message and append details to the module-level
    ``NOTESLIST``.  Otherwise a binomial tail probability is computed per
    strand with ``args.cutoff`` as the success probability, and True is
    returned only if both are <= ALPHA / 2.

    The print calls use a single pre-formatted argument so the function emits
    the same text under Python 2 and Python 3.
    """
    fordict = FORWARD_DICT[ntpos]
    revdict = REVERSE_DICT[ntpos]

    # Nothing observed on one of the strands: cannot be confirmed.
    if not fordict or not revdict:
        return False

    # Keys ordered by descending count; keep the two most frequent.
    topF = sorted(fordict, key=fordict.get, reverse=True)[:2]
    topR = sorted(revdict, key=revdict.get, reverse=True)[:2]

    if len(topF) == 1 or len(topR) == 1:
        # One strand shows a single key only, so it has no minor variant.
        print('unequal minor variant count in forward/reverse %d' % ntpos)
        print('forward %s' % (fordict,))
        print('reverse %s' % (revdict,))
        NOTESLIST.append('take a closer look at, only one minorvar %d' % ntpos)
        NOTESLIST.append(fordict)
        NOTESLIST.append(revdict)
        return False

    f_majornt, f_minornt = topF[0], topF[1]
    r_majornt, r_minornt = topR[0], topR[1]

    if f_majornt != r_majornt or f_minornt != r_minornt:
        # The strands disagree on which keys are major/minor: record for review.
        print('binom not equal')
        NOTESLIST.append('take a closer look at %d' % ntpos)
        NOTESLIST.append([f_majornt, r_majornt, f_minornt, r_minornt])
        return False

    forwardMajorCount = fordict[f_majornt]
    forwardMinorCount = fordict[f_minornt]
    reverseMajorCount = revdict[r_majornt]
    reverseMinorCount = revdict[r_minornt]

    ALPHA = 0.05  # overall significance level, split evenly between the strands

    # 1 - cdf(k) is P(X > k) for X ~ Binomial(major + minor, args.cutoff).
    # NOTE(review): this excludes the observed count itself; confirm whether
    # P(X >= k) was intended.
    pforward = 1 - binom.cdf(forwardMinorCount,
                             (forwardMajorCount + forwardMinorCount),
                             args.cutoff)
    preverse = 1 - binom.cdf(reverseMinorCount,
                             (reverseMajorCount + reverseMinorCount),
                             args.cutoff)

    if pforward <= ALPHA / 2 and preverse <= ALPHA / 2:
        return True
    return False
def solve(N, X, Y):
    """Return a probability (0, 1, or a binomial tail value) for the point (X, Y) given N.

    The sign of X is ignored.  ``xytotri`` is a helper defined elsewhere in
    the file; it is evaluated at ``X + Y`` and ``X + Y + 2`` and the results
    are compared against N (presumably cumulative counts up to a layer --
    verify against ``xytotri``).
    """
    X = abs(X)

    filled_before = xytotri(X + Y)
    if filled_before >= N:
        return 0

    filled_through = xytotri(X + Y + 2)
    if filled_through <= N:
        return 1

    if X == 0 and filled_through > N:
        return 0

    # What is left of N after subtracting the first threshold.
    rem = N - filled_before

    if rem - (filled_through - filled_before - 1) / 2 > Y:
        return 1

    # P(more than Y successes in `rem` fair trials).
    return 1 - binom.cdf((Y + 1) - 1, rem, 0.5)
def binomialcheck(majornt, minornt, fordict, revdict,
                  percent_variant=0.03, alpha=0.05):
    """Check whether the minor-variant count is significant in both mappings.

    Arguments:
        majornt         : key of the major variant in ``fordict`` and ``revdict``
        minornt         : key of the minor variant in ``fordict`` and ``revdict``
        fordict         : mapping key -> count for the forward reads
        revdict         : mapping key -> count for the reverse reads
        percent_variant : success probability of the binomial null model
                          (default 0.03, the previously hard-coded value)
        alpha           : overall significance level; each mapping is tested
                          at ``alpha / 2`` (default 0.05, as before)

    Returns:
        True when, for both mappings, 1 - cdf(minor; major + minor,
        percent_variant) is <= alpha / 2; False otherwise.

    Raises:
        KeyError if ``majornt`` or ``minornt`` is missing from either mapping.
    """
    def tail_probability(counts):
        # P(X > minor) for X ~ Binomial(major + minor, percent_variant).
        major_count = counts[majornt]
        minor_count = counts[minornt]
        return 1 - binom.cdf(minor_count, major_count + minor_count,
                             percent_variant)

    # Both values are computed before comparing, so a missing key in
    # ``revdict`` is reported even when the forward test already fails.
    pforward = tail_probability(fordict)
    preverse = tail_probability(revdict)

    threshold = alpha / 2.0
    return bool(pforward <= threshold and preverse <= threshold)
def WFMD(N, m, g, k):
    '''
    The probability that in a population of N diploid individuals
    initially possessing m copies of a dominant allele, we will observe
    after g generations at least k copies of a recessive allele.
    Assume the Wright-Fisher model.

    The population carries 2*N allele copies.  In each generation the number
    of recessive copies is redrawn from Binomial(2*N, r / (2*N)), where r is
    the number of recessive copies in the previous generation.  The full
    distribution over r is propagated for g generations and its upper tail
    from k is returned as a float.

    Raises ValueError when m is not in the range 0..2*N.
    '''
    chromosomes = 2 * N
    if not 0 <= m <= chromosomes:
        raise ValueError('m must be between 0 and 2*N')

    copies = list(range(chromosomes + 1))

    # dist[r] = probability that the current generation has r recessive copies.
    dist = [0.0] * (chromosomes + 1)
    dist[chromosomes - m] = 1.0

    for _ in range(g):
        following = [0.0] * (chromosomes + 1)
        for r, weight in enumerate(dist):
            if weight == 0.0:
                continue
            if r == 0 or r == chromosomes:
                # Loss or fixation: the count can no longer change.
                following[r] += weight
                continue
            row = binom.pmf(copies, chromosomes, float(r) / chromosomes)
            for j in copies:
                following[j] += weight * row[j]
        dist = following

    # "At least k" is the upper tail; a non-positive k covers everything.
    return float(sum(dist[max(k, 0):]))
def WFMD(N, m, g, k):
    '''
    The probability that in a population of N diploid individuals
    initially possessing m copies of a dominant allele, we will observe
    after g generations at least k copies of a recessive allele.
    Assume the Wright-Fisher model.

    The population carries 2*N allele copies.  In each generation the number
    of recessive copies is redrawn from Binomial(2*N, r / (2*N)), where r is
    the number of recessive copies in the previous generation.  The full
    distribution over r is propagated for g generations and its upper tail
    from k is returned as a float.

    Raises ValueError when m is not in the range 0..2*N.
    '''
    total_copies = 2 * N
    if not 0 <= m <= total_copies:
        raise ValueError('m must be between 0 and 2*N')

    outcomes = list(range(total_copies + 1))

    # state[r] = probability that the current generation has r recessive copies.
    state = [0.0] * (total_copies + 1)
    state[total_copies - m] = 1.0

    for _ in range(g):
        new_state = [0.0] * (total_copies + 1)
        for r, mass in enumerate(state):
            if mass == 0.0:
                continue
            if r == 0 or r == total_copies:
                # Loss or fixation: the count can no longer change.
                new_state[r] += mass
                continue
            transition = binom.pmf(outcomes, total_copies,
                                   float(r) / total_copies)
            for j in outcomes:
                new_state[j] += mass * transition[j]
        state = new_state

    # "At least k" is the upper tail; a non-positive k covers everything.
    return float(sum(state[max(k, 0):]))
def _calculate_ci(p,sigma,n): """Return all indices j and k that correspond to confidence interval of level sigma for percentile p*100 along with the respective confidence levels Arguments: p : p-quantile e.g. F(m_p) = P(X < m_p) = p for 0 < p < 1 sigma: confidence interval level n : number of samples Returns: (j_selection,k_selection,confidence_levels) We need to calculate B_{n,p}(k-1)-B_{n,p}(j-1) \leq \sigma Therefore, we do an exhaustive search for all values of k and j and then filter out See Jean-Yves Le Boudec, Performance Evaluation of Computer and Communication Systems, EPFL Press, 2010 """ j_k_range = np.arange(0,n) # Already j-1 and k-1 J = np.tile(j_k_range,(n,1)).T K = np.tile(j_k_range,(n,1)) # print(J) # print(K) diff_Bk_Bj = binom.cdf(K,n,p)-binom.cdf(J,n,p) j_all,k_all = np.where(diff_Bk_Bj >= sigma) # We get too many of them if len(j_all) == 0: return None diff_k_j = k_all-j_all # Hence, find the minimum interval index_min_int = np.where(diff_k_j == diff_k_j.min()) # There might be several of them j_selection = j_all[index_min_int]+1 # j and k can range from 1 to n k_selection = k_all[index_min_int]+1 confidence_levels = diff_Bk_Bj[j_selection-1,k_selection-1] # All confidence intervals and their confidence level return (j_selection,k_selection,confidence_levels)
def Compute_Binomial_Prob(Topic_List, Global_Topic_Count):
    """Compute binomial p-values for how often each topic occurs in a list.

    Every value in Topic_List must be a key of Global_Topic_Count.

    Keywords:
    Topic_List -- list of topic labels, one entry per observed event
    Global_Topic_Count -- dictionary mapping each topic to the frequency
        (a probability) at which it is expected to be observed

    returns:
    List of dicts with keys 'name', 'obs', 'exp' and 'pVal', sorted by 'obs'
    in descending order.  'exp' is int(len(Topic_List) * frequency).  'pVal'
    is cdf(obs) when exp >= obs, and 1 - cdf(obs) otherwise, for a
    Binomial(len(Topic_List), frequency) distribution.

    Raises KeyError if a topic in Topic_List is missing from
    Global_Topic_Count.
    """
    total = len(Topic_List)
    results = []
    for name, observed in Counter(Topic_List).items():
        # Look the frequency up by topic name.  Pairing the i-th counted
        # topic with the i-th key of Global_Topic_Count would use an
        # unrelated frequency whenever the two orders differ.
        frequency = Global_Topic_Count[name]
        expected = int(total * frequency)
        cumulative = binom.cdf(observed, total, frequency)
        if expected >= observed:
            p_value = cumulative
        else:
            # NOTE(review): 1 - cdf(obs) is P(X > obs), which excludes obs
            # itself; confirm whether P(X >= obs) was intended.
            p_value = 1 - cumulative
        results.append({'name': name, 'obs': observed, 'exp': expected,
                        'pVal': p_value})
    return sorted(results, key=lambda x: x['obs'], reverse=True)
def _cihs_1D(data, alpha): data = np.sort(data.compressed()) n = len(data) alpha = min(alpha, 1 - alpha) k = int(binom._ppf(alpha / 2.0, n, 0.5)) gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5) if gk < 1 - alpha: k -= 1 gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5) gkk = binom.cdf(n - k - 1, n, 0.5) - binom.cdf(k, n, 0.5) I = (gk - 1 + alpha) / (gk - gkk) lambd = (n - k) * I / float(k + (n - 2 * k) * I) lims = (lambd * data[k] + (1 - lambd) * data[k - 1], lambd * data[n - k - 1] + (1 - lambd) * data[n - k]) return lims
def _cihs_1D(data, alpha): data = np.sort(data.compressed()) n = len(data) alpha = min(alpha, 1 - alpha) k = int(binom._ppf(alpha / 2., n, 0.5)) gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5) if gk < 1 - alpha: k -= 1 gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5) gkk = binom.cdf(n - k - 1, n, 0.5) - binom.cdf(k, n, 0.5) I = (gk - 1 + alpha) / (gk - gkk) lambd = (n - k) * I / float(k + (n - 2 * k) * I) lims = (lambd * data[k] + (1 - lambd) * data[k - 1], lambd * data[n - k - 1] + (1 - lambd) * data[n - k]) return lims
train = int(line.replace('(', '').replace(',', '').split()[0]) continue test = int(line.replace('(', '').replace(',', '').split()[0]) elif line.startswith('Test score:'): if firstTestScore is None: firstTestScore = float(line.replace('Test score: ', '')) continue secondTestScore = float(line.replace('Test score: ', '')) assert (train is not None) assert (test is not None) assert (testScoreAll is not None) a_cdf = 1 - binom.cdf(secondTestScore * test, test, firstTestScore) if firstTestScore > secondTestScore: a_cdf = -a_cdf b_cdf = 1 - binom.cdf(secondTestScore * test, test, testScoreAll) if testScoreAll > secondTestScore: b_cdf = -b_cdf #print(a_cdf, b_cdf) arr.append([ name, train, test, firstTestScore, secondTestScore, testScoreAll, a_cdf, b_cdf ]) name = None train = None test = None firstTestScore = None
def INDC(n, k, p):
    '''
    Return the base-10 logarithm of the binomial CDF at k, i.e.
    log10(P(X <= k)) for X ~ Binomial(n, p).

    NOTE(review): an earlier description spoke of "at least k 'heads' in 2n
    trials"; the code evaluates the lower tail over n trials, so callers
    presumably pass transformed arguments -- confirm at the call site.
    '''
    return math.log(binom.cdf(k, n, p), 10)
def INDC(n, k, p):
    """Return log10 of P(X <= k) for X ~ Binomial(n, p)."""
    cumulative = binom.cdf(k, n, p)
    log_base = 10
    return math.log(cumulative, log_base)
def binomial(n, c, p):
    """Return P(X <= c) for X ~ Binomial(n, p).

    Note the argument order: number of trials first, then the count.
    """
    cumulative = binom.cdf(c, n, p)
    return cumulative
def binom_test_low(n, N, p):
    """Return the lower-tail probability P(X <= n) for X ~ Binomial(N, p)."""
    lower_tail = binom.cdf(n, N, p)
    return lower_tail
def INDC(n, k, p):
    '''
    Return log10(P(X <= k)) for X ~ Binomial(n, p): the base-10 logarithm
    of the binomial cumulative distribution function evaluated at k.

    NOTE(review): an earlier description spoke of "at least k 'heads' in 2n
    trials"; the code evaluates the lower tail over n trials, so callers
    presumably pass transformed arguments -- confirm at the call site.
    '''
    lower_tail = binom.cdf(k, n, p)
    return math.log(lower_tail, 10)
def pbinom(successes, fail, prob):
    """Return the cumulative binomial probability P(X <= successes).

    The number of trials is ``successes + fail`` and ``prob`` is the
    per-trial success probability.
    """
    return binom.cdf(successes, successes + fail, prob)
def calculate_p1_index(n_success, n_attempts, chance_of_success):
    """Return the upper-tail probability P(X >= n_success).

    X ~ Binomial(n_attempts, chance_of_success).

    The survival function is used instead of ``1 - cdf`` because the
    subtraction rounds to exactly 0.0 once the tail drops below double
    precision, which erases very small probabilities.
    """
    # sf(k) = P(X > k), so P(X >= n_success) = sf(n_success - 1).
    return binom.sf(n_success - 1, n_attempts, chance_of_success)
def result(a, b):
    """Return '-' when the binomial CDF lies strictly between ``alpha`` and ``1 - alpha``, else '+'.

    The CDF is evaluated at ``b`` for ``a + b + 1`` trials with success
    probability 0.5.  ``alpha`` is read from module scope.
    """
    cumulative = binom.cdf(b, a + b + 1, 0.5)
    inside_band = alpha < cumulative < 1 - alpha
    return '-' if inside_band else '+'
def calculate_p2_index(n_success, n_attempts, chance_of_success):
    """Return a two-sided binomial tail probability, capped at 1.

    The value is twice the smaller of the upper tail (from
    ``calculate_p1_index``) and the lower tail P(X <= n_success), for
    X ~ Binomial(n_attempts, chance_of_success).

    Both tails include the observed count, so near the centre of the
    distribution the doubled value can exceed 1 (e.g. 1.5 for 1 success in
    2 attempts at chance 0.5); it is therefore limited to 1.
    """
    upper_tail = calculate_p1_index(n_success, n_attempts, chance_of_success)
    lower_tail = binom.cdf(n_success, n_attempts, chance_of_success)
    doubled = 2 * min(upper_tail, lower_tail)
    # The doubled value goes first so that a NaN propagates instead of
    # being replaced by 1.0.
    return min(doubled, 1.0)
def INDC(n, k, p):
    """Return the common (base-10) logarithm of P(X <= k), X ~ Binomial(n, p)."""
    return math.log(binom.cdf(k, n, p), 10)