def pdf(self, x, k, n, p):
    '''distribution of success runs of length k or more

    Parameters
    ----------
    x : float
        count of runs of length k or more
    k : int
        length of runs
    n : int
        total number of observations or trials
    p : float
        probability of success in each Bernoulli trial

    Returns
    -------
    pdf : float
        probability that x runs of length of k are observed

    Notes
    -----
    not yet vectorized

    References
    ----------
    Muselli 1996, theorem 3
    '''
    q = 1 - p
    m = np.arange(x, (n + 1) // (k + 1) + 1)[:, None]
    terms = (-1)**(m - x) * comb(m, x) * p**(m*k) * q**(m - 1) \
            * (comb(n - m*k, m - 1) + q * comb(n - m*k, m))
    return terms.sum(0)
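A quick sanity check on the Muselli (1996) formula is that the probabilities over all feasible run counts sum to 1. The sketch below is a standalone, hypothetical wrapper of the same expression; it assumes `comb` is `scipy.special.comb` (current SciPy location), which returns 0 for the out-of-range binomial coefficients that appear at the edges.

# Minimal sanity check, assuming scipy.special.comb (returns 0 for out-of-range k).
import numpy as np
from scipy.special import comb

def run_pdf(x, k, n, p):
    # same formula as the method above, written as a free function
    q = 1 - p
    m = np.arange(x, (n + 1) // (k + 1) + 1)[:, None]
    terms = (-1)**(m - x) * comb(m, x) * p**(m*k) * q**(m - 1) \
            * (comb(n - m*k, m - 1) + q * comb(n - m*k, m))
    return terms.sum(0)

n, k, p = 10, 3, 0.4
total = sum(run_pdf(x, k, n, p) for x in range((n + 1) // (k + 1) + 1))
assert np.allclose(total, 1.0)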
def hypergProb(k, N, m, n):
    """
    Wikipedia: There is a shipment of N objects in which m are defective.
    The hypergeometric distribution describes the probability that in a
    sample of n distinctive objects drawn from the shipment exactly k
    objects are defective.
    """
    #return float(choose(m, k) * choose(N-m, n-k)) / choose(N, n)
    hp = float(scipy.comb(m, k) * scipy.comb(N - m, n - k)) / scipy.comb(N, n)
    if scipy.isnan(hp):
        stderr.write("error: not possible to calculate hyperg probability in util.py for k=%d, N=%d, m=%d, n=%d\n" % (k, N, m, n))
        stdout.write("error: not possible to calculate hyperg probability in util.py for k=%d, N=%d, m=%d, n=%d\n" % (k, N, m, n))
    return hp
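In current SciPy the top-level `scipy.comb` has moved to `scipy.special.comb`, and the same quantity is available directly as `scipy.stats.hypergeom.pmf`. A small cross-check sketch, with the parameter mapping noted in the comments:

# Cross-check against scipy.stats.hypergeom.
# Mapping: hypergProb(k, N, m, n)  <->  hypergeom.pmf(k, M=N, n=m, N=n).
import numpy as np
from scipy.special import comb
from scipy.stats import hypergeom

def hyperg_prob(k, N, m, n):
    # standalone version of hypergProb above
    return float(comb(m, k) * comb(N - m, n - k)) / comb(N, n)

k, N, m, n = 3, 50, 10, 12
assert np.allclose(hyperg_prob(k, N, m, n), hypergeom.pmf(k, N, m, n))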
def mv_hypergeometric(x, m):
    """
    x : number of draws for each category.
    m : size of each category.
    """
    x = np.asarray(x)
    m = np.asarray(m)
    return log(comb(m, x).prod() / comb(m.sum(), x.sum()))
def prob(c, n, j, k):
    pi = k - j            # how many new marbles to pick
    po = c - pi           # how many old marbles to pick
    old_count = j
    new_count = n - j
    return comb(old_count, po) * comb(new_count, pi) / comb(n, c)
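By Vandermonde's identity, summing prob(c, n, j, k) over k = j..j+c should give 1, since the out-of-range binomials vanish. A minimal check of that property, assuming `comb` is `scipy.special.comb`:

# Normalization check (scipy.special.comb returns 0 when a requested count
# exceeds what is available, so edge terms drop out).
import numpy as np
from scipy.special import comb

def pick_prob(c, n, j, k):
    pi = k - j                      # new marbles picked
    po = c - pi                     # old marbles picked
    return comb(j, po) * comb(n - j, pi) / comb(n, c)

c, n, j = 4, 12, 5
assert np.allclose(sum(pick_prob(c, n, j, k) for k in range(j, j + c + 1)), 1.0)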
def runs_prob_odd(self, r):
    n0, n1 = self.n0, self.n1
    k = (r + 1) // 2
    tmp0 = comb(n0 - 1, k - 1)
    tmp1 = comb(n1 - 1, k - 2)
    tmp3 = comb(n0 - 1, k - 2)
    tmp4 = comb(n1 - 1, k - 1)
    return (tmp0 * tmp1 + tmp3 * tmp4) / self.comball
def hypergeom(x, fgTotal, bgMatching, bgTotal, log=True):
    if log:
        return logchoose(fgTotal, x) + logchoose(bgTotal - fgTotal, bgMatching - x) \
               - logchoose(bgTotal, bgMatching)
    else:
        return scipy.comb(fgTotal, x) * scipy.comb(bgTotal - fgTotal, bgMatching - x) \
               / scipy.comb(bgTotal, bgMatching)
def dumb_factor(goal, n):
    # Assumes the two indices are distinct and bounded above by n.
    comb0 = comb(n, 2)
    for i in range(0, n):
        comb1 = comb(n - i, 2)
        for j in range(i + 1, n):
            if goal == round(comb0 - comb1 + (j - i - 1)):
                return i, j
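dumb_factor inverts the lexicographic rank of an unordered pair (i, j) drawn from range(n): rank = C(n,2) - C(n-i,2) + (j-i-1). A hypothetical round-trip check against itertools.combinations, with the same search logic repeated as a standalone helper and `comb` taken from `scipy.special`:

# Round-trip: rank each pair, then recover it by brute-force search.
import itertools
from scipy.special import comb

def pair_from_rank(goal, n):
    # same logic as dumb_factor above
    comb0 = comb(n, 2)
    for i in range(n):
        comb1 = comb(n - i, 2)
        for j in range(i + 1, n):
            if goal == round(comb0 - comb1 + (j - i - 1)):
                return i, j

n = 7
for rank, pair in enumerate(itertools.combinations(range(n), 2)):
    assert pair_from_rank(rank, n) == pair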
def hypergeometric(x, n, m, N):
    """
    x : number of successes drawn
    n : number of draws
    m : number of successes in total
    N : successes + failures in total.
    """
    if x < max(0, n - (N - m)):
        return 0.
    elif x > min(n, m):
        return 0.
    else:
        # standard pmf: choose x successes from m, the remaining n-x draws from N-m failures
        return comb(m, x) * comb(N - m, n - x) / comb(N, n)
def _calculate_orders(self):
    k = self.k
    n = self.n
    m = self.m
    dim = self.dim

    # Calculate the length of each order
    self.order_idx = np.zeros(n + 2, dtype=int)
    self.order_length = np.zeros(n + 1, dtype=int)
    self.row_counter = 0
    for ordi in xrange(n + 1):
        self.order_length[ordi] = (sp.comb(n, ordi + 1, exact=1)
                                   * ((m - 1)**(ordi + 1)))
        self.order_idx[ordi] = self.row_counter
        self.row_counter += self.order_length[ordi]
    self.order_idx[n + 1] = dim + 1

    # Calculate nnz for A
    # not needed for lil sparse format
    x = (m * np.ones(n))**np.arange(n - 1, -1, -1)
    x = x[:k]
    y = self.order_length[:k]
    self.Annz = np.sum(x * y.T)
def triple(g, P, T, r, family):
    #if not g.index_set_leq(T,P):
    if g.intersection_matrix[g.schubert_list.index(P)][g.schubert_list.index(T)] == 0:
        return 0
    else:
        delta = 1
        quad, lin = num_equations(g, P, T)
        subspace_eqns = g.m + r - 1
        if g.OG and r > g.k:
            subspace_eqns += 1
            if quad > 0:
                quad -= 1
        if g.type == 'D' and r == g.k:
            subspace_eqns = g.n + 1
            #if quad == 0 or single_ruling(g,P,T):
            if quad == 0:
                #subspace_eqns -= int((family + h(g,P,T)))%2
                delta = int((family + h(g, P, T))) % 2
                subspace_eqns = g.n
            if quad > 0:
                quad -= 1
        triple_list = []
        for j in range(int(g.N - quad - lin - subspace_eqns)):
            triple_list.append((-1)**j * 2**(quad - j) * sp.comb(quad, j))
        return delta * sum(triple_list)
def backward_difference_formula(k):
    r"""
    Construct the k-step backward differentiation method.
    The methods are implicit and have order k.
    They have the form:

    `\sum_{j=0}^{k} \alpha_j y_{n-k+j+1} = h \beta_k f(y_{n+1})`

    They are generated using equation (1.22') from Hairer & Wanner III.1,
    along with the binomial expansion.

    .. note::
        Accuracy is lost when evaluating the order conditions for methods
        with many steps.  This could be avoided by using SAGE rationals
        instead of NumPy doubles for the coefficient representation.

    **References**:
        #. [hairer1993]_ pp. 364-365
    """
    from scipy import comb
    alpha = np.zeros(k + 1)
    beta = np.zeros(k + 1)
    beta[k] = 1.
    gamma = np.zeros(k + 1)
    gamma[0] = 1.
    alphaj = np.zeros(k + 1)
    for j in range(1, k + 1):
        gamma[j] = 1. / j
        for i in range(0, j + 1):
            alphaj[k - i] = (-1.)**i * comb(j, i) * gamma[j]
        alpha = alpha + alphaj
    name = str(k) + '-step BDF method'
    return LinearMultistepMethod(alpha, beta, name=name)
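The construction can be checked against the familiar BDF2 coefficients 3/2, -2, 1/2. A standalone sketch of the same recurrence, using `scipy.special.comb` (the top-level `scipy.comb` used above has been removed from recent SciPy) and skipping the LinearMultistepMethod wrapper:

# BDF coefficient check: the alpha vector for k=2 should be [1/2, -2, 3/2].
import numpy as np
from scipy.special import comb

def bdf_alpha(k):
    alpha = np.zeros(k + 1)
    alphaj = np.zeros(k + 1)
    for j in range(1, k + 1):
        gamma_j = 1. / j
        for i in range(j + 1):
            alphaj[k - i] = (-1.)**i * comb(j, i) * gamma_j
        alpha = alpha + alphaj
    return alpha

# BDF2: (3/2) y_{n+2} - 2 y_{n+1} + (1/2) y_n = h f(y_{n+2})
assert np.allclose(bdf_alpha(2), [0.5, -2.0, 1.5])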
def triple(g, P, T, r, family):
    if g.intersection_matrix[g.schubert_list.index(P)][g.schubert_list.index(T)] == 0:
        return 0
    else:
        delta = 1
        quad, lin = num_equations(g, P, T)
        subspace_eqns = g.m + r - 1
        #if g.type == 'D' and quad == 0 and r == g.k:
        #    delta = int(family + h(g,P,T))%2
        #if g.type == 'B':
        #    if quad > 0:
        #        quad -=1
        #    if r > g.k:
        #        subspace_eqns +=1
        if g.OG:
            if r > g.k:
                subspace_eqns += 1
                if quad > 0:
                    quad -= 1
            if r == g.k and g.type == 'D':
                if quad == 0:
                    delta = int(family + h(g, P, T)) % 2
                if quad > 0:
                    subspace_eqns += 1
                    quad -= 1
                #subspace_eqns += (1-delta)
        triple_list = []
        for j in range(int(g.N - quad - lin - subspace_eqns)):
            triple_list.append((-1)**j * 2**(quad - j) * sp.comb(quad, j))
        return delta * sum(triple_list)
def similarity_matrix(db, sim_func, check_func):
    matrix = numpy.zeros((len(db["genomes"]), len(db["genomes"])))

    sys.stderr.write("populating matrix\n")

    num_work_to_do = int(scipy.comb(len(db["genomes"]), 2))
    work_done = 0

    for nameA, nameB in itertools.combinations(db["genomes"], 2):
        nameA_num = db["genomes"].index(nameA)
        nameB_num = db["genomes"].index(nameB)

        sizeA = db["cds_counts"][nameA_num]
        sizeB = db["cds_counts"][nameB_num]

        hits_count = check_func((db["hit_matrix"][nameA_num][nameB_num],
                                 db["hit_matrix"][nameB_num][nameA_num]))

        matrix[nameA_num][nameB_num] = sim_func(sizeA, sizeB, hits_count)
        matrix[nameB_num][nameA_num] = sim_func(sizeB, sizeA, hits_count)

        work_done += 1
        if work_done % 100 == 0:
            sys.stderr.write("\r %s/%s completed" % (work_done, num_work_to_do))

    sys.stderr.write("\r %s/%s completed\n" % (work_done, num_work_to_do))

    return matrix
def triple(self, P, T, r, family):
    #if not self.index_set_leq(T,P):
    if self.intersection_matrix[self.schubert_list.index(P)][self.schubert_list.index(T)] == 0:
        return 0
    else:
        delta = 1
        quad, lin = self.outsourced_num_equations(P, T)
        subspace_eqns = self.m + r - 1
        if self.OG and r > self.k:
            subspace_eqns += 1
            if quad > 0:
                quad -= 1
        if self.type == 'D' and r == self.k:
            subspace_eqns = self.n + 1
            #if quad == 0 or self.single_ruling(P,T):
            if quad == 0:
                #subspace_eqns -= int((family + self.h(P,T)))%2
                delta = int((family + self.h(P, T))) % 2
                subspace_eqns = self.n
            if quad > 0:
                quad -= 1
        triple_list = []
        for j in range(int(self.N - quad - lin - subspace_eqns)):
            triple_list.append((-1)**j * 2**(quad - j) * sp.comb(quad, j))
        return delta * sum(triple_list)
def Test():
    # Double check the Stirling's approximation implementation
    test_vals = [(30, 5), (18, 7), (91, 32)]
    for n, k in test_vals:
        print np.log(scipy.comb(n, k))
        print ln_stirling_binomial(n, k)

    m = np.matrix([[50, 39], [66, 5]])
    print CalcPValues(m)

    prob_m = np.matrix([[0.25, 0.25], [0.25, 0.25]])
    print CalcPValues(m, prob_m)
def beta_binomial(k, n, a, b):
    """The pmf/pdf of the Beta-binomial distribution.

    Computation based on beta function.
    See: http://en.wikipedia.org/wiki/Beta-binomial_distribution
    and http://mathworld.wolfram.com/BetaBinomialDistribution.html

    k = a vector of non-negative integers <= n
    n = an integer
    a = an array of non-negative real numbers
    b = an array of non-negative real numbers
    """
    return (comb(n, k) * beta(k + a, n - k + b) / beta(a, b)).prod(0)
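The beta-binomial pmf should sum to 1 over k = 0..n. A standalone check for scalar a and b (the snippet above also handles array-valued a, b via the trailing prod), assuming `comb` and `beta` come from `scipy.special`:

# Normalization check for scalar shape parameters.
import numpy as np
from scipy.special import beta, comb

def beta_binomial_pmf(k, n, a, b):
    return comb(n, k) * beta(k + a, n - k + b) / beta(a, b)

n, a, b = 20, 2.0, 3.5
k = np.arange(n + 1)
assert np.allclose(beta_binomial_pmf(k, n, a, b).sum(), 1.0)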
def mnc2cum_g(mnc_):
    '''convert non-central moments to cumulants
    recursive formula produces as many cumulants as moments

    http://en.wikipedia.org/wiki/Cumulant#Cumulants_and_moments
    '''
    mnc = [1] + list(mnc_)
    kappa = [1]
    for nn, m in enumerate(mnc[1:]):
        n = nn + 1
        kappa.append(m)
        for k in range(1, n):
            kappa[n] -= scipy.comb(n - 1, k - 1, exact=1) * kappa[k] * mnc[n - k]
    return kappa[1:]
def mnc2cum(mnc_):
    '''convert non-central moments to cumulants
    recursive formula produces as many cumulants as moments

    http://en.wikipedia.org/wiki/Cumulant#Cumulants_and_moments
    '''
    mnc = [1] + list(mnc_)
    kappa = [1]
    for nn, m in enumerate(mnc[1:]):
        n = nn + 1
        kappa.append(m)
        for k in range(1, n):
            kappa[n] -= scipy.comb(n - 1, k - 1, exact=1) * kappa[k] * mnc[n - k]
    return kappa[1:]
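A worked example: the first four raw moments of a standard normal are (0, 1, 0, 3), and the recursion should return the cumulants (0, 1, 0, 0). The sketch below repeats the same recursion with `scipy.special.comb`, since `scipy.comb` itself is no longer available in current SciPy:

# Raw moments of N(0,1) -> cumulants.
from scipy.special import comb

def mnc_to_cum(mnc_):
    mnc = [1] + list(mnc_)
    kappa = [1]
    for nn, m in enumerate(mnc[1:]):
        n = nn + 1
        kappa.append(m)
        for k in range(1, n):
            kappa[n] -= comb(n - 1, k - 1, exact=True) * kappa[k] * mnc[n - k]
    return kappa[1:]

assert mnc_to_cum([0, 1, 0, 3]) == [0, 1, 0, 0]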
def mc2mnc_g(mc_):
    '''convert central to non-central moments, uses recursive formula
    optionally adjusts first moment to return mean
    '''
    n = len(mc_)
    mean = mc_[0]
    mc = [1] + list(mc_)    # add zero moment = 1
    mc[1] = 0               # define central mean as zero for formula
    mnc = [1, mean]         # zero and first raw moments
    for nn, m in enumerate(mc[2:]):
        n = nn + 2
        mnc.append(0)
        for k in range(n + 1):
            mnc[n] += scipy.comb(n, k, exact=1) * mc[k] * mean**(n - k)
    return mnc[1:]
def mnc2mc(mnc_, wmean=True):
    '''convert non-central to central moments, uses recursive formula
    optionally adjusts first moment to return mean
    '''
    n = len(mnc_)
    mean = mnc_[0]
    mnc = [1] + list(mnc_)  # add zero moment = 1
    mu = []                 # np.zeros(n+1)
    for n, m in enumerate(mnc):
        mu.append(0)
        #[scipy.comb(n-1,k,exact=1) for k in range(n)]
        for k in range(n + 1):
            mu[n] += (-1)**(n - k) * scipy.comb(n, k, exact=1) * mnc[k] * mean**(n - k)
    if wmean:
        mu[1] = mean
    return mu[1:]
def mc2mnc(mc_):
    '''convert central to non-central moments, uses recursive formula
    optionally adjusts first moment to return mean
    '''
    n = len(mc_)
    mean = mc_[0]
    mc = [1] + list(mc_)    # add zero moment = 1
    mc[1] = 0               # define central mean as zero for formula
    mnc = [1, mean]         # zero and first raw moments
    for nn, m in enumerate(mc[2:]):
        n = nn + 2
        mnc.append(0)
        for k in range(n + 1):
            mnc[n] += scipy.comb(n, k, exact=1) * mc[k] * mean**(n - k)
    return mnc[1:]
def mnc2mc_g(mnc_, wmean=True):
    '''convert non-central to central moments, uses recursive formula
    optionally adjusts first moment to return mean
    '''
    n = len(mnc_)
    mean = mnc_[0]
    mnc = [1] + mnc_        # index numbering starting at 1, or zero moment
    mu = []                 # np.zeros(n+1)
    for n, m in enumerate(mnc):
        mu.append(0)
        #[scipy.comb(n-1,k,exact=1) for k in range(n)]
        for k in range(n + 1):
            mu[n] += (-1)**(n - k) * scipy.comb(n, k, exact=1) * mnc[k] * mean**(n - k)
    if wmean:
        mu[1] = mean
    return mu[1:]
def mnc2mc(_mnc, wmean=True):
    '''convert non-central to central moments, uses recursive formula
    optionally adjusts first moment to return mean
    '''
    n = len(_mnc)
    mean = _mnc[0]
    mnc = [1] + list(_mnc)  # add zero moment = 1
    mu = []                 # np.zeros(n+1)
    for n, m in enumerate(mnc):
        mu.append(0)
        #[scipy.comb(n-1,k,exact=1) for k in range(n)]
        for k in range(n + 1):
            mu[n] += (-1)**(n - k) * scipy.comb(n, k, exact=1) * mnc[k] * mean**(n - k)
    if wmean:
        mu[1] = mean
    return mu[1:]
def cum2mc(kappa_):
    '''convert cumulants to central moments
    recursive formula produces as many moments as cumulants

    References
    ----------
    Kenneth Lange: Numerical Analysis for Statisticians, page 40
    (http://books.google.ca/books?id=gm7kwttyRT0C&pg=PA40&lpg=PA40&dq=convert+cumulants+to+moments&source=web&ots=qyIaY6oaWH&sig=cShTDWl-YrWAzV7NlcMTRQV6y0A&hl=en&sa=X&oi=book_result&resnum=1&ct=result)
    '''
    mc = [1, 0.0]   #kappa_[0]]  #insert 0-moment and mean
    kappa = [1] + list(kappa_)
    for nn, m in enumerate(kappa[2:]):
        n = nn + 2
        mc.append(0)
        for k in range(n - 1):
            mc[n] += scipy.comb(n - 1, k, exact=1) * kappa[n - k] * mc[k]
    mc[1] = kappa_[0]   # insert mean as first moments by convention
    return mc[1:]
def cum2mc_g(kappa_):
    '''convert cumulants to central moments
    recursive formula produces as many moments as cumulants

    References
    ----------
    Kenneth Lange: Numerical Analysis for Statisticians, page 40
    (http://books.google.ca/books?id=gm7kwttyRT0C&pg=PA40&lpg=PA40&dq=convert+cumulants+to+moments&source=web&ots=qyIaY6oaWH&sig=cShTDWl-YrWAzV7NlcMTRQV6y0A&hl=en&sa=X&oi=book_result&resnum=1&ct=result)
    '''
    mc = [1, 0.0]   #kappa_[0]]  #insert 0-moment and mean
    kappa = [1] + list(kappa_)
    for nn, m in enumerate(kappa[2:]):
        n = nn + 2
        mc.append(0)
        for k in range(n - 1):
            mc[n] += scipy.comb(n - 1, k, exact=1) * kappa[n - k] * mc[k]
    mc[1] = kappa_[0]   # insert mean as first moments by convention
    return mc[1:]
def diallelic_approximation_d(N_small, g, m0, m1):
    """
    This is experimental.
    The numerical integration should be replaced by a call to
    the confluent hypergeometric function hyp1f1.
    See also http://functions.wolfram.com/HypergeometricFunctions/
    Hypergeometric1F1/03/01/04/01/ .
    Also www.cs.unc.edu/Research/Image/MIDAG/p01/biostat/Digital_1.pdf .
    Also a gsl implementation gsl_sf_hyperg_1F1_int in hyperg_1F1.c
    specifically hyperg_1F1_ab_posint for positive integers a and b.
    Also http://mathworld.wolfram.com/
    ConfluentHypergeometricFunctionoftheFirstKind.html
    """
    hist = np.zeros(N_small + 1)
    for n0 in range(1, N_small):
        n1 = N_small - n0
        prefix = scipy.comb(n0 + n1, n0) * special.beta(n0, n1)
        hist[n0] += m0 * prefix * diallelic_d_helper(n0, n1, g)
        hist[n0] += m1 * prefix * diallelic_d_helper(n1, n0, -g)
    return hist[1:-1] / np.sum(hist[1:-1])
def Adams_Moulton(k):
    r"""
    Construct the k-step, Adams-Moulton method.
    The methods are implicit and have order k+1.
    They have the form:

    `y_{n+1} = y_n + h \sum_{j=0}^{k} \beta_j f(y_{n-k+j+1})`

    They are generated using equation (1.9) and the equation in
    Exercise 3 from Hairer & Wanner III.1, along with the binomial
    expansion.

    .. note::
        Accuracy is lost when evaluating the order conditions for methods
        with many steps.  This could be avoided by using SAGE rationals
        instead of NumPy doubles for the coefficient representation.

    References:
        [hairer1993]_
    """
    from scipy import comb
    alpha = np.zeros(k + 1)
    beta = np.zeros(k + 1)
    alpha[k] = 1.
    alpha[k - 1] = -1.
    gamma = np.zeros(k + 1)
    gamma[0] = 1.
    beta[k] = 1.
    betaj = np.zeros(k + 1)
    for j in range(1, k + 1):
        gamma[j] = -sum(gamma[:j] / np.arange(j + 1, 1, -1))
        for i in range(0, j + 1):
            betaj[k - i] = (-1.)**i * comb(j, i) * gamma[j]
        beta = beta + betaj
    name = str(k) + '-step Adams-Moulton method'
    return LinearMultistepMethod(alpha, beta, name=name)
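For k = 1 the Adams-Moulton construction should reproduce the trapezoidal rule, beta = (1/2, 1/2). A standalone sketch of just the beta recurrence, with `scipy.special.comb` in place of the removed top-level `scipy.comb` and the LinearMultistepMethod wrapper omitted:

# One-step Adams-Moulton == trapezoidal rule.
import numpy as np
from scipy.special import comb

def adams_moulton_beta(k):
    beta = np.zeros(k + 1)
    beta[k] = 1.
    gamma = np.zeros(k + 1)
    gamma[0] = 1.
    betaj = np.zeros(k + 1)
    for j in range(1, k + 1):
        gamma[j] = -sum(gamma[:j] / np.arange(j + 1, 1, -1))
        for i in range(j + 1):
            betaj[k - i] = (-1.)**i * comb(j, i) * gamma[j]
        beta = beta + betaj
    return beta

assert np.allclose(adams_moulton_beta(1), [0.5, 0.5])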
def _munp(self, n, c):
    k = np.arange(0, n + 1)
    val = (1.0 / c)**n * np.sum(comb(n, k) * (-1)**k / (1.0 + c * k), axis=0)
    return where(c * n > -1, val, inf)
print 'zpkToCoeffs: ', scipy.allclose(rb, sb) and scipy.allclose(ra, sa)

# Test the lpTolp functions
a = scipy.array([1, -0.96, 0.80])
b = scipy.array([1, 0.5, 0.5])
freq = 0.23

rb, ra = loudia.lowPassToLowPass(b, a, freq)
sb, sa = scipy.signal.lp2lp(b, a, freq)

print 'lpTolp: ', scipy.allclose(rb, sb) and scipy.allclose(ra, sa)

# Test the comb function
rc = loudia.combination(5, 3)
sc = scipy.comb(5, 3)

print 'combination: ', round(sc) == round(rc)

# Test the bilinear function
a = scipy.array([10, -0.96, 0.80])
b = scipy.array([156, 0.5, 0.5])

rb, ra = loudia.bilinear(b, a, 1.0)
sb, sa = scipy.signal.bilinear(b, a)

# The loudia bilinear function does not return the coefficients normalized
rb = rb / ra[:, 0]
ra = ra / ra[:, 0]

print 'bilinear: ', scipy.allclose(rb, sb) and scipy.allclose(ra, sa)
def runs_prob_even(self, r):
    n0, n1 = self.n0, self.n1
    tmp0 = comb(n0 - 1, r // 2 - 1)
    tmp1 = comb(n1 - 1, r // 2 - 1)
    return tmp0 * tmp1 * 2. / self.comball
def bin_pdf(n, p, k):
    return comb(n, k) * p**k * (1 - p)**(n - k)

def bin_cdf(n, p, m):
    return sum(bin_pdf(n, p, k) for k in range(m + 1))
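These agree with `scipy.stats.binom`, which is usually the more robust choice for large n. A quick cross-check sketch, with `comb` taken from `scipy.special`:

# Compare the hand-rolled pmf/cdf with scipy.stats.binom.
import numpy as np
from scipy.special import comb
from scipy.stats import binom

def binom_pdf(n, p, k):
    return comb(n, k) * p**k * (1 - p)**(n - k)

n, p, k = 20, 0.3, 7
assert np.allclose(binom_pdf(n, p, k), binom.pmf(k, n, p))
assert np.allclose(sum(binom_pdf(n, p, i) for i in range(k + 1)), binom.cdf(k, n, p))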
def __init__(self, n0, n1):
    self.n0 = n0
    self.n1 = n1
    self.n = n = n0 + n1
    self.comball = comb(n, n1)
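Together, runs_prob_even and runs_prob_odd above define the distribution of the total number of runs in a 0/1 sequence with n0 zeros and n1 ones, so the probabilities over r = 2..n0+n1 should sum to 1. A minimal standalone check, assuming `comb` is `scipy.special.comb` (whose default mode returns 0 for the out-of-range binomials at the edges of the support):

# Normalization of the runs distribution.
import numpy as np
from scipy.special import comb

def runs_prob(r, n0, n1):
    comball = comb(n0 + n1, n1)
    if r % 2 == 0:
        return comb(n0 - 1, r // 2 - 1) * comb(n1 - 1, r // 2 - 1) * 2. / comball
    k = (r + 1) // 2
    return (comb(n0 - 1, k - 1) * comb(n1 - 1, k - 2)
            + comb(n0 - 1, k - 2) * comb(n1 - 1, k - 1)) / comball

n0, n1 = 6, 9
assert np.allclose(sum(runs_prob(r, n0, n1) for r in range(2, n0 + n1 + 1)), 1.0)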
# Function that simulates a thousand votes 100 times to compute an average of the measurements
p = 0.0
a = 0.0
b = 0.0
c = 0.0
p1 = 0.0
pdf_x = []
pdf_y = []

for r in range(0, 1000):
    a = comb(33, 18) * (float(r * 0.001))**18 * (1 - float(r * 0.001))**15
    p += a
    pdf_x.append(r * 0.001)
    r += 1

for r in range(0, 1000):
    c = comb(33, 18) * (float(r * 0.001))**18 * (1 - float(r * 0.001))**15
    p2 = c / p
    pdf_y.append(p2)
    p1 += p2
    r += 1

# Up to this point only the axes of the first pdf plot have been defined
print(max(pdf_y))
for i in range(0, len(pdf_y)):
def kuiper_FPP(D, N):
    """Compute the false positive probability for the Kuiper statistic.

    Uses the set of four formulas described in Paltani 2004; they report
    the resulting function never underestimates the false positive
    probability but can be a bit high in the N=40..50 range.
    (They quote a factor 1.5 at the 1e-7 level.)

    Parameters
    ----------
    D : float
        The Kuiper test score.
    N : float
        The effective sample size.

    Returns
    -------
    fpp : float
        The probability of a score this large arising from the null hypothesis.

    Reference
    ---------
    Paltani, S., "Searching for periods in X-ray observations using
    Kuiper's test. Application to the ROSAT PSPC archive",
    Astronomy and Astrophysics, v.240, p.789-790, 2004.
    """
    if D < 0. or D > 2.:
        raise ValueError("Must have 0<=D<=2 by definition of the Kuiper test")

    if D < 2. / N:
        return 1. - factorial(N) * (D - 1. / N)**(N - 1)
    elif D < 3. / N:
        k = -(N * D - 1.) / 2.
        r = sqrt(k**2 - (N * D - 2.) / 2.)
        a, b = -k + r, -k - r
        return 1. - factorial(N - 1) * (b**(N - 1.) * (1. - a)
                                        - a**(N - 1.) * (1. - b)) / float(N)**(N - 2) * (b - a)
    elif (D > 0.5 and N % 2 == 0) or (D > (N - 1.) / (2. * N) and N % 2 == 1):
        def T(t):
            y = D + t / float(N)
            return y**(t - 3) * (y**3 * N
                                 - y**2 * t * (3. - 2. / N) / N
                                 - t * (t - 1) * (t - 2) / float(N)**2)
        s = 0.
        # NOTE: the upper limit of this sum is taken from Stephens 1965
        for t in xrange(int(floor(N * (1 - D))) + 1):
            term = T(t) * comb(N, t) * (1 - D - t / float(N))**(N - t - 1)
            s += term
        return s
    else:
        z = D * sqrt(N)
        S1 = 0.
        term_eps = 1e-12
        abs_eps = 1e-100
        for m in itertools.count(1):
            T1 = 2. * (4. * m**2 * z**2 - 1.) * exp(-2. * m**2 * z**2)
            so = S1
            S1 += T1
            if abs(S1 - so) / (abs(S1) + abs(so)) < term_eps or abs(S1 - so) < abs_eps:
                break
        S2 = 0.
        for m in itertools.count(1):
            T2 = m**2 * (4. * m**2 * z**2 - 3.) * exp(-2 * m**2 * z**2)
            so = S2
            S2 += T2
            # convergence check on S2 (this series), not S1
            if abs(S2 - so) / (abs(S2) + abs(so)) < term_eps or abs(S2 - so) < abs_eps:
                break
        return S1 - 8 * D / (3. * sqrt(N)) * S2
def CalcPValues(count_mat, prob_mat=None, exact=False):
    """Calculates p-values for the co-incidence matrix of two variables.

    Computes the probability we would observe a greater (i,j) value in mat by
    randomly sampling from the global distribution of row and column values.

    Suppose the rows are oxygen requirements (aerobe, anaerobe) and the columns
    are genotypes (ed, emp).  Overall, there are 75 aerobes and 75 anaerobes,
    while there are 100 emp genotypes and 50 ed.  If the genes and oxygen
    requirements were randomly distributed among organisms independently then
    the probability of randomly choosing an anaerobe with ed genes would be
    (50/150) * (75/150) = 1/6 (meaning there should be 25 such organisms).
    However, suppose we observe that 1/4 of organisms (39) are anaerobes with
    ed genes.  The probability of observing a more extreme value than 1/4 by
    drawing randomly is

        sum_{i > 39} ((150 choose i) * (1/6)^i * (5/6)^(150-i))

    which is the sum of the probability of randomly choosing any single value
    greater than 39.

    Args:
        count_mat: matrix with independent variable values on rows, dependents
            on columns.
        prob_mat: the matrix with probabilities of each (i,j) point.  If None,
            will be computed according to counts.
        exact: if exact binomial coefficients should be computed.

    Returns:
        A matrix with the same shape as mat with a p-value at each i,j position.
    """
    total = np.sum(count_mat)
    ceil_total = int(np.ceil(total))
    total2 = float(total**2)

    pval_mat = np.matrix(np.zeros(count_mat.shape))
    rows, cols = count_mat.shape
    for i in xrange(rows):
        for j in xrange(cols):
            # Get the total count
            observed_count = count_mat[i, j]

            # Compute or fetch the probability of this pair.
            if prob_mat is not None:
                prob = prob_mat[i, j]
            else:
                total_ind = np.sum(count_mat[i, :])
                total_dep = np.sum(count_mat[:, j])
                prob = float(total_ind * total_dep) / total2

            # Calculate a p-value for each more extreme case.
            pvals = []
            floor_count = int(np.floor(observed_count))
            for higher_count in xrange(floor_count + 1, ceil_total + 1):
                comb = scipy.comb(ceil_total, higher_count, exact=exact)

                # If we can't compute the actual value of the binomial coefficient
                # then approximate its log using Stirling's approximation
                # (ln_stirling_binomial already returns the log of the coefficient).
                if not np.isfinite(comb):
                    pval = ln_stirling_binomial(ceil_total, higher_count)
                else:
                    pval = np.log(comb)
                pval += higher_count * np.log(prob)
                pval += (total - higher_count) * np.log(1 - prob)
                pvals.append(np.exp(pval))

            # our p-value is the sum over all more extreme cases
            pval_mat[i, j] = np.sum(pvals)

    return pval_mat
def kmers2int():
    work_queue = Queue()
    result_queue = Queue()

    work_to_do = itertools.combinations(range(len(db["genomes"])), 2)
    num_work_to_do = int(round(scipy.comb(len(db["genomes"]), 2)))

    while True:
        batch = list(itertools.islice(work_to_do, 1000))
        if not batch:
            break
        work_queue.put(batch)

    for i in range(options.num_tasks):
        work_queue.put(False)

    tasks = [Process(target=kmers2int_worker, args=(work_queue, result_queue))
             for i in range(options.num_tasks)]

    for t in tasks:
        t.start()

    sys.stderr.write("populating matrix\n")

    true_s_time = time.time()
    s_time = time.time()
    status_change = True
    work_done = 0
    delta_time = []

    while True:
        for _ in range(10):
            try:
                data = result_queue.get(block=False)
                db["hit_matrix"][data[0]][data[1]] = data[2]
                work_done += 1
                status_change = True
            except Empty:
                break

        if sum([1 for t in tasks if t.is_alive()]) == 0:
            break

        if status_change and work_done % 100 == 0:
            try:
                work_per_time = float(100 * len(delta_time)) / float(sum(delta_time))
            except ZeroDivisionError:
                work_per_time = 0

            delta_time.append(time.time() - s_time)

            try:
                time_to_finish = (num_work_to_do - work_done) / work_per_time
            except ZeroDivisionError:
                time_to_finish = 0

            sys.stderr.write("\r%-79s" % (" %s/%s completed [%.2f/s, %s remaining]"
                             % (work_done, num_work_to_do, work_per_time,
                                formattime(time_to_finish)),))

            if len(delta_time) == 100:
                delta_time = delta_time[1:]

            s_time = time.time()
            status_change = False

    for t in tasks:
        t.join()

    sys.stderr.write("\r%-79s\n" % (" %s/%s completed in %s"
                     % (work_done, num_work_to_do,
                        formattime(time.time() - true_s_time)),))
#-- Table header
lines = []
headings = [r"N_{\uparrow}", r"U/\mu B", r"M/N\mu", r"\Omega", r"S/k",
            r"kT/\mu B", r"C/Nk"]
nel = len(headings)
header = r"$" + r"$ & $".join(headings) + r"$ \\ \midrule \addlinespace[5pt]"
lines.append(r"\begin{tabular}{rrrcrrc}")
lines.append(header)

tableRows = []
for Nup in all_Nup:
    Ndown = np.float96(Ntot - Nup)
    U_by_mu_B = np.float96(Ntot - 2*Nup)
    M_by_N_mu = np.float96(Nup - Ndown) / np.float96(Ntot)
    Omega = sp.comb(Ntot, Nup, exact=True)
    S_by_k = np.log(np.float96(Omega))
    tableRows.append([Nup, U_by_mu_B, M_by_N_mu, Omega, S_by_k])

rowNum = 0
for row in tableRows:
    if rowNum == 0 or rowNum == Ntot:
        kT_by_mu_B = 0
    else:
        kT_by_mu_B = (tableRows[rowNum+1][1] - tableRows[rowNum-1][1]) / \
                     (tableRows[rowNum+1][4] - tableRows[rowNum-1][4])
    tableRows[rowNum].append(kT_by_mu_B)
    rowNum += 1

rowNum = 0
for row in tableRows:
def learn(A, K, net0={}, opts={}):
    """
    runs variational bayes for inference of network modules (i.e. community
    detection) under a constrained stochastic block model.  net0 and opts
    inputs are optional.  if provided, length(net0.a0) must equal K.

    inputs:
      A: N-by-N undirected (symmetric), binary adjacency matrix w/o self-edges
         (note: fastest for sparse and logical A)
      K: (maximum) number of modules
      net0: initialization/hyperparameter structure for network
        net0['Q0']: N-by-K initial mean-field matrix (rows sum to 1)
        net0['ap0']: alpha_{+0}, hyperparameter for prior on \theta_+
        net0['bp0']: beta_{+0}, hyperparameter for prior on \theta_+
        net0['am0']: alpha_{-0}, hyperparameter for prior on \theta_-
        net0['bm0']: beta_{-0}, hyperparameter for prior on \theta_-
        net0['a0']: alpha_{\mu0}, 1-by-K vector of hyperparameters for prior on \pi
      opts: options
        opts['TOL_DF']: tolerance on change in F (outer loop)
        opts['MAX_FITER']: maximum number of F iterations (outer loop)
        opts['VERBOSE']: verbosity (0=quiet (default), 1=print, 2=figures)

    outputs:
      net: posterior structure for network
        net['F']: converged free energy (same as net.F_iter(end))
        net['F_iter']: free energy over iterations (learning curve)
        net['Q']: N-by-K mean-field matrix (rows sum to 1)
        net['K']: K, passed for compatibility with vbmod_restart
        net['ap']: alpha_+, hyperparameter for posterior on \theta_+
        net['bp']: beta_+, hyperparameter for posterior on \theta_+
        net['am']: alpha_-, hyperparameter for posterior on \theta_-
        net['bm']: beta_-, hyperparameter for posterior on \theta_-
        net['a']: alpha_{\mu}, 1-by-K vector of hyperparameters for posterior on \pi
    """
    # default options
    TOL_DF = 1e-2
    MAX_FITER = 30
    VERBOSE = 0
    SAVE_ITER = 0

    #print "get options from opts struct"
    if (type(opts) == type({})) and (len(opts) > 0):
        if 'TOL_DF' in opts:
            TOL_DF = opts['TOL_DF']
        if 'MAX_FITER' in opts:
            MAX_FITER = opts['MAX_FITER']
        if 'VERBOSE' in opts:
            VERBOSE = opts['VERBOSE']
        if 'SAVE_ITER' in opts:
            SAVE_ITER = opts['SAVE_ITER']

    N = A.shape[0]              # number of nodes
    M = 0.5 * A.sum(0).sum(1)   # total number of non-self edges
    M = M[0, 0]
    C = comb(N, 2)              # total number of possible edges between N nodes

    uk = mat(ones([K, 1]))
    un = mat(ones([N, 1]))

    #print "default prior hyperparameters"
    ap0 = 2; bp0 = 1; am0 = 1; bm0 = 2; a0 = ones([1, K]);

    #print "get initial Q0 matrix and prior hyperparameters from net0 struct"
    if (type(net0) == type({})) and (len(net0) > 0):
        if 'Q0' in net0:
            Q = net0['Q0']
        if 'ap0' in net0:
            ap0 = net0['ap0']
        if 'bp0' in net0:
            bp0 = net0['bp0']
        if 'am0' in net0:
            am0 = net0['am0']
        if 'bm0' in net0:
            bm0 = net0['bm0']
        if 'a0' in net0:
            a0 = net0['a0']

    #print "initialize Q if not provided"
    try:
        Q
    except NameError:
        Q = init(N, K)
    Qmat = mat(Q)
    #print "size of Q=", Q.shape
    #Q=init(N,K)

    # ensure a0 is a 1-by-K vector
    assert(a0.shape == (1, K))

    #print "intialize variational distribution hyperparameters to be equal to prior hyperparameters"
    ap = ap0
    bp = bp0
    am = am0
    bm = bm0
    a = a0
    n = Q.sum(0)

    # get indices of non-zero row/columns
    # to be passed to vbmod_estep_inline
    # jntj: must be better way
    (rows, cols) = A.nonzero()

    # vector to store free energy over iterations
    F = []

    for i in range(MAX_FITER):
        ####################
        # VBE-step, to update mean-field Q matrix over module assignments
        ####################

        # compute local and global coupling constants, JL and JG, and
        # chemical potentials -lnpi
        psiap = digamma(ap)
        psibp = digamma(bp)
        psiam = digamma(am)
        psibm = digamma(bm)
        psip = digamma(ap + bp)
        psim = digamma(am + bm)

        JL = psiap - psibp - psiam + psibm
        JG = psibm - psim - psibp + psip
        lnpi = digamma(a) - digamma(sum(a))

        estep_inline(rows, cols, Q, float(JL), float(JG), array(lnpi), array(n))

        """
        # local update (technically correct, but slow)
        for l in range(N):
            # exclude Q[l,:] from contributing to its own update
            Q[l,:] = zeros([1,K])
            # jntj: doesn't take advantage of sparsity
            Al = mat(A.getrow(l).toarray())
            AQl = multiply((Al.T*uk.T), Q).sum(0)
            nl = Q.sum(0)
            lnQl = JL*AQl - JG*nl + lnpi
            lnQl = lnQl - lnQl.max()
            Q[l,:] = exp(lnQl)
            Q[l,:] = Q[l,:]/Q[l,:].sum()
        """

        ####################
        # VBM-step, update distribution over parameters
        ####################

        # compute expected occupation numbers <n*>s
        n = Qmat.sum(0)

        # compute expected edge counts <n**>s
        #QTAQ = mat((Q.T*A*Q).toarray())
        #npp = 0.5*trace(QTAQ)
        npp = 0.5 * (Qmat.T * A * Qmat).diagonal().sum()
        npm = 0.5 * trace(Qmat.T * (un * n - Qmat)) - npp
        nmp = M - npp
        nmm = C - M - npm

        # compute hyperparameters for beta and dirichlet distributions over
        # theta_+, theta_-, and pi_mu
        ap = npp + ap0
        bp = npm + bp0
        am = nmp + am0
        bm = nmm + bm0
        a = n + a0
        #print ap, bp, am, bm, a

        # evaluate variational free energy, an approximation to the
        # negative log-evidence
        F.append(betaln(ap, bp) - betaln(ap0, bp0)
                 + betaln(am, bm) - betaln(am0, bm0)
                 + sum(gammaln(a)) - gammaln(sum(a))
                 - (sum(gammaln(a0)) - gammaln(sum(a0)))
                 - sum(multiply(Qmat, log(Qmat))))
        F[i] = -F[i]

        print "iteration", i+1, ": F =", F[i]

        # F should always decrease
        if (i > 1) and F[i] > F[i-1]:
            print "\twarning: F increased from", F[i-1], "to", F[i]

        if (i > 1) and (abs(F[i] - F[i-1]) < TOL_DF):
            break

    return dict(F=F[-1], F_iter=F, Q=Q, K=K, ap=ap, bp=bp, am=am, bm=bm, a=a)
def pmf(self, m, n):
    perm = comb(m + n, m)
    T2 = self.values * m * n / ((m + n)**2 * lcm(m, n)**2)   # zeta -> T2
    pmf = self.frequencies / perm
    return T2, pmf
def M_step(self, anneal, model_params, my_suff_stat, my_data):
    """ BSC M_step

    my_data variables used:
        my_data['y']            Datapoints
        my_data['candidates']   Candidate H's according to selection func.

    Annealing variables used:
        anneal['T']             Temperature for det. annealing
        anneal['N_cut_factor']  0.: no truncation; 1. trunc. according to model
    """
    comm = self.comm
    H, Hprime = self.H, self.Hprime
    gamma = self.gamma
    W = model_params['W'].T
    pies = model_params['pi']
    sigma = model_params['sigma']
    mu = model_params['mu']

    # Read in data:
    my_y = my_data['y'].copy()
    candidates = my_data['candidates']
    logpj_all = my_suff_stat['logpj']
    all_denoms = np.exp(logpj_all).sum(axis=1)
    my_N, D = my_y.shape
    N = comm.allreduce(my_N)

    # Joerg's data noise idea
    data_noise_scale = anneal['data_noise']
    if data_noise_scale > 0:
        my_y += my_data['data_noise']

    SM = self.state_matrix          # shape: (no_states, Hprime)

    # To compute et_loglike:
    my_ldenom_sum = 0.0
    ldenom_sum = 0.0

    # Precompute factor for pi update
    A_pi_gamma = 0
    B_pi_gamma = 0
    for gamma_p in range(gamma + 1):
        A_pi_gamma += comb(H, gamma_p) * (pies**gamma_p) * ((1 - pies)**(H - gamma_p))
        B_pi_gamma += gamma_p * comb(H, gamma_p) * (pies**gamma_p) * ((1 - pies)**(H - gamma_p))
    E_pi_gamma = pies * H * A_pi_gamma / B_pi_gamma

    # Truncate data
    if anneal['Ncut_factor'] > 0.0:
        tracing.tracepoint("M_step:truncating")
        #alpha = 0.9  # alpha from ET paper
        #N_use = int(alpha * (N * (1 - (1 - A_pi_gamma) * anneal['Ncut_factor'])))
        N_use = int(N * (1 - (1 - A_pi_gamma) * anneal['Ncut_factor']))
        cut_denom = parallel.allsort(all_denoms)[-N_use]
        which = np.array(all_denoms >= cut_denom)
        candidates = candidates[which]
        logpj_all = logpj_all[which]
        my_y = my_y[which]
        my_N, D = my_y.shape
        N_use = comm.allreduce(my_N)
    else:
        N_use = N
    dlog.append('N', N_use)

    # Calculate truncated Likelihood
    L = H * np.log(1 - pies) - 0.5 * D * np.log(2 * pi * sigma**2) - np.log(A_pi_gamma)
    Fs = np.log(np.exp(logpj_all).sum(axis=1)).sum()
    L += comm.allreduce(Fs) / N_use
    dlog.append('L', L)

    # Precompute
    pil_bar = np.log(pies / (1. - pies))
    corr_all = logpj_all.max(axis=1)                    # shape: (my_N,)
    pjb_all = np.exp(logpj_all - corr_all[:, None])     # shape: (my_N, no_states)

    # Allocate
    my_Wp = np.zeros_like(W)        # shape (H, D)
    my_Wq = np.zeros((H, H))        # shape (H, H)
    my_pi = 0.0                     #
    my_sigma = 0.0                  #
    #my_mup = np.zeros_like(W)      # shape (H, D)
    #my_muq = np.zeros((H,H))       # shape (H, H)
    my_mus = np.zeros(H)            # shape D
    data_sum = my_y.sum(axis=0)     # sum over all data points for mu update

    ## Calculate mu
    #for n in xrange(my_N):
        #tracing.tracepoint("Calculationg offset")
        #y = my_y[n,:]                  # length D
        #cand = candidates[n,:]         # length Hprime
        #logpj = logpj_all[n,:]         # length no_states
        #corr = corr_all[n]             # scalar
        #pjb = pjb_all[n, :]

        ## Zero active hidden cause (do nothing for the W and pi case)
        ## this_Wp += 0.     # nothing to do
        ## this_Wq += 0.     # nothing to do
        ## this_pi += 0.     # nothing to do

        ## One active hidden cause
        #this_mup = np.outer(pjb[1:(H+1)],y)
        #this_muq = pjb[1:(H+1)] * np.identity(H)
        #this_mus = pjb[1:(H+1)]

        ## Handle hidden states with more than 1 active cause
        #this_mup[cand] += np.dot(np.outer(y,pjb[(1+H):]),SM).T
        #this_muq_tmp = np.zeros_like(my_muq[cand])
        #this_muq_tmp[:,cand] = np.dot(pjb[(1+H):] * SM.T,SM)
        #this_muq[cand] += this_muq_tmp
        #this_mus[cand] += np.inner(SM.T,pjb[(1+H):])

        #denom = pjb.sum()
        #my_mup += this_mup / denom
        #my_muq += this_muq / denom
        #my_mus += this_mus / denom

    ## Calculate updated mu
    #if 'mu' in self.to_learn:
        #tracing.tracepoint("M_step:update mu")
        #mup = np.empty_like(my_mup)
        #muq = np.empty_like(my_muq)
        #mus = np.empty_like(my_mus)
        #all_data_sum = np.empty_like(data_sum)
        #comm.Allreduce( [my_mup, MPI.DOUBLE], [mup, MPI.DOUBLE] )
        #comm.Allreduce( [my_muq, MPI.DOUBLE], [muq, MPI.DOUBLE] )
        #comm.Allreduce( [my_mus, MPI.DOUBLE], [mus, MPI.DOUBLE] )
        #comm.Allreduce( [data_sum, MPI.DOUBLE], [all_data_sum, MPI.DOUBLE] )
        #mu_numer = all_data_sum - np.dot(mus,np.dot(np.linalg.inv(muq), mup))
        #mu_denom = my_N - np.dot(mus,np.dot(np.linalg.inv(muq), mus))
        #mu_new = mu_numer/ mu_denom
    #else:
        #mu_new = mu

    # Iterate over all datapoints
    tracing.tracepoint("M_step:iterating")
    for n in range(my_N):
        y = my_y[n, :] - mu             # length D
        cand = candidates[n, :]         # length Hprime
        pjb = pjb_all[n, :]

        this_Wp = np.zeros_like(my_Wp)  # numerator for current datapoint (H, D)
        this_Wq = np.zeros_like(my_Wq)  # denominator for current datapoint (H, H)
        this_pi = 0.0                   # numerator for pi update (current datapoint)

        # Zero active hidden cause (do nothing for the W and pi case)
        # this_Wp += 0.     # nothing to do
        # this_Wq += 0.     # nothing to do
        # this_pi += 0.     # nothing to do

        # One active hidden cause
        this_Wp = np.outer(pjb[1:(H + 1)], y)
        this_Wq = pjb[1:(H + 1)] * np.identity(H)
        this_pi = pjb[1:(H + 1)].sum()
        this_mus = pjb[1:(H + 1)].copy()

        # Handle hidden states with more than 1 active cause
        this_Wp[cand] += np.dot(np.outer(y, pjb[(1 + H):]), SM).T
        this_Wq_tmp = np.zeros_like(my_Wq[cand])
        this_Wq_tmp[:, cand] = np.dot(pjb[(1 + H):] * SM.T, SM)
        this_Wq[cand] += this_Wq_tmp
        this_pi += np.inner(pjb[(1 + H):], SM.sum(axis=1))
        this_mus[cand] += np.inner(SM.T, pjb[(1 + H):])

        denom = pjb.sum()
        my_Wp += this_Wp / denom
        my_Wq += this_Wq / denom
        my_pi += this_pi / denom
        my_mus += this_mus / denom

    # Calculate updated W
    if 'W' in self.to_learn:
        tracing.tracepoint("M_step:update W")
        Wp = np.empty_like(my_Wp)
        Wq = np.empty_like(my_Wq)
        comm.Allreduce([my_Wp, MPI.DOUBLE], [Wp, MPI.DOUBLE])
        comm.Allreduce([my_Wq, MPI.DOUBLE], [Wq, MPI.DOUBLE])
        #W_new = np.dot(np.linalg.inv(Wq), Wp)
        #W_new = np.linalg.solve(Wq, Wp)    # TODO check and switch to this one
        rcond = -1
        if float(np.__version__[2:]) >= 14.0:
            rcond = None
        W_new = np.linalg.lstsq(Wq, Wp, rcond=rcond)[0]  # TODO check and switch to this one
    else:
        W_new = W

    # Calculate updated pi
    if 'pi' in self.to_learn:
        tracing.tracepoint("M_step:update pi")
        pi_new = E_pi_gamma * comm.allreduce(my_pi) / H / N_use
    else:
        pi_new = pies

    # Calculate updated sigma
    if 'sigma' in self.to_learn:
        tracing.tracepoint("M_step:update sigma")
        # Loop for sigma update:
        for n in range(my_N):
            y = my_y[n, :] - mu             # length D
            cand = candidates[n, :]         # length Hprime
            logpj = logpj_all[n, :]         # length no_states
            corr = logpj.max()              # scalar
            pjb = np.exp(logpj - corr)

            # Zero active hidden causes
            this_sigma = pjb[0] * (y**2).sum()

            # Hidden states with one active cause
            this_sigma += (pjb[1:(H + 1)] * ((W - y)**2).sum(axis=1)).sum()

            # Handle hidden states with more than 1 active cause
            SM = self.state_matrix          # is (no_states, Hprime)
            W_ = W[cand]                    # is (Hprime x D)
            Wbar = np.dot(SM, W_)
            this_sigma += (pjb[(H + 1):] * ((Wbar - y)**2).sum(axis=1)).sum()

            denom = pjb.sum()
            my_sigma += this_sigma / denom

        sigma_new = np.sqrt(comm.allreduce(my_sigma) / D / N_use)
    else:
        sigma_new = sigma

    # Calculate updated mu:
    if 'mu' in self.to_learn:
        tracing.tracepoint("M_step:update mu")
        mus = np.empty_like(my_mus)
        all_data_sum = np.empty_like(data_sum)
        comm.Allreduce([my_mus, MPI.DOUBLE], [mus, MPI.DOUBLE])
        comm.Allreduce([data_sum, MPI.DOUBLE], [all_data_sum, MPI.DOUBLE])
        mu_new = all_data_sum / my_N - np.inner(W_new.T / my_N, mus)
    else:
        mu_new = mu

    for param in anneal.crit_params:
        exec('this_param = ' + param)
        anneal.dyn_param(param, this_param)

    dlog.append('N_use', N_use)

    return {'W': W_new.T, 'pi': pi_new, 'sigma': sigma_new, 'mu': mu_new}
def avail(p, n, k):
    q = 1 - p
    return sum(comb(n, i) * (p**i) * (q**(n - i)) for i in xrange(k, n + 1))
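avail(p, n, k) is the probability that at least k of n independent components are up, i.e. the binomial survival function evaluated at k-1. A quick cross-check sketch against `scipy.stats.binom` (Python 3 range in place of xrange; `comb` from `scipy.special`):

# k-out-of-n availability vs. the binomial survival function.
import numpy as np
from scipy.special import comb
from scipy.stats import binom

def k_out_of_n_avail(p, n, k):
    q = 1 - p
    return sum(comb(n, i) * p**i * q**(n - i) for i in range(k, n + 1))

p, n, k = 0.99, 5, 3
assert np.allclose(k_out_of_n_avail(p, n, k), binom.sf(k - 1, n, p))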
print 'Prob of 0 sets for the independent case', freq[1], 'or 1 in', 1. / freq[1]
#('Prob of 0 sets for the independent case', 0.00038668213395202477, 'or 1 in', 2586.1034482758619)

nsolutions = []
n15 = []
for i in range(5000):
    count15 = 0
    for cards in one_game():
        if cards.shape[1] > 12:
            count15 += 1
        nsolutions.append(len(find_sets2(cards)))
    n15.append(count15)

pylab.gcf().set_size_inches(11, 6.4)
freq, bins, _ = pylab.hist(nsolutions, bins=range(-1, 14), normed=1, hold=0)
pylab.draw()
print 'Prob of 0 sets for 15 cards', freq[1], 'or 1 in', 1. / freq[1]
# ('Prob of 0 sets for 15 cards', 0.010681255698840693, 'or 1 in', 93.621951219512198)

freq, bins, _ = pylab.hist(n15, bins=range(-1, 14), normed=1, hold=0)

print 'number of triplets in 12 cards', scipy.comb(12, 3)
print 'number of new triplets when adding 3 cards', 3 * scipy.comb(12, 2) + scipy.comb(3, 2) * 12 + 1

# save
import cPickle
f = open('more_set_results.pickle', 'w')
cPickle.dump((nsolutions_indep, nsolutions, n15), f)
f.close()