def test_pmf_accuracy():
    """Compare accuracy of the probability mass function.

    Compare the results with the accuracy check proposed in [Hong2013]_,
    equation (15).

    Three groups of identical success probabilities are drawn at random, so
    the sum of the Bernoulli trials is also the sum of three independent
    binomial variables.  The probability of a random total ``k`` is computed
    by explicit convolution of the three binomial pmfs and compared with
    ``PoiBin(p).pmf(k)``.
    """
    p1, p2, p3 = np.around(np.random.random_sample(size=3), decimals=2)
    # ``random_integers`` is deprecated; ``randint`` excludes the upper
    # bound, so 11 keeps the original inclusive range 1..10.
    n1, n2, n3 = (int(n) for n in np.random.randint(1, 11, size=3))
    nn = n1 + n2 + n3
    p = [p1] * n1 + [p2] * n2 + [p3] * n3
    b1 = binom(n=n1, p=p1)
    b2 = binom(n=n2, p=p2)
    b3 = binom(n=n3, p=p3)
    k = np.random.randint(0, nn + 1)
    # Convolution: the three counts are i, j - i and k - j, which sum to k.
    chi_bn = 0
    for j in range(0, k + 1):
        for i in range(0, j + 1):
            chi_bn += b1.pmf(i) * b2.pmf(j - i) * b3.pmf(k - j)
    pb = PoiBin(p)
    chi_pb = pb.pmf(k)
    # An absolute tolerance avoids spurious failures when the two values
    # straddle a rounding boundary at the 10th decimal.
    assert np.isclose(chi_bn, chi_pb, rtol=0, atol=1e-10)
def down_fade_gap_inner(N, tablefunc, target, tSNR_range, rSNR1_range, rSNR2, pa2=10**(-10)):
    """Scan an SNR grid and return the first point whose accumulated value is below *target*.

    For every ``tSNR`` in *tSNR_range* (outer loop) and every ``rSNR1`` in
    *rSNR1_range* (inner loop) a sum over ``g = 1..N`` is accumulated; the
    accumulation stops early as soon as it exceeds *target*.

    Parameters
    ----------
    N : int
        Number of trials of the outer binomial; also the upper bound of ``g``.
    tablefunc : callable
        Maps ``rSNR1`` to the probability ``pa1``.
    target : float
        Threshold the accumulated value is compared against.
    tSNR_range, rSNR1_range : iterable
        Grid values, presumably in dB since they are converted with
        ``10**(x/10)`` -- TODO confirm units with the caller.
    rSNR2 : float
        Second SNR value, converted the same way.
    pa2 : float, optional
        Probability mixed into ``qE`` and ``qB``.

    Returns
    -------
    numpy.ndarray or None
        ``np.array([N, tSNR, rSNR1, rSNR2])`` for the first grid point whose
        accumulated value is below *target*; ``None`` if no point qualifies.
    """
    # NOTE(review): SOURCE was collapsed onto one line; the placement of the
    # final ``if result < target`` (after the g-loop, inside the rSNR1 loop)
    # is inferred from the data flow -- confirm against the original file.
    for tSNR in tSNR_range:
        h2 = 10**((rSNR2 - tSNR)/10)
        pf2 = 1 - np.exp(-h2)
        # p2 = pf2 + (1-pf2)*pa2
        for rSNR1 in rSNR1_range:
            pa1 = tablefunc(rSNR1)
            h1 = 10**((rSNR1 - tSNR)/10)  # linear fade
            pf1 = 1 - np.exp(-h1)
            # pf2c = 1
            pf2c = 1 - np.exp(h1-h2) if h2 > h1 else 0
            rv_g = binom(N, 1 - pf1)
            result = 0  # rv_g.pmf(0)
            # ``range`` replaces the Python-2-only ``xrange``.
            for g in range(1, N + 1):
                rv_a = binom(g, 1 - pa1)
                a_range = np.arange(0, g+1, 1)
                qpf2 = np.power(pf2, a_range)
                qE = qpf2 + (1-qpf2)*pa2
                qB = qpf2 * pf2c + (1 - qpf2*pf2c)*pa2
                # qB = qE * (pf2c + (1 - pf2c)*pa2)
                psuccess = (1-qE)**(N-g) * np.power((1-qB), g-a_range)
                z = rv_g.pmf(g) * np.dot(rv_a.pmf(a_range), 1-psuccess)
                result += z
                if result > target:
                    # Already above the threshold; more terms only add to it.
                    break
            if result < target:
                return np.array([N, tSNR, rSNR1, rSNR2])
def show_binomial():
    """Show an example of binomial distributions.

    Plots the pmf of three binomial distributions on the same axes, writes
    the figure through ``mystyle.printout_plain`` and shows it.
    """
    bd1 = stats.binom(20, 0.5)
    bd2 = stats.binom(20, 0.7)
    bd3 = stats.binom(40, 0.5)
    k = np.arange(40)

    sns.set_context('paper')
    sns.set_style('ticks')
    mystyle.set(14)

    markersize = 8
    # ``plt.hold(True)`` was dropped: it was removed from Matplotlib, and
    # successive ``plot`` calls already draw on the same axes.
    plt.plot(k, bd1.pmf(k), 'o-b', ms=markersize)
    plt.plot(k, bd2.pmf(k), 'd-r', ms=markersize)
    plt.plot(k, bd3.pmf(k), 's-g', ms=markersize)
    plt.title('Binomial distribuition')
    plt.legend(['p=0.5 and n=20', 'p=0.7 and n=20', 'p=0.5 and n=40'])
    plt.xlabel('X')
    plt.ylabel('P(X)')
    sns.despine()

    mystyle.printout_plain('Binomial_distribution_pmf.png')
    plt.show()
def __init__(self, ngene, c=75, npool=4, epsilon=0.01, cutoffs=range(1,26), ntotal=4e+06, mutFac=3.):
    """Precompute per-cutoff probabilities from two binomial models.

    ``self.pmf`` is Binomial(c, (1 - mutFac*epsilon)/npool) and ``self.pmf2``
    is Binomial(c, epsilon).  For every value in *cutoffs*:

    * ``self.pFail`` holds ``P(X <= cutoff - 1)`` under ``self.pmf``;
    * ``self.lambdaError`` holds ``(ntotal / ngene) * P(X > cutoff - 1)``
      under ``self.pmf2``.
    """
    # cdf()/sf() are evaluated one below each cutoff (the "offset by one").
    shifted = numpy.array(cutoffs) - 1
    success_prob = (1. - mutFac * epsilon) / npool
    self.pmf = stats.binom(c, success_prob)
    self.pmf2 = stats.binom(c, epsilon)
    self.pFail = self.pmf.cdf(shifted)
    scale = float(ntotal) / ngene
    self.lambdaError = scale * self.pmf2.sf(shifted)
    self.ngene = ngene
def UpdateK(k_old_list,z_old_list,T,lambda_old_list,rou_list,u_list,phi_list,alpha_hat,iterNum,eta):
    """Run one random-walk update of ``k`` and ``z`` for every series.

    For each series ``ii`` and each ``t`` in ``range(T - 1)`` a move
    ``k_old[t] +/- epsilon`` is proposed, where the sign is a fair coin flip
    and ``epsilon`` is geometric with success probability
    ``1 / (1 + z_old[t])`` shifted by ``loc=-1``.  Negative proposals are
    rejected outright, leaving ``k`` and ``z`` unchanged.  Otherwise the
    proposal is accepted with probability ``ap`` and ``z`` is moved by
    ``iterNum**(-eta) * (ap - alpha_hat)``.

    Returns
    -------
    (k_new_list, z_new_list) : tuple of list of list
    """
    def _weight(k, t, lam, rou, u, phi):
        # Unnormalised weight of state ``k`` at position ``t``.
        return (lam/((1-rou)*u))**k*(phi[t]*lam*rou/((1-rou)*u))**k*\
            phi[t+1]/(math.factorial(k)*spec.gamma(lam+k))

    k_new_list = []
    z_new_list = []
    for ii, k_src in enumerate(k_old_list):
        # Same treatment as phi: work on one series at a time.
        k_old = copy.deepcopy(k_src)
        z_old = copy.deepcopy(z_old_list[ii])
        lambda_old = copy.deepcopy(lambda_old_list[ii])
        rou = copy.deepcopy(rou_list[ii])
        u = copy.deepcopy(u_list[ii])
        phi = copy.deepcopy(phi_list[ii])

        k_new = []
        z_new = []
        for t in range(T-1):
            coin = stats.binom(1,0.5).rvs(1)
            d_k = 1 if coin[0] == 0 else -1
            step = stats.geom.rvs(1.0/(1+z_old[t]),loc=-1,size=1)
            k_new_temp = k_old[t]+d_k*step[0]

            # step 3 and step 4
            if k_new_temp < 0:
                k_new.append(k_old[t])
                z_new.append(z_old[t])
                continue

            p_k = _weight(k_old[t], t, lambda_old, rou, u, phi)
            p_k_new = _weight(k_new_temp, t, lambda_old, rou, u, phi)
            ap = min(1,p_k_new/p_k)
            accepted = stats.binom(1,ap).rvs(1)
            k_new.append(k_new_temp if accepted[0] == 1 else k_old[t])
            z_new.append(z_old[t]+ iterNum**(-1.0*eta)*(ap-alpha_hat))

        k_new_list.append(k_new)
        z_new_list.append(z_new)
    return k_new_list, z_new_list
def get_yield_params(npool=4, c=75, epsilon=0.01, nmut=50, ngene=4200, ntotal=4.64e+06, mutFac=3, **kwargs):
    """Build the two binomial models and the two cutoffs derived from them.

    ``pmfMut`` is Binomial(c, (1 - mutFac*epsilon)/npool) and ``pmfErr`` is
    Binomial(c, epsilon).  Both cutoffs come from ``find_cutoff(0, c, pred)``,
    which is defined elsewhere:

    * ``i`` is one more than the cutoff found for the predicate
      ``pmfErr.sf(cut) * ntotal < 0.05``;
    * ``j`` is the cutoff found for the predicate
      ``pmfMut.cdf(cut) * nmut > 0.05``.

    ``ngene`` and ``**kwargs`` are accepted but not used here.

    Returns
    -------
    (pmfMut, pmfErr, i, j)
    """
    mutant_model = stats.binom(c, (1. - mutFac * epsilon) / npool)
    error_model = stats.binom(c, epsilon)

    def no_false_positives(cut):
        expected_false_positives = error_model.sf(cut) * ntotal
        return expected_false_positives < 0.05

    def too_many_false_negatives(cut):
        expected_false_negatives = mutant_model.cdf(cut) * nmut
        return expected_false_negatives > 0.05

    lower = find_cutoff(0, c, no_false_positives) + 1
    upper = find_cutoff(0, c, too_many_false_negatives)
    return mutant_model, error_model, lower, upper
def UpdateK(k_old,z_old,T,lambda_old,rou,u,phi,alpha,iterNum,eta):
    """Run one random-walk update of ``k`` and ``z`` for a single series.

    For each ``t`` in ``range(T - 1)`` a move ``k_old[t] +/- epsilon`` is
    proposed, where the sign is a fair coin flip and ``epsilon`` is geometric
    with success probability ``1 / (1 + z_old[t])``.  Negative proposals are
    rejected outright, leaving ``k`` and ``z`` unchanged.  Otherwise the
    proposal is accepted with probability ``ap`` and ``z`` is moved by
    ``iterNum**(-eta) * (ap - alpha)``.

    Returns
    -------
    (k_new, z_new) : tuple of list
        Both lists have ``T - 1`` entries.  They are also printed.
    """
    def _weight(k, t):
        # Unnormalised weight of state ``k`` at position ``t``.
        return (lambda_old/((1-rou)*u))**k*(phi[t]*lambda_old*rou/((1-rou)*u))**k*\
            phi[t+1]/(math.factorial(k)*spec.gamma(lambda_old+k))

    k_new = []
    z_new = []
    for t in range(T-1):
        coin = stats.binom(1,0.5).rvs(1)
        d_k = 1 if coin[0] == 0 else -1
        epsilon = stats.geom(1.0/(1+z_old[t])).rvs(1)
        k_new_temp = k_old[t]+d_k*epsilon[0]

        # step 3
        if k_new_temp < 0:
            k_new.append(k_old[t])
            z_new.append(z_old[t])
            continue

        ap = min(1,_weight(k_new_temp, t)/_weight(k_old[t], t))
        accepted = stats.binom(1,ap).rvs(1)
        # The Bernoulli(ap) draw is 1 with probability ``ap``, so 1 means
        # "accept".  The previous ``== 0`` test accepted with probability
        # ``1 - ap``.
        if accepted[0] == 1:
            k_new.append(k_new_temp)
        else:
            k_new.append(k_old[t])
        # step 4
        z_new.append(z_old[t]+ iterNum**(-1.0*eta)*(ap-alpha))

    # Single-argument print calls produce the same output on Python 2 and 3.
    print("k z new:\n")
    print(k_new)
    print(z_new)
    print("\n")
    return k_new, z_new
def UpdateK_sigma(k_sigma_old,z_sigma_old,lambda_sigma,rou_sigma,u_sigma,sigma_2,iterNum,eta,alpha,T):
    """Run one random-walk update of ``k_sigma`` and ``z_sigma``.

    For each ``t`` in ``range(T - 1)`` a move ``k_sigma_old[t] +/- epsilon``
    is proposed, where the sign is a fair coin flip and ``epsilon`` is
    geometric with success probability ``1 / (1 + z_sigma_old[t])``.
    Negative proposals are rejected outright, leaving both values unchanged.
    Otherwise the proposal is accepted with probability ``ap`` and ``z`` is
    moved by ``iterNum**(-eta) * (ap - alpha)``.

    Returns
    -------
    (k_sigma_new, z_sigma_new) : tuple of list
        Both lists have ``T - 1`` entries.
    """
    def _weight(k, t):
        # Unnormalised weight of state ``k`` at position ``t``.
        return (lambda_sigma/((1-rou_sigma)*u_sigma))**k*\
            (sigma_2[t]*lambda_sigma*rou_sigma/((1-rou_sigma)*u_sigma))**k\
            *(sigma_2[t+1])**k/(math.factorial(k)*spec.gamma(lambda_sigma+k))

    k_sigma_new = []
    z_sigma_new = []
    for t in range(T-1):
        coin = stats.binom(1,0.5).rvs(1)
        d_k = 1 if coin[0] == 0 else -1
        epsilon = stats.geom(1.0/(1+z_sigma_old[t])).rvs(1)
        k_sigma_new_temp = k_sigma_old[t]+d_k*epsilon[0]

        # step 3
        if k_sigma_new_temp < 0:
            k_sigma_new.append(k_sigma_old[t])
            z_sigma_new.append(z_sigma_old[t])
            continue

        # step 2
        ap = min(1,_weight(k_sigma_new_temp, t)/_weight(k_sigma_old[t], t))
        accepted = stats.binom(1,ap).rvs(1)
        # The Bernoulli(ap) draw is 1 with probability ``ap``, so 1 means
        # "accept".  The previous ``== 0`` test accepted with probability
        # ``1 - ap``.
        if accepted[0] == 1:
            k_sigma_new.append(k_sigma_new_temp)
        else:
            k_sigma_new.append(k_sigma_old[t])
        # step 4
        z_sigma_new.append(z_sigma_old[t]+ iterNum**(-1.0*eta)*(ap-alpha))
    return k_sigma_new,z_sigma_new
def birth(M):
    """Return the birth term for state ``M``.

    Reads the module-level names ``N``, ``p`` and ``eta``.  With
    ``B1 ~ Binomial(N - M - 1, p*eta/N)`` and
    ``B2 ~ Binomial(M, (1 - p)*eta/N)`` it accumulates
    ``sum_l P(B1 = l) * P(l < B2 <= M - 1)`` and scales it by ``(N - M) / N``.
    Returns ``0`` when ``M == 0``.
    """
    if M == 0:
        return 0
    B1 = binom(N - M - 1, p * eta / N)
    B2 = binom(M, (1 - p) * eta / N)
    pmf1, pmf2 = B1.pmf, B2.pmf
    # Upper tail sums of B2 over 0..M-1: p2s[i] = sum_{m=i}^{M-1} P(B2 = m).
    p2 = [pmf2(m) for m in range(M)]
    p2s = list(np.cumsum(p2[::-1]))[::-1]
    b = 0.0
    for l in range(N - M):
        t = p2s[l + 1] if l + 1 < M else 0
        b += t * pmf1(l)
    # float() keeps the ratio from truncating to 0 under Python 2 integer
    # division when N and M are ints.
    return float(N - M) / N * b
def test_entropy(self): # Basic entropy tests. b = stats.binom(2, 0.5) expected_p = np.array([0.25, 0.5, 0.25]) expected_h = -sum(xlogy(expected_p, expected_p)) h = b.entropy() assert_allclose(h, expected_h) b = stats.binom(2, 0.0) h = b.entropy() assert_equal(h, 0.0) b = stats.binom(2, 1.0) h = b.entropy() assert_equal(h, 0.0)
def llr(c1, c2, c12, n):
    """Return the binomial log-likelihood ratio ``log L(H1) - log L(H0)``.

    Parameters
    ----------
    c1, c2 : int
        Counts for the first and the second item.
    c12 : int
        Joint count.
    n : int
        Total count.

    Both hypotheses score the two observations ``c12`` out of ``c1`` and
    ``c2 - c12`` out of ``n - c1``.  H0 uses the single probability
    ``c2 / n`` for both; H1 uses ``c12 / n`` and ``(c2 - c12) / n``.
    """
    # NOTE(review): the H1 probabilities are normalised by n, exactly as in
    # the original code and its comments; confirm this is intended rather
    # than c1 and n - c1.
    total = float(n)  # avoid integer division on Python 2
    # H0: Independence p(w1,w2) = p(w1,~w2) = c2/N
    p0 = c2 / total
    # H1: Dependence, p(w1,w2) = c12/N
    p10 = c12 / total
    # H1: p(~w1,w2) = (c2-c12)/N
    p11 = (c2 - c12) / total
    # binomial probabilities
    # H0: b(c12; c1, p0), b(c2-c12; N-c1, p0)
    # H1: b(c12, c1, p10), b(c2-c12; N-c1, p11)
    log_h0 = binom(c1, p0).logpmf(c12) + binom(n - c1, p0).logpmf(c2 - c12)
    log_h1 = binom(c1, p10).logpmf(c12) + binom(n - c1, p11).logpmf(c2 - c12)
    # In log space the ratio p(H1) / p(H0) is a difference.
    return log_h1 - log_h0
def add_edges_exact(g, nodes, p, weighted=None, links=None):
    """Add edges to ``g`` between the given nodes or along the given links.

    Parameters
    ----------
    g : graph
        Graph that is modified in place (uses ``add_edge`` /
        ``add_edges_from`` and ``g[a][b]`` edge-attribute access).
    nodes : collection
        Nodes to connect.  Must be empty when *links* is given.
    p : float
        Edge probability.
    weighted : str or None
        With ``'exact'`` every candidate link is added and ``p`` is appended
        to that edge's ``'weights'`` list.  Otherwise a binomially
        distributed number of links is sampled without replacement.
    links : iterable of 2-element collections, optional
        Candidate links; defaults to all unordered pairs of *nodes*.

    Returns
    -------
    None
    """
    if not links:
        links = set(frozenset((a, b)) for a in nodes for b in nodes if a != b)
        assert len(links) == len(nodes) * (len(nodes)-1) / 2
    else:
        assert not nodes

    if weighted == 'exact':
        for a, b in links:
            g.add_edge(a, b, weight=None)
            # ``g[a][b]`` is the edge-attribute dict in both old and new
            # networkx releases; ``g.edge`` no longer exists in networkx 2.
            g[a][b].setdefault('weights', []).append(p)
            assert g[b][a]['weights'] == g[a][b]['weights']
        return

    links = list(links)
    # Number of edges ~ Binomial(#links, p); cast the numpy integer to a
    # plain int before handing it to ``random.sample``.
    nEdges = int(binom(len(links), p).rvs())
    edges = random.sample(links, nEdges)
    g.add_edges_from(edges)
def mcnemar_midp(b, c):
    """
    Compute McNemar's test using the "mid-p" variant suggested by:

    M.W. Fagerland, S. Lydersen, P. Laake. 2013. The McNemar test for
    binary matched-pairs data: Mid-p and asymptotic are better than exact
    conditional. BMC Medical Research Methodology 13: 91.

    `b` is the number of observations correctly labeled by the first---but
    not the second---system; `c` is the number of observations correctly
    labeled by the second---but not the first---system.

    The function prints `b`, `c`, the exact p-value ``2 * P(X <= min(b, c))``
    under Binomial(b + c, 0.5), the mid-p value (exact p minus
    ``P(X = min(b, c))``) and the statistic ``(b - c)**2 / (b + c)``.
    It returns nothing.
    """
    n = b + c
    x = min(b, c)
    dist = binom(n, .5)
    p = 2. * dist.cdf(x)
    midp = p - dist.pmf(x)
    # With no discordant pairs the statistic is 0/0; report NaN rather than
    # raising ZeroDivisionError.
    chi = float(abs(b - c)**2) / n if n else float('nan')
    # Built as single strings so the output is identical on Python 2 and 3.
    print("b =  %s" % (b,))
    print("c =  %s" % (c,))
    print("Exact p =  %s" % (p,))
    print("Mid p =  %s" % (midp,))
    print("Chi =  %s" % (chi,))
def UpdateLambda(lambda_star_old,tao_lambda_star_old,s_star,u_star,m,u,eta,alpha,iterNum):
    """Run one random-walk update of ``lambda_star`` and its proposal variance.

    A proposal is drawn on the log scale from a normal centred at
    ``log(lambda_star_old)`` with variance ``tao_lambda_star_old``.  It is
    accepted with probability ``ap = min(1, target(new) / target(old))``.
    The log of the proposal variance is then moved by
    ``iterNum**(-eta) * (ap - alpha)``.

    Returns
    -------
    (lambda_star_new, tao_lambda_star_new) : tuple of float
    """
    def _target(lam):
        # Unnormalised density at ``lam``; evaluated for old and new values.
        prod_u = 1.0
        for item in u:
            prod_u = prod_u*item**lam
        return np.exp(-1.0*lam/s_star)*(((lam**lam)/(u_star**lam*spec.gamma(lam)))**m)*np.exp(-1.0*lam/u_star*sum(u))*prod_u

    log_lambda_star_X = stats.norm(loc=np.log(lambda_star_old), scale=np.sqrt(tao_lambda_star_old))
    log_lambda_star = log_lambda_star_X.rvs(1)
    lambda_star = np.exp(log_lambda_star[0])

    p_lambda_star = _target(lambda_star_old)
    p_lambda_star_new = _target(lambda_star)
    ap = min(1,p_lambda_star_new/p_lambda_star)

    # step 3: accept the proposal when the Bernoulli(ap) draw is 1
    accepted = stats.binom(1,ap).rvs(1)
    lambda_star_new = lambda_star if accepted[0] == 1 else lambda_star_old

    # step 4: adapt the proposal variance on the log scale
    log_tao_lambda_star = np.log(tao_lambda_star_old)+iterNum**(-1.0*eta)*(ap-alpha)
    tao_lambda_star_new = np.exp(log_tao_lambda_star)
    print("finished a iteration.")
    return lambda_star_new,tao_lambda_star_new
def fig2():
    '''
    Plot a histogram of the simulated values ``v`` and compare it with
    binomial, Poisson and normal distributions scaled by ``n_sim``.

    The success probability comes from
    ``probability_cut_nooverlaps(spec.l_x, fib.lf, delta)`` with
    ``delta = 0``.  Set n_sim >> 1.
    '''
    figure( 2 )
    delta = 0.
    p = probability_cut_nooverlaps( spec.l_x, fib.lf, delta )
    rvb = binom( fib.n, p )
    rvp = poisson( fib.n * p )
    rvn = norm( fib.n * p, sqrt( fib.n * p * ( 1 - p ) ) )

    # Plot four standard deviations either side of the mean.
    graph_from = floor( bin_mean - 4 * bin_stdv )
    graph_to = floor( bin_mean + 4 * bin_stdv ) + 1
    x = arange( graph_from , graph_to )
    plot( x, n_sim * rvb.pmf( x ), color = 'red', linewidth = 2, label = 'Binomial' )
    plot( x, n_sim * rvp.pmf( x ), color = 'green', linewidth = 2, label = 'Poisson' )
    plot( x, n_sim * rvn.pdf( x ), color = 'blue', linewidth = 2, label = 'Normal' )
    # ``normed = 0`` was dropped: un-normalised counts are the default, and
    # the keyword no longer exists in current Matplotlib.
    pdf, bins, patches = hist( v, n_sim )
    legend()
    draw()
def death(M):
    """Return the death term for state ``M``.

    Reads the module-level names ``N``, ``p`` and ``eta``.  With
    ``B1 ~ Binomial(M - 1, p*eta/N)`` and
    ``B2 ~ Binomial(N - M, (1 - p)*eta/N)`` it accumulates
    ``sum_k P(B1 = k) * P(B2 > k)`` and scales it by ``M / N``.
    Returns ``0`` when ``M == N``.  Each step of the accumulation is printed.
    """
    if M == N:
        return 0
    B1 = binom(M - 1, p * eta / N)
    B2 = binom(N - M, (1 - p) * eta / N)
    pmf1, pmf2 = B1.pmf, B2.pmf
    # Upper tail sums of B2: p2s[i] = sum_{l=i}^{N-M} P(B2 = l).
    p2 = [pmf2(l) for l in range(N - M + 1)]
    p2s = list(np.cumsum(p2[::-1]))[::-1]
    d = 0.0
    for k in range(M):
        t = p2s[k + 1] if k + 1 < N - M + 1 else 0
        d += pmf1(k) * t
        # Formatted as one string so the output matches on Python 2 and 3.
        print("%s %s %s %s %s" % (M, pmf1(k), k, t, d))
    # float() keeps the ratio from truncating to 0 under Python 2 integer
    # division when M and N are ints.
    return float(M) / N * d
def option_price(P, R, q):
    """Return the discounted binomial expectation of the last column of ``P``.

    Parameters
    ----------
    P : 2-D array
        Only the last column is used; its ``n + 1`` entries are weighted by
        the pmf of Binomial(n, 1 - q) at ``k = 0..n``.
    R : float
        Per-step discount factor; the weighted sum is divided by ``R ** n``.
    q : float
        The binomial success probability is ``1 - q``.
    """
    pv = P[:, -1]
    n = size(pv) - 1
    # ``pmf`` is already vectorised, so it takes the whole range at once.
    qv = binom(n, 1 - q).pmf(range(n + 1))
    return dot(qv, pv) / R ** n
def simulate_experiment(self, modelparams, expparams, repeat=1):
    """Draw binomial outcomes whose success probability comes from the underlying model.

    The underlying model's likelihood of outcome ``1`` is used as the
    success probability and ``expparams['n_meas']`` as the number of trials.
    *repeat* independent draws are stacked along the first axis.  A result
    with exactly one element is returned as a scalar.
    """
    # FIXME: uncommenting causes a slowdown, but we need to call
    #        to track sim counts.
    #super(BinomialModel, self).simulate_experiment(modelparams, expparams)

    # Start by getting the pr(1) for the underlying model.
    inner_expparams = expparams['x'] if self._expparams_scalar else expparams
    pr1 = self.underlying_model.likelihood(
        np.array([1], dtype='uint'), modelparams, inner_expparams)

    # n_meas is cast to a signed int before being handed to binom.
    n_trials = expparams['n_meas'].astype('int')
    dist = binom(n_trials, pr1[0, :, :])

    if pr1.size != 1:
        def sample():
            return dist.rvs()[np.newaxis, :, :]
    else:
        def sample():
            return np.array([[[dist.rvs()]]])

    draws = [sample() for _ in range(repeat)]
    outcomes = np.concatenate(draws, axis=0)
    if outcomes.size == 1:
        return outcomes[0,0,0]
    return outcomes
def _calc_score(
    fore_hit_size,
    fore_size,
    back_hit_size,
    back_size,
    prob_fn=None,
):
    """Score a foreground hit count against a background hit rate.

    The score is ``-log10(P(X >= k) / P(X <= k))`` with ``k = fore_hit_size``.
    ``X`` is hypergeometric(back_size, back_hit_size, fore_size) by default,
    or Binomial(fore_size, back_hit_size / back_size) when *prob_fn* is
    ``"binom"``.

    Returns ``0`` when ``back_hit_size <= 0``, ``-200`` when the lower tail
    is not positive and ``200`` when the upper tail is not positive.
    """
    prob_fn = "hypergeom" if prob_fn is None else prob_fn
    assert prob_fn in ["hypergeom", "binom"]

    if back_hit_size <= 0:
        return 0

    hit_rate = back_hit_size / back_size
    if prob_fn == "hypergeom":
        dist = stats.hypergeom(back_size, back_hit_size, fore_size)
    else:
        dist = stats.binom(fore_size, hit_rate)

    upper_tail = dist.sf(fore_hit_size - 1)   # P(X >= k)
    lower_tail = dist.cdf(fore_hit_size)      # P(X <= k)

    if lower_tail <= 0:
        return -200
    if upper_tail <= 0:
        return 200
    return -np.log10(upper_tail / lower_tail)
def show_binomial():
    """Show an example of binomial distributions.

    Plots the pmf of three binomial distributions over their full support,
    annotates the point (20, 0), writes the figure through
    ``mystyle.printout_plain`` and shows it.
    """
    trials = [20, 20, 40]
    success_probs = [0.5, 0.7, 0.5]

    for n, p in zip(trials, success_probs):
        support = np.arange(n+1)
        curve_label = 'p={0:3.1f}, n={1}'.format(p,n)
        plt.plot(support, stats.binom(n,p).pmf(support), 'o--', label=curve_label)

    plt.legend()
    plt.title('Binomial distribuition')
    plt.xlabel('X')
    plt.ylabel('P(X)')
    plt.annotate('Upper Limit', xy=(20,0), xytext=(27,0.04),
                 arrowprops=dict(shrink=0.05))
    mystyle.printout_plain('Binomial_distribution_pmf.png')
    plt.show()
def add_edges_out_sparse(g, p, g_layers, weighted=False, edges_constructor=add_edges_exact, non_overlapping=False):
    """Add random edges to ``g`` between nodes that share no community.

    The number of edges to add is drawn from Binomial(n_links, p), where
    ``n_links`` is the number of node pairs of ``g`` minus the number of
    pairs inside each community of ``g_layers[0]``.  Node pairs are then
    drawn uniformly at random; a pair is skipped if the two nodes share a
    community or are already connected.

    Only a single, non-overlapping layer is supported; anything else raises
    ``NotImplementedError``.  ``weighted``, ``edges_constructor`` and
    ``non_overlapping`` are accepted but not used here.
    """
    if len(g_layers) != 1:
        raise NotImplementedError("len(g_layers) > 1 in this function")
    cmtys = cmty.Communities.from_networkx(g_layers[0])
    if not cmtys.is_non_overlapping():
        raise NotImplementedError("overlapping in this function")

    # ``random.choice`` needs an indexable sequence; a node view is not one.
    nodes = list(g.nodes())
    # Total number of pairs of nodes; ``//`` keeps the count an integer.
    n_links = len(g)*(len(g)-1) // 2
    # Subtract the total number of pairs inside communities.  ``.items()``
    # works on both Python 2 and 3 (``.iteritems()`` is Python 2 only).
    n_links -= sum(s*(s-1)//2 for c, s in cmtys.cmtysizes().items())
    n_links_wanted = binom(n_links, p).rvs()

    n_links_present = 0
    # NOTE(review): ``.node`` is the networkx 1.x/2.0-2.3 attribute; on
    # newer networkx this needs ``.nodes`` -- confirm the version in use.
    node_dict = g_layers[0].node
    # NOTE(review): this loop does not terminate if fewer than
    # n_links_wanted eligible pairs remain -- confirm callers keep p small.
    while n_links_present < n_links_wanted:
        a = random.choice(nodes)
        b = random.choice(nodes)
        if node_dict[a]['cmtys']&node_dict[b]['cmtys']:
            continue
        if g.has_edge(a, b):
            continue
        g.add_edge(a, b)
        n_links_present += 1
def test_pmf_pb_binom():
    """Compare the probability mass function with the binomial limit case."""
    def four_digits(value):
        # Compare to four digits behind the comma
        return int(value * 10000)

    # For equal probabilites p_j, the Poisson Binomial distribution reduces to
    # the Binomial one:
    equal_probs = [0.5, 0.5]
    pb_equal = PoiBin(equal_probs)
    bn_equal = binom(n=2, p=equal_probs[0])
    assert four_digits(bn_equal.pmf(0)) == four_digits(pb_equal.pmf(0))

    # For different probabilities p_j, the Poisson Binomial distribution and
    # the Binomial distribution are different:
    pb_mixed = PoiBin([0.5, 0.8])
    bn_mixed = binom(2, p=0.5)
    assert four_digits(bn_mixed.pmf(0)) != four_digits(pb_mixed.pmf(0))
def BootstrapFD(samp):
    """Yield bootstrap frequency counts for the sample ``samp``.

    The first value yielded is the observed ``FreqDist(samp).values()``.
    After that the generator never ends.  Each further value holds the
    counts of a resample of ``N`` draws: ``ns2`` draws (binomial with
    success probability ``1 - C``) are assigned to ``f0`` unseen categories
    and the remaining ``ns1`` draws are taken from the observed categories
    with the corrected probabilities.
    """
    fd = FreqDist(samp)
    f1 = float(fd.Nr(1))
    f2 = float(fd.Nr(2))
    N = float(fd.N())
    B = fd.B()
    # Undetected species & Coverage
    if f2 > 0.0:
        f0 = ceil(((N - 1.0) / N) * (f1 ** 2.0) / (2.0 * f2))
        C = 1.0 - f1 / N * (N - 1.0) * f1 / ((N - 1.0) * f1 + 2.0 * f2)
    else:
        f0 = ceil(((N - 1.0) / N) * f1 * (f1 - 1.0) / 2.0)
        C = 1.0 - f1 / N * (N - 1.0) * f1 / ((N - 1.0) * f1 + 2.0)
    # Correct abundances
    probs = array(fd.values()) / N
    lambdah = (1 - C) / sum(probs * (1 - probs) ** N)
    probs = probs * (1 - lambdah * (1 - probs) ** N)
    # P for unseen
    # paux = (1-C)/f0
    yield fd.values()
    popO = arange(B)
    dist = binom(n=N, p=1 - C)
    probsA = probs / sum(probs)
    while True:
        ns2 = dist.rvs()
        ns1 = int(N) - ns2
        if ns1 > 0:
            samp1 = list(choice(popO, size=ns1, replace=True, p=probsA))
        else:
            # Fixed: this branch used to assign ``samp2``, which left
            # ``samp1`` undefined on the first pass or stale afterwards.
            samp1 = []
        if ns2 > 0:
            # Unseen categories are labelled B .. B + f0 - 1 (inclusive).
            # NOTE(review): ``random_integers`` is deprecated in NumPy; it
            # is kept because only this name is known to be in scope here.
            samp2 = list(random_integers(B, B + int(f0) - 1, ns2))
        else:
            samp2 = []
        yield FreqDist(samp1 + samp2).values()
def mcnemar_midp(b, c):
    """Return the mid-p value of McNemar's test for discordant counts b and c.

    With ``X ~ Binomial(b + c, 0.5)`` and ``x = min(b, c)`` the result is
    ``2 * P(X <= x) - P(X = x)``.
    """
    smaller = min(b, c)
    null_dist = binom(b + c, 0.5)
    exact_p = 2.0 * null_dist.cdf(smaller)
    return exact_p - null_dist.pmf(smaller)
def sample_histogram(hist, factor=2, trim=None):
    """Downsample a histogram by thinning every key with probability ``1 / factor``.

    ``hist`` maps an integer key to a count.  Keys at or above the trim
    value are dropped.  Every remaining key ``i`` spreads its count over the
    keys ``1..i`` using Binomial(i, 1/factor) weights for ``i < 100`` and
    ``poisson_dist(i / factor, i)`` otherwise.  The expected counts are then
    rounded up or down at random, and keys whose count is not positive are
    removed.
    """
    if trim is not None:
        trim = min(max(hist), trim * factor)
    elif len(hist) > 300:
        trim = get_trim(hist)
    else:
        trim = max(hist)
    kept = {key: count for key, count in hist.items() if key < trim}

    prob = 1.0 / factor
    expected = defaultdict(int)
    for key, count in kept.items():
        if key < 100:
            thinning = binom(key, prob)
            weights = [thinning.pmf(j) for j in range(1, key + 1)]
        else:
            weights = poisson_dist(key * prob, key)
        for new_key, weight in enumerate(weights, start=1):
            expected[new_key] += count * weight

    rounded = dict(expected)
    for key, value in rounded.items():
        # One uniform draw per key decides between ceil and floor.
        excess = value - round(value)
        rounded[key] = ceil(value) if random.random() < excess else floor(value)
    # remove 0 elements
    return {key: value for key, value in rounded.items() if value > 0}
def release_likelihood(amplitudes, available_vesicles, release_probability, mini_amplitude, mini_amplitude_stdev, measurement_stdev):
    """Return a measure of the likelihood that a synaptic response will have certain amplitude(s),
    given the state parameters for the synapse.

    Parameters
    ----------
    amplitudes : array
        The amplitudes for which likelihood values will be returned
    available_vesicles : int
        Number of vesicles available for release
    release_probability : float
        Probability for each available vesicle to be released
    mini_amplitude : float
        Mean amplitude of response evoked by a single vesicle release
    mini_amplitude_stdev : float
        Standard deviation of response amplitudes evoked by a single vesicle release
    measurement_stdev : float
        Standard deviation of response amplitude measurement errors

    Returns
    -------
    likelihood : array
        One value per entry of *amplitudes*.

    For each value in *amplitudes*, we calculate the likelihood that a synapse would evoke a response
    of that amplitude. Likelihood is calculated as follows:

    1. Given the number of vesicles available to be released (nV) and the release probability (pR),
       determine the probability that each possible number of vesicles (nR) will be released using
       the binomial distribution probability mass function. For example, if there are 3 vesicles
       available and the release probability is 0.1, then the possibilities are:

           vesicles released (nR)    probability
                                0    0.729
                                1    0.243
                                2    0.027
                                3    0.001

    2. For each possible number of released vesicles, calculate the likelihood that this possibility
       could evoke a response of the tested amplitude. This is calculated using the Gaussian
       probability distribution function where µ = nR * mini_amplitude and
       σ = sqrt(mini_amplitude_stdev^2 * nR + measurement_stdev^2)
    3. The total likelihood is the sum of likelihoods for all possible values of nR.
    """
    amplitudes = np.array(amplitudes)

    release_prob = stats.binom(available_vesicles, release_probability)
    n_vesicles = np.arange(available_vesicles + 1)

    # probability of releasing n_vesicles given available_vesicles and release_probability
    p_n = release_prob.pmf(n_vesicles)

    # expected amplitude for n_vesicles
    amp_mean = n_vesicles * mini_amplitude

    # amplitude stdev increases by sqrt(n) with number of released vesicles
    amp_stdev = (mini_amplitude_stdev**2 * n_vesicles + measurement_stdev**2) ** 0.5

    # distributions of amplitudes expected for n_vesicles
    # (rows: tested amplitudes, columns: number of released vesicles)
    amp_prob = p_n[None, :] * normal_pdf(amp_mean[None, :], amp_stdev[None, :], amplitudes[:, None])

    # sum all distributions across n_vesicles
    return amp_prob.sum(axis=1)
def __init__(self, alpha, beta, states=(0, 1), l=None):
    """
    Define a 2-state Markov chain from its state-change probabilities.

    Parameters
    ----------
    alpha, beta : float
        Probabilities for changing state; `alpha` is the probability
        for changing from the first state to the second, `beta` is the
        probability for changing from the second to the first.

    states : 2-tuple
        Labels for the two states; default is to label the first state
        with the integer 0, and the second with 1.  The labels should be
        legitimate dict keys.

    l : int
        Length of simulated sample paths; can also be separately set or
        changed with path_length().
    """
    self.alpha = alpha
    self.beta = beta
    first, second = states
    self.state_a, self.state_b = first, second  # labels for states

    # Lookup tables between state labels and the integers 0, 1.
    self.s2int = {first : 0, second : 1}
    self.int2s = {0 : first, 1 : second}

    # Transition matrix; trans[j,i] = prob for move from j to i;
    # this is the right-multiplying form.
    stay_a, stay_b = 1.-alpha, 1.-beta
    self.trans = array([[stay_a, beta], [alpha, stay_b]])

    # Transition samplers from states 0, 1; note that the binom param is
    # the "success" (state=1) probability, taken from row 1 of ``trans``:
    self.samplers = [stats.binom(1, self.trans[1,col]).rvs for col in (0, 1)]

    # Equilibrium distribution:
    total = alpha + beta
    self.p0_eq = beta/total
    self.p1_eq = alpha/total

    if l is None:
        self.length = None
    else:
        self.path_length(l)
def show_binomial():
    """Show an example of binomial distributions.

    Plots the pmf of three binomial distributions on the same axes and
    shows the figure.
    """
    bd1 = stats.binom(20, 0.5)
    bd2 = stats.binom(20, 0.7)
    bd3 = stats.binom(40, 0.5)
    k = np.arange(40)
    # ``plt.hold(True)`` was dropped: it was removed from Matplotlib, and
    # successive ``plot`` calls already draw on the same axes.
    plt.plot(k, bd1.pmf(k), 'o-b')
    plt.plot(k, bd2.pmf(k), 'd-r')
    plt.plot(k, bd3.pmf(k), 's-g')
    plt.title('Binomial distribition')
    plt.legend(['p=0.5 and n=20', 'p=0.7 and n=20', 'p=0.5 and n=40'])
    plt.xlabel('X')
    plt.ylabel('P(X)')
    plt.show()
def assert_counts_are_ok(idx_counts, p):
    """Check that every count is plausible under Binomial(n_splits, p).

    ``n_splits`` comes from the enclosing scope.  A count fails when its
    binomial probability is not above ``0.05 / n_splits``.
    """
    # Here we test that the distribution of the counts
    # per index is close enough to a binomial
    reference = stats.binom(n_splits, p)
    threshold = 0.05 / n_splits
    for count in idx_counts:
        prob = reference.pmf(count)
        assert_true(prob > threshold,
                    "An index is not drawn with chance corresponding "
                    "to even draws")
def xor_analysis_new(N, tSNR, rSNRdu, rSNR3, p_a1=10**(-9), p_a2=10**(-9), p_a3=10**(-9)):
    """Accumulate a probability over nested binomial state counts.

    Returns ``0`` immediately when ``rSNRdu > tSNR``.  Otherwise the result
    is a sum over all combinations of ``gc``, ``ad``, ``ad_tilde``,
    ``ad_tilde_s`` and ``ad_hat_s``.  Each term is the product of the
    binomial probabilities of those counts and the success factors built
    from ``p_f3``, ``p_link_2`` and the ``p_a*`` values.

    NOTE(review): SOURCE was collapsed onto two lines; the nesting below is
    inferred from which names each statement uses -- confirm against the
    original file.  The SNR arguments are presumably in dB since they are
    converted with ``10**(x/10)`` -- TODO confirm.
    """
    if rSNRdu > tSNR:
        return 0
    h_du = 10**((rSNRdu - tSNR)/10) # linear fade
    h_xor = 10**((rSNR3 - tSNR)/10)
    # Probability fade is bad
    p_f1 = p_f2 = 1 - np.exp(-h_du)
    p_f3 = 1 - np.exp(h_du-h_xor) if h_xor > h_du else 0
    # p_link_1 is computed but not used below.
    p_link_1 = p_f1 + (1 - p_f1) * p_a1
    p_link_2 = p_f2 + (1 - p_f2) * p_a2
    result = 0
    rv_gc = binom(N, 1 - p_f1)
    for gc in range(0, N+1):
        rv_ad = binom(gc, 1 - p_a1)
        for ad in range(0, gc+1):
            rv_ad_tilde = binom(ad, 1 - p_a2)
            bu = gc - ad
            rv_bu = binom(ad, 1-p_link_2)
            kad = np.arange(1, ad+1)
            # The conditional applies to the whole expression: 1 when p_f3 == 0.
            s_bu_tilde = (1 - p_f3) + p_f3 * np.dot(rv_bu.pmf(kad), 1-np.power(p_f3, kad)) if p_f3 else 1
            # s_bu_tilde = (1 - p_f3) + p_f3 * sum([rv_bu.pmf(k) * (1 - p_f3**k) for k in range(1, ad+1)]) if p_f3 else 1
            q_bu_tilde = (1 - s_bu_tilde) + s_bu_tilde * p_a3
            for ad_tilde in range(0, ad+1):
                rv_ad_tilde_s = binom(ad_tilde, 1 - p_f3)
                rv_ad_hat_s = binom(ad - ad_tilde, 1 - p_f3)
                # if p_f3 = 0 then ad_tilde_s should = ad_tilde because ad_tilde_i should be empty
                for ad_tilde_s in range(0 if p_f3 else ad_tilde, ad_tilde+1): # ad_tilde already succeeded
                    # if p_f3 = 0 then ad_hat_s should = ad - ad_tilde because ad_hat_i should be empty
                    for ad_hat_s in range(0 if p_f3 else ad-ad_tilde, ad-ad_tilde+1):
                        ad_s = ad_tilde_s + ad_hat_s
                        ad_i = ad - ad_tilde_s - ad_hat_s # ad - ad_s
                        rv_ads = binom(ad_s, 1 - p_link_2)
                        rv_adi = binom(ad_i, 1 - p_link_2)
                        q_ad_hat_s = p_a3
                        q_ad_hat_i = p_link_2**ad_s + (1 - p_link_2**ad_s) * p_a3
                        # This is the problem zone that doesn't support vectorization (?)
                        ks, ki = np.arange(1, ad_s+1), np.arange(0, ad_i+1)
                        f_e = p_link_2**ad_s + (np.dot(rv_ads.pmf(ks), np.power(p_f3, ks)) * np.dot(rv_adi.pmf(ki), np.power(p_f3, ki)) if p_f3 else 0)
                        q_e = f_e + (1 - f_e) * (1 - (1 - p_a3)**2)
                        rv_bu_tilde = binom(bu, 1 - p_a2)
                        bu_tilde = np.arange(0, bu+1)
                        # Joint probability of the five state counts, formed in log10 space.
                        log_pstates = np.log10(rv_gc.pmf(gc)) + np.log10(rv_ad.pmf(ad)) + np.log10(rv_ad_tilde.pmf(ad_tilde)) + np.log10(rv_ad_tilde_s.pmf(ad_tilde_s)) + np.log10(rv_ad_hat_s.pmf(ad_hat_s))
                        # The sum over bu_tilde is done as a single dot product.
                        bu_tilde_res = np.dot(rv_bu_tilde.pmf(bu_tilde), np.multiply(np.power(1 - q_e, N-ad-bu_tilde), np.power(1 - q_bu_tilde, bu_tilde)))
                        res = 10**log_pstates * (1 - q_ad_hat_s)**ad_hat_s * (1 - q_ad_hat_i)**(ad - ad_tilde - ad_hat_s) * bu_tilde_res
                        result += res
                        # print res, result
                        # print gc, ad, ad_tilde, ad_tilde_s, ad_hat_s, bu_tilde
                        # print "\n"
    return result
def forward_summing(P, player, alpha, beta, gamma):
    """Run a normalised forward pass over a player's seasons.

    Args
    ----
    P : np.array
        position specific elite transition matrix, indexed as
        ``P[position, :, :]``
    player : pd.DataFrame.GroupBy
        Player-year groupby pandas dataframe; the columns
        ``position_main``, ``playerAge``, ``AB`` and ``HR`` are read
    alpha : np.array
        position specific intercepts, one per state
    beta : np.array
        park specific coef
    gamma : np.array
        position specific spline coefs

    Returns
    -------
    M : np.array of shape (years, 2)
        Row 0 is ``[P[pos, 0, 1], P[pos, 1, 0]]`` for the first position.
        Every later row is the previous row propagated through that year's
        transition matrix, weighted by the binomial likelihood of the
        year's home runs under each state and normalised to sum to one.
    """
    n_years = len(player)
    first_P = P[player.position_main[0], :, :]

    M = np.zeros((n_years, 2))
    M[0, :] = np.array([first_P[0, 1], first_P[1, 0]])

    for yr in range(1, n_years):
        pos = player.position_main[yr]
        propagated = M[yr - 1, :].dot(P[pos, :, :]).reshape(1, -1)

        age = player.playerAge[yr]
        ab = player.AB[yr]
        hrs = player.HR[yr]

        trajectory = age_trajectory(age, gamma[pos])
        likelihoods = []
        for state in (0, 1):
            rate = theta(alpha[pos, state], beta[pos], trajectory)
            lik = stats.binom(n=ab, p=rate).pmf(k=hrs)
            # A zero likelihood is replaced by a tiny positive value so the
            # normalisation below never divides by zero.
            if lik == 0:
                lik = 1e-32
            likelihoods.append(lik)

        propagated *= np.array(likelihoods)
        propagated /= propagated.sum()
        M[yr, :] = propagated
    return M
def expected_distn_unbiased(self):
    """Print the mean and standard deviation of Binomial(self.n, self.p)."""
    expected = scs.binom(self.n, self.p)
    summary = (expected.mean(), expected.std())
    print("The expected distribution for a fair coin is mu=%s, sd=%s" % summary)
## declare variables
font_size = 11
font_name = 'sans-serif'
n = 10000
fig = plt.figure(figsize=(10, 6), dpi=300)
splot = 0

## loop through parameterizations of the binomial, one subplot each
for n, p in [(5, 0.25), (5, 0.5), (5, 0.75)]:
    splot += 1
    ax = fig.add_subplot(1, 3, splot)

    # support between the 1% and 99% quantiles (upper end excluded)
    x = np.arange(scs.binom.ppf(0.01, n, p), scs.binom.ppf(0.99, n, p))
    ax.plot(x, scs.binom.pmf(x, n, p), 'bo', ms=8, label='pmf')
    ax.vlines(x, 0, scs.binom.pmf(x, n, p), colors='b', lw=5, alpha=0.5)
    rv = scs.binom(n, p)

    ax.set_ylim((0, 1.0))
    ax.set_xlim((-0.5, 4.5))
    ax.set_title("n=%s,p=%s" % (n, p))
    ax.set_aspect(1. / ax.get_data_ratio())

    # same font for the tick labels of both axes
    for t in list(ax.get_xticklabels()) + list(ax.get_yticklabels()):
        t.set_fontsize(font_size - 1)
        t.set_fontname(font_name)

plt.savefig("binomial.png", dpi=400)
plt.show()
# Preview the response and predictor columns.
print(dta[[
    'NABOVE', 'NBELOW', 'LOWINC', 'PERASIAN', 'PERBLACK', 'PERHISP', 'PERMINTE'
]].head(10))
print(dta[[
    'AVYRSEXP', 'AVSALK', 'PERSPENK', 'PTRATIO', 'PCTAF', 'PCTCHRT', 'PCTYRRND'
]].head(10))

# Two-column response (NABOVE, NBELOW) with main effects and interactions.
formula = 'NABOVE + NBELOW ~ LOWINC + PERASIAN + PERBLACK + PERHISP + PCTCHRT '
formula += '+ PCTYRRND + PERMINTE*AVYRSEXP*AVSALK + PERSPENK*PTRATIO*PCTAF'

# #### Aside: Binomial distribution

# Toss a six-sided die 5 times, what's the probability of exactly 2 fours?

stats.binom(5, 1. / 6).pmf(2)

# ``comb`` lives in scipy.special; the old scipy.misc alias was removed.
from scipy.special import comb
comb(5, 2) * (1 / 6.)**2 * (5 / 6.)**3

from statsmodels.formula.api import glm
glm_mod = glm(formula, dta, family=sm.families.Binomial()).fit()

print(glm_mod.summary())

# The number of trials

glm_mod.model.data.orig_endog.sum(1)

glm_mod.fittedvalues * glm_mod.model.data.orig_endog.sum(1)
def plotear(U, E, G, N, P, B, PS, H, EM):
    """Plot each observed sample, one figure per distribution.

    The uniform, exponential and normal samples (U, E, N) are drawn against
    a kernel density estimate of 1000 fresh draws from the expected
    distribution.  The binomial and Poisson samples (B, PS) are drawn
    against the expected pmf.  The gamma, Pascal and hypergeometric samples
    (G, P, H) are shown as plain histograms, and the empirical sample (EM)
    on its own.
    """
    expected_label = "Distribución esperada"
    observed_label = "Distribución observada"

    def _observed(sample):
        # Histogram/density of the observed sample with black bar edges.
        sns.distplot(sample, hist_kws=dict(edgecolor="k"), label=observed_label)

    def _kde_vs_observed(expected_draws, sample, title):
        sns.kdeplot(expected_draws, label=expected_label)
        _observed(sample)
        plt.title(title)
        plt.legend(loc="upper left")
        plt.show()

    def _plain_histogram(sample, title):
        plt.title(title)
        plt.hist(sample, alpha=1, edgecolor='black')
        plt.show()

    # continuous distributions
    # ------------- uniform -------------
    _kde_vs_observed(ss.uniform.rvs(size=1000, loc=1, scale=3), U,
                     "Distribución Uniforme")
    # ------------- exponential -------------
    _kde_vs_observed(ss.expon.rvs(size=1000, loc=0, scale=1), E,
                     "Distribución Exponencial")
    # ------------- gamma -------------
    _plain_histogram(G, "Distribución Gamma")
    # ------------- normal -------------
    _kde_vs_observed(ss.norm.rvs(size=1000, loc=2.35, scale=30), N,
                     "Distribución Normal")

    # discrete distributions
    # ------------- Pascal -------------
    _plain_histogram(P, "Distribución Pascal")

    # ------------- binomial -------------
    n_trials, p_success = 30, 0.4  # shape parameters
    binomial = ss.binom(n_trials, p_success)
    x = np.arange(binomial.ppf(0.01), binomial.ppf(0.99))
    plt.plot(x, binomial.pmf(x), '--', label=expected_label)  # probability mass function
    _observed(B)
    plt.title("Distribución Binomial")
    plt.ylabel('probabilidad')
    plt.xlabel('valores')
    plt.legend(loc="upper left")
    plt.show()

    # ------------- Poisson -------------
    poisson = ss.poisson(3.6)
    xLine = np.arange(poisson.ppf(0.01), poisson.ppf(0.99))
    plt.plot(xLine, poisson.pmf(xLine), label=expected_label)
    _observed(PS)
    plt.title("Distribución de Poisson")
    plt.legend(loc="upper left")
    plt.show()

    # ------------- hypergeometric -------------
    _plain_histogram(H, "Distribución Hipergeometrica")

    # ------------- empirical -------------
    _observed(EM)
    plt.title("Distribucion Empirica")
    plt.show()
num_workers=config.opts.cpus, shuffle=shuffle, pin_memory='cuda' in str(config.opts.device), worker_init_fn=lambda _: np.random.seed( int(torch.initial_seed()) % (2**32 - 1))) dataset_train: NodeCountDataset = torch.load(folder_data / 'train.pt') dataset_val: NodeCountDataset = torch.load(folder_data / 'val.pt') dataset_test: NodeCountDataset = torch.load(folder_data / 'test.pt') dataloader_train = make_dataloader(dataset_train, shuffle=True) dataloader_val = make_dataloader(dataset_val, shuffle=False) dataloader_test = make_dataloader(dataset_test, shuffle=False) if dataset_train.informative_features > 0: binom = stats.binom(n=dataset_train.max_nodes, p=.5**dataset_train.informative_features) def weight_fn(targets): return targets.new_tensor(-binom.logpmf(targets.cpu().numpy())) else: def weight_fn(targets): return torch.ones_like(targets) epoch_bar_postfix = {} epoch_start = train_state.epochs + 1 epoch_end = train_state.epochs + 1 + config.training.epochs epoch_bar = tqdm.trange(epoch_start, epoch_end, desc='Training',
def pipeline(ax, model, n, lo, value, threshold):
    """Plot the model, compute the p-value of ``value`` and print the verdict.

    Prints 'Reject Null Hypothesis' when the p-value is below *threshold*,
    otherwise 'Fail to Reject Null Hypothesis'.
    """
    plot_model(ax, model, n, lo, value)
    p = calc_p_value(model, value)
    verdict = 'Reject Null Hypothesis' if p < threshold else 'Fail to Reject Null Hypothesis'
    print(verdict)


if __name__ == "__main__":
    # Part 2
    #1 Null hypothesis is that Muriel cannot tell if the water was poured
    # before or after the milk. Binomial. Failed to reject null, p = 30.4%
    binomial = stats.binom(n=137, p=0.5)
    fig, ax = plt.subplots()
    pipeline(ax, binomial, 137, 40, 72, 0.05)

    #2 Null hypothesis is that we haven't progressed on our heelflip capability
    # Rejected null, p = 2.8%
    n, value, p, threshold = 122, 72, 0.5, .05
    binomial = stats.binom(n=n, p=p)
    fig, ax = plt.subplots()
from scipy.stats import binom

# X = number of hits in n = 4 shots with hit probability p = 4/5
n = 4
p = 4 / 5
rv = binom(n, p)

# more than 2 hits => P(X>2) = P(X=3) + P(X=4)
P0 = sum(rv.pmf(k) for k in (3, 4))
print('%.3f' % P0)

# at least 3 misses => P(X<=1) = P(X=0) + P(X=1)
P1 = sum(rv.pmf(k) for k in (0, 1))
print('%.3f' % P1)
# Simulated car counts (cars_x is defined earlier): empirical frequencies.
cars_x.value_counts()
p_no_cars_x = (cars_x == 0).mean()
p_at_least_3_x = (cars_x >= 3).mean()
p_at_least_1_x = (cars_x >= 1).mean()

# Grades ~ Normal(mean 3, sd 0.3): analytic cut-offs.
grades = stats.norm(3, .3)
top_5 = grades.isf(.05)          # value exceeded with probability 0.05
third_dec_top = grades.isf(.7)   # 30th percentile
third_dec_bot = grades.ppf(.2)   # 20th percentile

# The same cut-offs estimated from one million simulated grades.
grades_x = .3 * np.random.randn(1000000) + 3
top_5_x = np.percentile(grades_x, 95)
third_dec_top_x = np.percentile(grades_x, 30)
third_dec_bot_x = np.percentile(grades_x, 20)

# Clicks ~ Binomial(4326, 0.02): sf(96) is P(X >= 97).
clicks = stats.binom(4326, .02)
at_least_97 = clicks.sf(96)
clicks_x = np.random.binomial(4326, .02, 1000000)
at_least_97_x = (clicks_x >= 97).mean()

# At least one success in 60 trials with probability 0.01 each.
in_first_60 = stats.binom(60, .01).sf(0)
in_first_60_x = (np.random.binomial(60, .01, 1000000) >= 1).mean()

# Visitors ~ Binomial(round(0.9 * 22 * 3), 0.03).
n_visitors = round(.9 * 22 * 3)
clean_up = stats.binom(n_visitors, .03)
p_day_clean = clean_up.sf(0)           # P(X >= 1)
p_2_unclean = clean_up.cdf(0)**2       # P(X = 0) squared
p_week_unclean = clean_up.cdf(0)**5    # P(X = 0) to the fifth power
def get_p_value(n, p, x):
    """Return a two-sided tail probability of Binomial(n, p), truncated to 5..n-5.

    For ``x < n/2`` the result is
    ``P(X <= x or X >= n - x | 5 <= X <= n - 5)``, built from CDF differences.
    For any other ``x`` the function returns the integer ``1``.
    """
    cdf = st.binom(n=n, p=p).cdf
    if not x < n / 2:
        return 1
    # Mass in (4, x] plus mass in (n-x-1, n-5] ...
    tail_mass = cdf(x) - cdf(4) + cdf(n - 5) - cdf(n - x - 1)
    # ... normalised by the mass of the whole window (4, n-5].
    window_mass = cdf(n - 5) - cdf(4)
    return tail_mass / window_mass
sns.set_style("whitegrid")

n_params = [1, 2, 4]
p_params = [0.25, 0.5, 0.75]
x = np.arange(0, max(n_params) + 1)

# One panel per (n, p) pair: rows follow n_params, columns follow p_params.
fig, ax = plt.subplots(len(n_params), len(p_params), sharex=True, sharey=True)

for i, n in enumerate(n_params):
    for j, p in enumerate(p_params):
        y = stats.binom(n=n, p=p).pmf(x)
        panel = ax[i, j]
        panel.vlines(x, 0, y, colors="b", lw=5)
        panel.set_ylim(0, 1)
        # Fully transparent dummy point whose only job is to carry the legend text.
        panel.plot(0, 0, label="n = {:3.2f}\np={:3.2f}".format(n, p), alpha=0)
        panel.legend(fontsize=12)

# Shared axis labels go on the bottom-middle and middle-left panels.
ax[2, 1].set_xlabel(r"$\theta$", fontsize=14)
ax[1, 0].set_ylabel(r"$p(y|\theta)$", fontsize=14)
ax[0, 0].set_xticks(x)

plt.tight_layout()
plt.savefig("plots/binomial_distribution.png")
import distfit # print(distfit.__version__) # print(dir(distfit)) # %% Multiple distributions as input from distfit import distfit X = np.random.normal(0, 2, 10000) y = [-8, -6, 0, 1, 2, 3, 4, 5, 6] dist = distfit(stats='RSS', distr=['norm', 't', 'gamma']) results = dist.fit_transform(X) # %% Discrete example from distfit import distfit from scipy.stats import binom # Generate random numbers X = binom(8, 0.5).rvs(1000) dist = distfit(method='discrete', f=1.5, weighted=True, stats='wasserstein') model = dist.fit_transform(X, verbose=3) dist.plot() # Make prediction results = dist.predict([0, 1, 10, 11, 12]) dist.plot() # Generate samples Xgen = dist.generate(n=1000) dist.fit_transform(Xgen) results = dist.predict([0, 1, 10, 11, 12]) dist.plot()
def func(self, x):
    """Evaluate the Binomial(self.n, self.p) probability mass function at ``x``."""
    frozen = binom(self.n, self.p)
    return frozen.pmf(x)
def _reset_distribution(self):
    """Rebuild ``self._distribution`` as a frozen binomial from ``_n`` and ``_p``."""
    frozen: rv_discrete = binom(self._n, self._p)
    self._distribution = frozen
def __init__(self, num_trials, probability):
    """Store the binomial parameters and their derived summary values.

    Sets ``expectation`` to ``num_trials * probability``,
    ``standard_deviation`` to ``sqrt(expectation * (1 - probability))`` and
    keeps a frozen ``stats.binom`` in ``_binomial``.
    """
    mean = num_trials * probability
    self.num_trials, self.probability = num_trials, probability
    self.expectation = mean
    self.standard_deviation = math.sqrt(mean * (1 - probability))
    self._binomial = stats.binom(num_trials, probability)
def p_y_theta(y, n, theta):
    """Return the Binomial(n, theta) probability mass at ``y``."""
    return binom(n, theta).pmf(y)
# Bernoulli distribution
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.bernoulli.html
bernoulli_rv = sts.bernoulli(0.7)
b = bernoulli_rv.rvs(300)
# Absolute gap between the observed success count and its expectation,
# divided by the sample size.
print(abs(b.sum() - 300 * 0.7) / 300)

# Binomial distribution
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.binom.html
binomial_rv = sts.binom(20, 0.9)
x = np.linspace(0, 20, 21)
cdf = binomial_rv.cdf(x)
pmf = binomial_rv.pmf(x)

# CDF as a step function ...
plt.step(x, cdf)
plt.ylabel('$F(x)$')
plt.xlabel('$x$')

# ... then the PMF as isolated points.
plt.plot(x, pmf, 'o')
plt.ylabel('$P(X=x)$')
plt.xlabel('$x$')

# Poisson distribution
# Define the distribution parameters to be plotted n_values = [40, 4] b_values = [0.35, 0.35] linestyles = ['-', '--'] color = ['red', 'blue'] color2 = ['yellow', 'green'] x = np.arange(-1, 200) #------------------------------------------------------------ # plot the distributions fig, ax = plt.subplots(figsize=(5, 3.75)) for (n, b, ls, col, col2) in zip(n_values, b_values, linestyles, color, color2): # create a binomial distribution dist = binom(n, b) poi = poisson(n * b) mu = dist.mean() std = dist.std() mu2 = poi.mean() std2 = poi.std() plt.plot(x, dist.pmf(x), ls=ls, c=col, label=r'$b=%.2f,\ n=%i$' % (b, n), linestyle='steps-mid') plt.plot(x, poi.pmf(x), ls=ls,
def test_inf_domain(self, domain):
    """Expect ``DiscreteAliasUrn`` to reject ``domain`` with a "must be finite" ValueError."""
    dist = stats.binom(10, 0.2)
    with pytest.raises(ValueError, match=r"must be finite"):
        DiscreteAliasUrn(dist, domain=domain)
# Summary statistics written to the JSON report.
# NOTE(review): ``runningSum``, ``N``, ``P`` and ``writeJsonFile`` are defined
# outside this excerpt.
sampleVariance = runningSum / N
MEAN = P
STANDARD_DEVIATION = math.sqrt(sampleVariance)
MEDIAN = 1
data = {
    "Sample Mean": MEAN,
    "Sample Variance": sampleVariance,
    "Sample Standard Deviation": STANDARD_DEVIATION,
    "Sample Median": MEDIAN,
    "N": N,
    "P": P
}
writeJsonFile('reports/statistics.json', data)

from scipy.stats import binom
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)
# Binomial(N, P) is supported on 0..N inclusive, so plot every outcome;
# a shorter range would cut off the upper end of the distribution.
x = range(N + 1)
rv = binom(N, P)
print(N, P)
expected_rb = binom(N, 0.5)
ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=10)
ax.legend(loc='best', frameon=False)
plt.suptitle('Probablity of Explicitily Typed Languages')
plt.ylabel("pmf")
plt.xlabel("# of projects that are explicitly typed")
plt.show()
#!/usr/bin/env python
"""Print log10 binomial survival-function values for each input line.

Each non-blank input line must start with an integer ``n``.  For that line the
script prints, space separated, ``log10 P(X > i)`` for
``i = 0 .. nalleles*n - 1`` where ``X ~ Binomial(nalleles*n, 0.5)``.
"""
from __future__ import print_function

import argparse
import math
import sys

from scipy.stats import binom


def _log10_sf_values(n, nalleles=2):
    """Return ``[log10 P(X > i)]`` for ``i`` in ``range(nalleles * n)``.

    ``X`` follows Binomial(``nalleles * n``, 0.5).  Raises ``ValueError`` from
    ``math.log10`` if a tail probability underflows to zero.
    """
    trials = nalleles * n
    dist = binom(trials, 0.5)
    return [math.log10(dist.sf(i)) for i in range(trials)]


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin)
    # NOTE(review): --nalleles is used as the multiplier applied to n; its
    # default of 2 reproduces the fixed "2 * n" trial count.  Confirm that
    # this is the intended meaning of the option.
    parser.add_argument('-n', '--nalleles', type=int, default=2)
    args = parser.parse_args()

    with args.infile as f:
        # Iterate lazily instead of loading the whole file with readlines().
        for line in f:
            items = line.split()
            if not items:
                # Blank line: nothing to compute.
                continue
            n = int(items[0])
            bsf = _log10_sf_values(n, args.nalleles)
            print(" ".join(str(r) for r in bsf))
# NOTE(review): ``hg`` on the next line is a distribution defined before this
# excerpt.
a = hg.cdf(0)  # P(X <= 0)
print("a:", a)
M = 25
n = 3
N = 1
hg = stats.hypergeom(M, n, N)
b = hg.cdf(1) - hg.cdf(0)  # P(X = 1)
print("b:", b)

# Exercise 5.37 ----------------------------------------
print("\nEJERCICIO 5.37 *************************************")
n = 3
p = 3 / 25
bi = stats.binom(n, p)
a = bi.cdf(0)  # P(X = 0)
print("a:", a)
n = 3
p = 1 / 25
bi = stats.binom(n, p)
b = bi.cdf(3) - bi.cdf(0)  # P(1 <= X <= 3)
print("b:", b)

# Exercise 5.39
print("\nEJERCICIO 5.39 *************************************")
n = 10
p = .5
bi = stats.binom(n, p)
a = 1 - bi.cdf(2)  # P(X >= 3)
print("Original probablity for exact 5 'D':", P)
print("Probablity using built in func", stats.binom.pmf(5, 50, 1 / 5))

fig, ax = plt.subplots(1, 1)
# NOTE(review): ``P`` is passed below as the success probability although the
# first print labels it as the probability of exactly 5 'D' -- confirm.
mean, var, skew, kurt = stats.binom.stats(Number_of_question, P, moments='mvsk')

# Plot between the 1% and 99% quantiles of the distribution.
lower = stats.binom.ppf(0.01, Number_of_question, P)
upper = stats.binom.ppf(0.99, Number_of_question, P)
x = np.arange(lower, upper)

pmf_values = stats.binom.pmf(x, Number_of_question, P)
ax.plot(x, pmf_values, 'bo', ms=8, label='binom pmf')
ax.vlines(x, 0, pmf_values, colors='b', lw=5, alpha=0.5)

# Same pmf again, this time through a frozen distribution object.
rv = stats.binom(Number_of_question, P)
ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1, label='frozen pmf')

ax.legend(loc='best', frameon=False)
plt.show()
'''
Online Python Compiler.
Code, Compile, Run and Debug python program online.
Write your code in this editor and press "Run" button to execute it.
'''
import math
from scipy import stats

# X is a binomial random variable: 6 trials, success probability 2/3.
X = stats.binom(6, 2 / 3)

print(
    "Probability that in the next 6 trials, there will be atleast 4 successes")
# P(X >= 4) = P(X = 4) + P(X = 5) + P(X = 6)
at_least_four = sum(X.pmf(successes) for successes in range(4, 7))
print(at_least_four, "\n")

# P(X = 0)
# A marketing website has an average click-through rate of 2%.
# One day they observe 4326 visitors and 97 click-throughs.
# How likely is it that this many people or more click through?
'''
mean click through rate = .02
observed 4326 visitors
'''
n = 4326
prob = .02
dist = binom(n, prob)
# "97 or more" is P(X >= 97) = P(X > 96).  sf(k) is the strict upper tail
# P(X > k), so the argument must be 96 (sf(97) would exclude exactly 97).
dist.sf(96)

# Simulation check: each uniform draw on [1, 101) is below 3 with probability
# 0.02, so each row of 4326 draws models one day of visitors.
trials = rows = 10_000
samples = cols = 4326
data = np.random.uniform(1, 101, samples * trials).reshape(rows, cols)
data = pd.DataFrame(data)
# Fraction of simulated days with at least 97 click-throughs.
(((data < 3).sum(axis=1)) >= 97).sum() / trials

# You are working on some statistics homework consisting of 100 questions
# where all of the answers are a probability rounded to the hundreths place.
def sample(self, N=None):
    """Draw ``N`` variates from Binomial(self.n, self.p), seeded by ``self.random``."""
    frozen = binom(self.n, self.p)
    return frozen.rvs(size=N, random_state=self.random)
def peak_plot(peak, sample_table=None, max_dist=None, norm_on_center=True, log_y=True,
              marker_list=None, color_list=None, guidelines=None, guideline_colors=None,
              legend_off=False, legend_col=2, ax=None, figsize=None, save_fig_to=None):
    """Plot the distribution of spike_in peak

    Plot a scatter-line plot of [adjusted] number of sequences with i edit distance
    from center sequence (spike-in seq)

    Args:
        peak (Peak): a Peak instance
        sample_table (pd.DataFrame): abundance of sequence in samples. With samples as
            columns. If None, try `peak.seqs`
        max_dist (int): maximum distance to survey. If None, try `peak.radius`
        norm_on_center (bool): if the counts/abundance are normalized to the peak center
        log_y (bool): if set the y scale as log
        marker_list (list of str): overwrite default marker scheme if not `None`, same
            length and order as samples in sample_table
        color_list (list of str): overwrite default color scheme if not `None`, same
            length and order as samples in sample_table
        guidelines (float or list of float): add a series of guidelines of the peak shape
            with certain mutation rates, optional. A single number is treated as a
            one-element list
        guideline_colors (list of color): the color of guidelines, same shape as guidelines
        legend_off (bool): do not show the legend if True
        legend_col (int): number of col for legend if show
        ax (matplotlib.Axis): if use external ax object to plot. Create a new figure if None
        figsize (2-tuple): size of the figure
        save_fig_to (str): save the figure to file if not None

    Returns:
        ax for plotted figure

    Raises:
        ValueError: if `sample_table` or `max_dist` cannot be determined, or if
            `marker_list` / `color_list` do not have one entry per sample column
    """
    import numpy as np

    if sample_table is None:
        if isinstance(peak.seqs, pd.DataFrame):
            sample_table = peak.seqs
        else:
            logging.error('Please indicate sample_table')
            raise ValueError('Please indicate sample_table')

    if max_dist is None:
        if peak.radius is None:
            logging.error('Please indicate the maximum distance to survey')
            raise ValueError('Please indicate the maximum distance to survey')
        max_dist = peak.radius

    n_samples = sample_table.shape[1]

    if marker_list is None:
        marker_list = Presets.markers(num=n_samples, with_line=True)
    elif len(marker_list) != n_samples:
        msg = 'Error: length of marker_list does not align with the number of valid samples to plot'
        logging.error(msg)
        # ValueError is still caught by callers handling the generic Exception.
        raise ValueError(msg)

    if color_list is None:
        color_list = Presets.color_tab10(num=n_samples)
    elif len(color_list) != n_samples:
        msg = 'Error: length of color_list does not align with the number of valid samples to plot'
        logging.error(msg)
        raise ValueError(msg)

    if ax is None:
        if figsize is None:
            # Leave extra width for the legend placed outside the axes.
            figsize = (max_dist / 2, 6) if legend_off else (max_dist / 2 + 5, 6)
        fig, ax = plt.subplots(1, 1, figsize=figsize)

    rel_abun, _ = peak.peak_abun(max_radius=max_dist, table=sample_table,
                                 use_relative=norm_on_center)

    for sample, color, marker in zip(sample_table.columns, color_list, marker_list):
        ax.plot(rel_abun.index, rel_abun[sample], marker, color=color, label=sample,
                ls='-', alpha=0.5, markeredgewidth=2)

    if log_y:
        ax.set_yscale('log')
    # Remember the data-driven limits so guidelines cannot rescale the y axis.
    ylim = ax.get_ylim()

    # add guide line if applicable
    if guidelines is not None:
        if not norm_on_center:
            logging.warning(
                'Can only add guidelines if peaks are normed on center, skip guidelines'
            )
        else:
            # assuming a fix error rate per nt, iid on binom
            from scipy.stats import binom

            if isinstance(guidelines, (float, int)):
                # Normalise a single rate to a list so len() and zip() below work.
                guidelines = [guidelines]
            if guideline_colors is None:
                guideline_colors = Presets.color_tab10(num=len(guidelines))
            dist_series = np.arange(max_dist + 1)
            for ix, (p, color) in enumerate(zip(guidelines, guideline_colors)):
                rv = binom(len(peak.center_seq), p)
                pmfs = rv.pmf(dist_series)
                # Scale so the curve equals 1 at distance 0, like the normed data.
                pmfs_normed = pmfs / pmfs[0]
                ax.plot(dist_series, pmfs_normed, color=color, ls='--',
                        alpha=(ix + 1) / len(guidelines), label=f'p = {p}')
            ax.set_ylim(ylim)

    y_label = ''
    if norm_on_center:
        y_label += ' normed'
    y_label += ' counts'
    ax.set_ylabel(y_label.title(), fontsize=14)
    ax.set_xlabel('Distance to peak center', fontsize=14)

    if not legend_off:
        ax.legend(loc=[1.02, 0], fontsize=9, frameon=False, ncol=legend_col)
    plt.tight_layout()

    if save_fig_to:
        fig = plt.gcf()
        # Transparent figure background for the exported image.
        fig.patch.set_facecolor('none')
        fig.patch.set_alpha(0)
        plt.savefig(save_fig_to, bbox_inches='tight', dpi=300)
    return ax
def bin_sf(cov, mc, p):
    """Return ``P(X > mc)`` for ``X ~ Binomial(cov, p)``.

    When ``cov`` does not exceed ``mc`` the integer ``0`` is returned without
    building a distribution.
    """
    if not cov > mc:
        # cov == mc: no outcome can exceed mc, so the upper tail is empty.
        return 0
    return stats.binom(cov, p).sf(mc)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 24 00:05:59 2017

@author: ericcacciavillani
"""
import scipy.stats as ss

n = 15  # Number of total bets
p = .4  # Probability of event
max_sbets = 3  # Maximum number of successful bets

hh = ss.binom(n, p)

# Add up P(X = k) for k = 1 .. max_sbets.  range() excludes its upper bound,
# hence the "+ 1".
total_p = sum(hh.pmf(k) for k in range(1, max_sbets + 1))
print(total_p)
# Poisson curves, one per rate.
# NOTE(review): ``k``, ``colors`` and ``poisson`` come from before this excerpt.
plt.figure(figsize=(12, 8))
for i, lambda_ in enumerate([1, 2, 4, 6]):
    poisson_pmf = poisson.pmf(k, lambda_)
    plt.plot(k, poisson_pmf, '-o', label=lambda_, color=colors[i])
    plt.fill_between(k, poisson_pmf, color=colors[i], alpha=0.5)
plt.legend()
plt.title("Poisson distribution")
plt.ylabel("PDF at $k$")
plt.xlabel("$k$")
plt.show()

# Binomial
plt.figure(figsize=(12, 6))
k = np.arange(0, 22)
for p, color in zip([0.1, 0.3, 0.6, 0.8], colors):
    rv = binom(20, p)
    binom_pmf = rv.pmf(k)
    plt.plot(k, binom_pmf, lw=2, color=color, label=p)
    plt.fill_between(k, binom_pmf, color=color, alpha=0.5)
plt.legend()
plt.title("Binomial distribution")
plt.tight_layout()
plt.ylabel("PDF at $k$")
plt.xlabel("$k$")
plt.show()

# Alpha
x = np.linspace(0.1, 2, 100)
alpha = scipy.stats.alpha
alphas = [0.5, 1, 2, 4]
plt.figure(figsize=(12, 6))