def stepSEIpR_s(inits, simstep, totpop, theta=0, npass=0, bi=None, bp=None,
                values=None, model=None, dist='poisson') -> tuple:
    """
    Defines a stochastic SEIpR model and advances it by one time step.

    - inits = (E, I, S), the state produced by the previous step (ignored
      when simstep == 0, in which case the state is read from ``bi``)
    - totpop = total population N of the site
    - theta = infectious individuals from neighbor sites
    - npass = added to N in the denominator of the force of infection
    - bi / bp = dicts with initial values / parameters; every key is looked
      up as ``str`` first and as ``bytes`` second
    - dist = 'poisson' or 'negbin', the distribution of the new cases
    - values, model = accepted for signature compatibility, not used here

    Returns ``([Epos, Ipos, Spos], Lpos + Lpos2, migInf)``.
    Raises ValueError when ``dist`` is not a supported distribution name.
    """
    if simstep == 0:  # get initial values
        E, I, S = (bi.get('e', bi.get(b'e')),
                   bi.get('i', bi.get(b'i')),
                   bi.get('s', bi.get(b's')))
    else:
        E, I, S = inits
    N = totpop
    beta = bp.get('beta', bp.get(b'beta'))
    alpha = bp.get('alpha', bp.get(b'alpha'))
    e = bp.get('e', bp.get(b'e'))
    r = bp.get('r', bp.get(b'r'))
    b = bp.get('b', bp.get(b'b'))
    p = bp.get('p', bp.get(b'p'))
    # R is not part of the state vector; it is whatever is left of N.
    R = max(0, N - E - I - S)

    Lpos_esp = float(beta) * S * ((I + theta) / (N + npass))**alpha  # Number of new cases
    Lpos2_esp = p * float(beta) * R * ((I + theta) / (N + npass))**alpha  # secondary infections

    if dist not in ('poisson', 'negbin'):
        # Previously an unknown name fell through and failed later with an
        # unbound-variable error.
        raise ValueError("dist must be 'poisson' or 'negbin', got %r" % (dist,))
    if dist == 'poisson' or I <= 0:
        # The negative binomial uses I as its size parameter, which is
        # undefined for I <= 0 (the old code hit 0/0 or a numpy ValueError).
        # In that case fall back to a Poisson draw with the same mean.
        Lpos = poisson(np.nan_to_num(Lpos_esp))
        Lpos2 = poisson(np.nan_to_num(Lpos2_esp))
    else:
        prob = I / (I + Lpos_esp)  # converting between parameterizations
        Lpos = negative_binomial(I, prob)
        prob = I / (I + Lpos2_esp)  # converting between parameterizations
        Lpos2 = negative_binomial(I, prob)
    Lpos = min(S, Lpos)  # to avoid underflow
    Lpos2 = min(R, Lpos2)  # to avoid underflow

    # Model
    Epos = (1 - e) * E + Lpos + Lpos2
    Ipos = e * E + (1 - r) * I
    Spos = S + b - Lpos

    # Migrating infectious
    migInf = Ipos
    return [Epos, Ipos, Spos], Lpos + Lpos2, migInf
def Simulation(days=300, nd=30, Rt=None, muT=4, sizeV=1, limit=1000000, pp=0.001, n0=1):
    """Simulate a branching-process epidemic and return daily counts.

    Parameters
    ----------
    days : observation period (length of the returned daily series)
    nd : simulation period, in days; ``Rt`` must provide at least ``nd`` values
    Rt : infection rate pattern, one value per simulated day.  NOTE: entries
        are overwritten in place with 0.001 once the cumulative number of
        cases exceeds ``limit * (1 - pp)``.
    muT : mean time (days) until an infected person infects another person
    sizeV : dispersion parameter, so that variance = mu + mu^2 / size
    limit : the target/study population size
    pp : the proportion of people with immunity in the population
    n0 : the initial number of infectious persons

    Returns
    -------
    ``[atrisk, kk, tt]``: active cases per day, new cases per day, and the
    cumulative number of infections drawn.  ``tt`` also counts infections
    whose onset falls after the observation period; those are not in ``kk``.

    If ``Rt`` is missing or shorter than ``nd`` the function prints a message
    and terminates the process through ``sys.exit`` with a non-zero status.
    """
    kk = [0] * days       # kk: daily new cases
    atrisk = [0] * days   # atrisk: number of active cases each day
    tt = 0                # the cumulative total number of cases

    if Rt is None or nd > len(Rt):
        print("The length of Rt should not be smaller than nd.")
        sys.exit(1)  # this is an error, so do not report success

    stoplimit = limit * (1 - pp)
    p_wait = sizeV / (sizeV + muT)
    n_draws = round(sizeV)

    # ------ First day of simulation ------
    # There must be a first patient to kick off the transmission process.
    for _ in range(n0):
        if tt > stoplimit:
            Rt[0] = 0.001
        # how many people will be infected by this existing virus carrier
        ni = rn.poisson(Rt[0], 1)[0]
        # an immune person (indicator == 1) does not transmit
        if rn.choice(2, 1, True, [1 - pp, pp])[0] == 1:
            ni = 0
        tt = tt + ni
        if ni > 0:
            # tk holds the (1-based) day on which each new case occurs
            tk = [rn.negative_binomial(1, p_wait, size=n_draws)[0] + 1
                  for _ in range(ni)]
            for onset in tk:
                # Onsets after the observation window used to raise
                # IndexError; they are simply not recorded.
                if onset <= days:
                    kk[onset - 1] += 1
            last = max(tk)
            pastevent = [1] * (last - 1) + [0] * (days - last + 1)
            atrisk = [a + b for a, b in zip(atrisk, pastevent)]

    # ------ Day 2 to nd ------
    for j in range(1, nd):
        # every person newly infected on day j-1 may transmit on day j
        for _ in range(kk[j - 1]):
            if tt > stoplimit:
                Rt[j] = 0.001
            ni = rn.poisson(Rt[j], 1)[0]
            if rn.choice(2, 1, True, [1 - pp, pp])[0] == 1:
                ni = 0
            tt = tt + ni
            if ni > 0:
                tk = [rn.negative_binomial(1, p_wait, size=n_draws)[0] + 1 + j
                      for _ in range(ni)]
                for onset in tk:
                    if onset <= days:
                        kk[onset - 1] += 1
                last = max(tk)
                pastevent = ([0] * (j - 1) + [1] * (last + 1 - j)
                             + [0] * (days - last))
                atrisk = [a + b for a, b in zip(atrisk, pastevent)]

    return [atrisk, kk, tt]  # riskpopu = atrisk, dailynew = kk, total = tt
def test_results_sparse():
    """Check rank_genes_groups on sparse input against the stored reference results."""
    seed(1234)

    # Build the data densely first and convert afterwards: inefficient, but
    # it guarantees the sparse case sees exactly the same data.
    dense = AnnData(
        np.multiply(binomial(1, 0.15, (100, 20)),
                    negative_binomial(2, 0.25, (100, 20))))
    # Give the first cluster some marker genes so the input is reasonable.
    dense.X[0:10, 0:5] = np.multiply(binomial(1, 0.9, (10, 5)),
                                     negative_binomial(1, 0.5, (10, 5)))
    adata_sparse = AnnData(sp.csr_matrix(dense.X))

    # Two groups: observations 0-9 and observations 10-99.
    smp = 'true_groups'
    group_masks = np.zeros((2, 100), dtype=bool)
    group_masks[0, 0:10] = 1
    group_masks[1, 10:100] = 1
    adata_sparse.add[smp + '_masks'] = group_masks
    adata_sparse.add[smp + '_order'] = np.asarray(['0', '1'])

    # Reference results saved from an earlier run.
    with open('objs_t_test.pkl', 'rb') as f:
        true_scores_t_test, true_names_t_test = pickle.load(f)
    with open('objs_wilcoxon.pkl', 'rb') as f:
        true_scores_wilcoxon, true_names_wilcoxon = pickle.load(f)

    # Note: default value is on copying = true.
    rank_genes_groups(adata_sparse, 'true_groups', n_genes=20, test_type='t_test')

    # Sparse and dense multiplication differ slightly, so the t-test scores
    # are compared with a small tolerance on the first two columns.
    ERROR_TOLERANCE = 5e-7
    max_error = 0
    for i, _ in enumerate(adata_sparse.add['rank_genes_groups_gene_scores']):
        for col in (0, 1):
            deviation = abs(
                adata_sparse.add['rank_genes_groups_gene_scores'][i][col]
                - true_scores_t_test[i][col])
            max_error = max(max_error, deviation)
    assert max_error < ERROR_TOLERANCE

    rank_genes_groups(adata_sparse, 'true_groups', n_genes=20, test_type='wilcoxon')
    assert np.array_equal(true_scores_wilcoxon,
                          adata_sparse.add['rank_genes_groups_gene_scores'])
    assert np.array_equal(true_names_wilcoxon,
                          adata_sparse.add['rank_genes_groups_gene_names'])
def sample_gen(self, num=1):
    """Draw ``num`` samples by rejection sampling; returns a (num, dim) ndarray.

    Proposals are independent negative-binomial draws per dimension; a
    proposal is accepted when a uniform draw falls below
    ``K(inv_r, x + yy + beta, sum_b - 1)[0] / c``.  After 10000 rejected
    proposals the row is filled with NaN instead.

    Reads ``self.dim``, ``self.x``, ``self.beta`` and ``self.r``.
    """
    sample = np.zeros((num, self.dim))  # output is a ndarray with (num, dim)
    x_ = np.array(self.x)
    b_ = np.array(self.beta)
    # sum of the observed (non-NaN) entries of x
    sum_x = np.array([
        x_[i] for i in range(0, self.dim) if not np.isnan(x_[i])
    ]).sum()
    sum_b = sum(b_)
    r_ = np.array(self.r)
    inv_r = [1 / r_[i] for i in range(0, self.dim)]
    # normalising bound used in the acceptance test
    c = (1 / min(self.r))**(sum_b - 1) * sp.beta(sum_x + 1, sum_b - 1)

    for i in range(0, self.dim):
        if isnan(x_[i]):
            # Was ``sample[:, i] == np.nan``: a comparison whose result was
            # discarded, so the column was never marked.
            # NOTE(review): the row assignments below overwrite this again,
            # and a NaN in x still reaches the sampler -- confirm how missing
            # dimensions are meant to be handled.
            sample[:, i] = np.nan

    for i in range(0, num):
        count = 0
        while True:
            count = count + 1
            yy = [
                rd.negative_binomial(n=x_[d] + b_[d], p=r_[d] / (r_[d] + 1))
                for d in range(0, self.dim)
            ]
            u = rd.random()
            if u <= K(inv_r, x_ + yy + b_, sum_b - 1)[0] / c:
                sample[i, :] = yy
                break
            if count > 10000:
                # give up on this row
                sample[i, :] = np.nan
                break
    return sample
def __init__(self, num_retailers=2, length=100, warm_up=None, stock=100, high_var=True, high_c_shortage=True, demands=None, distribution=None, L0=2, h0=0.1, Li=2):
    """Create one warehouse and ``num_retailers`` retailers attached to it.

    When ``demands`` is None each retailer gets ``length`` random demands:
    negative binomial (n=20, p=2/3) if ``high_var`` else binomial
    (n=20, p=0.5).  Otherwise retailer ``i`` uses ``demands[i]`` and
    ``distribution`` is stored as given.
    """
    self.length = length
    self.warehouse = wh.Warehouse(stock=stock, lead=L0, c_holding=h0)
    self.stats = None
    self.num_retailers = num_retailers
    self.warm_up = warm_up
    self.h0 = h0
    self.Li = Li

    shortage_cost = 4.9 if high_c_shortage else 0.9
    for idx in range(num_retailers):
        if demands is not None:
            # caller-supplied demand series
            series = demands[idx]
            self.distribution = distribution
        elif high_var:
            n = 20
            p = 2 / 3
            self.distribution = neg_binomial(n, p)
            series = list(rand.negative_binomial(n, p, length))
        else:
            n = 20
            p = 0.5
            self.distribution = binomial(n, p)
            series = rand.binomial(n, p, length)

        retailer = rt.Retailer(idx, self.warehouse, demands=series, lead=Li)
        retailer.c_shortage = shortage_cost
        self.warehouse.add_retailer(retailer)
def get_example_data(*, sparse=False):
    """Return a 100 x 20 synthetic AnnData with a 'true_groups' column.

    The first 10 observations form group 0 and get stronger counts in the
    first 5 variables; the remaining 90 observations form group 1.  With
    ``sparse=True`` the matrix is converted to CSR after construction.
    """
    # create test object
    background = np.multiply(binomial(1, 0.15, (100, 20)),
                             negative_binomial(2, 0.25, (100, 20)))
    adata = AnnData(background)

    # adapt marker genes for the first cluster to get reasonable input
    markers = np.multiply(binomial(1, 0.9, (10, 5)),
                          negative_binomial(1, 0.5, (10, 5)))
    adata.X[0:10, 0:5] = markers

    # Converting afterwards is inefficient, but makes sure that the same
    # data is used in the sparse case.
    if sparse:
        adata.X = sp.csr_matrix(adata.X)

    # create clusters according to groups
    group_zero = np.zeros((10,), dtype=int)
    group_one = np.ones((90,), dtype=int)
    adata.obs['true_groups'] = pd.Categorical(
        np.concatenate((group_zero, group_one)))
    return adata
def rztnb(mu=3, alpha=0.5, size=100):
    """Return ``size`` draws from a zero-truncated negative binomial.

    The untruncated distribution has mean ``mu`` and dispersion ``alpha``
    (numpy parameters ``n = 1 / alpha`` and ``p = 1 - mu / (mu + n)``).
    Zeros are discarded by rejection, one draw at a time.

    Raises ValueError when ``mu <= 0``: every draw would then be zero and
    the rejection loop would never terminate.
    """
    if mu <= 0:
        raise ValueError("mu must be positive, got %r" % (mu,))
    r = 1.0 / alpha
    p = mu / (mu + r + 0.0)
    ztnb = []
    while len(ztnb) < size:
        x = negative_binomial(n=r, p=1 - p)
        if x > 0:  # keep only the non-zero draws
            ztnb.append(x)
    return ztnb
def test_compute_distribution():
    """Check rank_genes_groups with compute_distribution=True against stored results."""
    seed(1234)

    # create test object
    counts = np.multiply(binomial(1, 0.15, (100, 20)),
                         negative_binomial(2, 0.25, (100, 20)))
    adata = AnnData(counts)
    # adapt marker genes for the first cluster to get reasonable input
    adata.X[0:10, 0:5] = np.multiply(binomial(1, 0.9, (10, 5)),
                                     negative_binomial(1, 0.5, (10, 5)))

    # Two groups: observations 0-9 and observations 10-99.
    smp = 'true_groups'
    group_masks = np.zeros((2, 100), dtype=bool)
    group_masks[0, 0:10] = 1
    group_masks[1, 10:100] = 1
    adata.add[smp + '_masks'] = group_masks
    adata.add[smp + '_order'] = np.asarray(['0', '1'])

    # Reference results saved from an earlier run.
    with open('objs_t_test.pkl', 'rb') as f:
        true_scores_t_test, true_names_t_test = pickle.load(f)
    with open('objs_wilcoxon.pkl', 'rb') as f:
        true_scores_wilcoxon, true_names_wilcoxon = pickle.load(f)

    # Note: default value is on copying = true.
    expectations = (
        ('t_test', true_scores_t_test, true_names_t_test),
        ('wilcoxon', true_scores_wilcoxon, true_names_wilcoxon),
    )
    for test_type, expected_scores, expected_names in expectations:
        rank_genes_groups(adata, 'true_groups', n_genes=20,
                          compute_distribution=True, test_type=test_type)
        assert np.array_equal(expected_scores,
                              adata.add['rank_genes_groups_gene_scores'])
        assert np.array_equal(expected_names,
                              adata.add['rank_genes_groups_gene_names'])
def simulation_parameters(total, s):
    """Seed both random generators with ``s`` and draw per-individual parameters.

    Returns ``(r_p, c_p, contagios)``, three dicts keyed by ``0..total-1``:
    the recovery time, the time at which the individual notices (never
    larger than the recovery time), and a uniform draw in [0, 1) used for
    the Bernoulli trial.
    """
    seed(s)
    npseed(s)

    # Recovery time
    r_p = {}
    for i in range(total):
        r_p[i] = poisson(15)

    # Time at which the individual notices, capped by the recovery time
    c_p = {}
    for i in range(total):
        c_p[i] = min(r_p[i], negative_binomial(6, 0.5))

    # This is basically for the Bernoulli draw
    contagios = {}
    for i in range(total):
        contagios[i] = random()

    return r_p, c_p, contagios
def stepSEIpR_s(inits, simstep, totpop, theta=0, npass=0, bi=None, bp=None,
                values=None, dist='poisson'):
    """
    Defines a stochastic SEIpR model and advances it by one time step.

    - inits = (E, I, S), the state produced by the previous step (ignored
      when simstep == 1, in which case the state is read from ``bi``)
    - totpop = total population N of the site
    - theta = infectious individuals from neighbor sites
    - npass = added to N in the denominator of the force of infection
    - bi / bp = dicts with initial values / parameters
    - dist = 'poisson' or 'negbin', the distribution of the new cases
    - values = accepted for signature compatibility, not used here

    Returns ``([Epos, Ipos, Spos], Lpos + Lpos2, migInf)``.
    Raises ValueError when ``dist` ` is not a supported name.
    """.replace("` `", "`")
    if simstep == 1:  # get initial values
        E, I, S = (bi['e'], bi['i'], bi['s'])
    else:
        E, I, S = inits
    N = totpop
    beta = bp['beta']
    alpha = bp['alpha']
    e = bp['e']
    r = bp['r']
    b = bp['b']
    p = bp['p']
    # R is whatever is left of N; never let it go negative, otherwise the
    # expected number of secondary infections becomes negative.
    R = max(0, N - E - I - S)

    Lpos_esp = float(beta) * S * ((I + theta) / (N + npass)) ** alpha  # Number of new cases
    Lpos2_esp = p * float(beta) * R * ((I + theta) / (N + npass)) ** alpha  # secondary infections

    if dist not in ('poisson', 'negbin'):
        raise ValueError("dist must be 'poisson' or 'negbin', got %r" % (dist,))
    if dist == 'poisson' or I <= 0:
        # The negative binomial uses I as its size parameter, which is
        # undefined for I <= 0; fall back to a Poisson draw with that mean.
        Lpos = poisson(Lpos_esp)
        Lpos2 = poisson(Lpos2_esp)
    else:
        prob = I / (I + Lpos_esp)  # converting between parameterizations
        Lpos = negative_binomial(I, prob)
        prob = I / (I + Lpos2_esp)  # converting between parameterizations
        Lpos2 = negative_binomial(I, prob)
    # New cases cannot exceed the compartment they are drawn from.
    Lpos = min(S, Lpos)
    Lpos2 = min(R, Lpos2)

    # Model
    Epos = (1 - e) * E + Lpos + Lpos2
    Ipos = e * E + (1 - r) * I
    Spos = S + b - Lpos

    # Migrating infectious
    migInf = Ipos
    # The exposed compartment must be carried to the next step; returning 0
    # here emptied E after every step, so nobody ever became infectious.
    return [Epos, Ipos, Spos], Lpos + Lpos2, migInf
def plmm_sample(size, algo):
    """Simulate training data for ``size`` groups, dump it for Stan and run the sampler.

    Writes two R-dump files (the full "true" data and the data passed to
    the model), runs the ``./plmm`` executable on the latter and reads its
    CSV output.  Model constants and the ``test_*`` data come from
    module-level names.
    """
    M = size
    N = M * gp
    file_stem = 'plmm_'+algo+'_M'+str(M)
    group_of_row = [val for val in range(M) for _ in range(gp)]  # 0-based, for indexing

    train_true = {
        'K': K,
        'M': M,
        'N': N,
        'alpha': alpha,
        'sigma': sigma,
        'beta': beta,
        'g': group_of_row,
        'normal_mean': normal_mean,
        'normal_cov': normal_cov,
        'bin_p': bin_p,
    }

    # covariates: two Gaussian columns followed by two Bernoulli columns
    train_true['x'] = np.zeros(shape = (N, K))
    train_true['x'][:, 0:2] = np.random.multivariate_normal(normal_mean, normal_cov, N)
    train_true['x'][:, 2:4] = [np.random.binomial(1, p = bin_p) for i in range(N)]

    # group effects and the log-mean of every observation
    train_true['a'] = np.random.normal(loc = 0, scale = sigma, size=M)
    train_true['log_m'] = (train_true['alpha']
                           + train_true['a'][train_true['g']].T
                           + np.matmul(train_true['x'], train_true['beta']))
    train_true['m'] = np.array([math.exp(x) for x in train_true['log_m']])

    # negative-binomial response (a Poisson response was used previously)
    train_true['p'] = (train_true['m'] / r) / (1 + (train_true['m'] / r))
    train_true['y'] = npr.negative_binomial(r, train_true['p'])

    # group labels are stored 1-based for Stan
    train_true['g'] = [(val+1) for val in range(M) for _ in range(gp)]

    train_true['test_M'] = test_M
    train_true['test_N'] = test_N
    train_true['test_x'] = test_x
    train_true['test_a'] = test_a
    train_true['test_log_m'] = test_log_m
    train_true['test_m'] = test_m
    train_true['test_g'] = test_g
    train_true['test_p'] = test_p
    train_true['test_y'] = test_y

    train_true_file = file_stem+'_train_true.data.R'
    stanhelper.stan_rdump(train_true, train_true_file)

    # the subset of the data that the model actually sees
    train_dict = {'M': M, 'N': N, 'K': K}
    for key in ('y', 'x', 'g',
                'test_M', 'test_N', 'test_y', 'test_x', 'test_g', 'test_a'):
        train_dict[key] = train_true[key]

    train_dict_file = file_stem+'_train_dict.data.R'
    stanhelper.stan_rdump(train_dict, train_dict_file)

    output_file = file_stem+'_output.csv'
    subprocess.call('./plmm sample data file='+train_dict_file
                    +' output file='+output_file, shell=True)
    result = stanhelper.stan_read_csv(output_file)
def simulate_BNB(mean, sigma, n):
    """Draw from a beta-negative-binomial: p ~ Beta(a, b), counts ~ NB(n, p).

    The beta parameters are ``a = mean_p * s + 1`` and ``b = (1 - mean_p) * s``
    with ``mean_p = n / (n + mean)`` and ``s = (1 / sigma) ** 2``.
    """
    success_prob = np.float64(n) / (n + mean)
    concentration = (1 / sigma)**2
    shape_a = success_prob * (concentration) + 1
    shape_b = (1 - success_prob) * concentration
    # draw the success probability first, then the counts given it
    return negative_binomial(n, beta(shape_a, shape_b))
def rvs(self, x=None, size=[], return_xy=False):
    """Sample negative-binomial responses, drawing Gaussian inputs if none are given.

    When ``x`` is None, ``size`` must be an int and ``size`` standard-normal
    rows of width ``self.D_in`` are generated; otherwise ``x`` must be 2-D
    with ``self.D_in`` columns.  Returns ``y``, or ``(x, y)`` when
    ``return_xy`` is true.
    """
    if x is not None:
        assert x.ndim == 2 and x.shape[1] == self.D_in
    else:
        assert isinstance(size, int)
        x = npr.randn(size, self.D_in)

    # linear predictor, mapped through the logistic function
    activation = x.dot(self.A.T) + self.b.T
    y = npr.negative_binomial(self.r, 1-logistic(activation))

    if return_xy:
        return (x, y)
    return y
def rvs(self, x=None, size=[], return_xy=False):
    """Draw responses ``y ~ NB(self.r, 1 - logistic(x A^T + b^T))``.

    ``x`` is either supplied (2-D, ``self.D_in`` columns) or generated as
    ``size`` standard-normal rows, in which case ``size`` must be an int.
    The inputs are returned alongside ``y`` when ``return_xy`` is true.
    """
    inputs_missing = x is None
    if inputs_missing:
        assert isinstance(size, int)
        x = npr.randn(size, self.D_in)
    else:
        assert x.ndim == 2 and x.shape[1] == self.D_in

    psi = x.dot(self.A.T) + self.b.T
    failure_prob = 1 - logistic(psi)
    y = npr.negative_binomial(self.r, failure_prob)
    result = (x, y) if return_xy else y
    return result
def stepSEIR_s(inits, simstep, totpop, theta=0, npass=0, bi=None, bp=None,
               values=None, model=None, dist='poisson') -> tuple:
    """
    Defines a stochastic SEIR model and advances it by one time step.

    - inits = (E, I, S), the state produced by the previous step (ignored
      when simstep == 0, in which case the state is read from ``bi``)
    - totpop = total population N of the site
    - theta = infectious individuals from neighbor sites
    - npass = added to N in the denominator of the force of infection
    - bi / bp = dicts with initial values / parameters; every key is looked
      up as ``str`` first and as ``bytes`` second.  Only beta, alpha, e, r
      and b are read here.
    - dist = 'poisson' or 'negbin', the distribution of the new cases
    - values, model = accepted for signature compatibility, not used here

    Returns ``([Epos, Ipos, Spos], Lpos, migInf)``.
    Raises ValueError when ``dist`` is not a supported distribution name.
    """
    if simstep == 0:  # get initial values
        E, I, S = (bi.get('e', bi.get(b'e')),
                   bi.get('i', bi.get(b'i')),
                   bi.get('s', bi.get(b's')))
    else:
        E, I, S = inits
    N = totpop
    beta = bp.get('beta', bp.get(b'beta'))
    alpha = bp.get('alpha', bp.get(b'alpha'))
    e = bp.get('e', bp.get(b'e'))
    r = bp.get('r', bp.get(b'r'))
    b = bp.get('b', bp.get(b'b'))

    Lpos_esp = float(beta) * S * ((I + theta) / (N + npass))**alpha  # Number of new cases

    if dist not in ('poisson', 'negbin'):
        # Previously an unknown name fell through and failed later with an
        # unbound-variable error.
        raise ValueError("dist must be 'poisson' or 'negbin', got %r" % (dist,))
    if dist == 'poisson' or I <= 0:
        # The negative binomial uses I as its size parameter, which is
        # undefined for I <= 0 (the old code hit 0/0 or a numpy ValueError).
        # In that case fall back to a Poisson draw with the same mean.
        Lpos = poisson(np.nan_to_num(Lpos_esp))
    else:
        prob = I / (I + Lpos_esp)  # converting between parameterizations
        Lpos = negative_binomial(I, prob)
    Lpos = min(S, Lpos)  # to avoid underflow

    Epos = (1 - e) * E + Lpos
    Ipos = e * E + (1 - r) * I
    Spos = S + b - Lpos
    # R is implicit: N - (Spos + Epos + Ipos)

    # Migrating infectious
    migInf = Ipos
    return [Epos, Ipos, Spos], Lpos, migInf
def simulation_parameters(total, delta_t, s):
    """Seed both random generators with ``s`` and draw the simulation parameters.

    Returns ``(r_p, c_p, contagios)``.  ``r_p`` (recovery time) and ``c_p``
    (time at which the individual notices, never larger than the recovery
    time) are keyed by individual ``0..total-1``.  ``contagios`` maps each
    time step ``0..delta_t`` to a dict of one uniform draw per individual.
    """
    seed(s)
    npseed(s)
    individuals = range(total)

    # Recovery time
    r_p = {}
    for i in individuals:
        r_p[i] = poisson(15)

    # Time at which the individual notices, capped by the recovery time
    c_p = {}
    for i in individuals:
        c_p[i] = min(r_p[i], negative_binomial(6, 0.5))

    # One uniform draw per individual and time step
    contagios = {}
    for t in range(delta_t + 1):
        draws = {}
        for i in individuals:
            draws[i] = random()
        contagios[t] = draws

    return r_p, c_p, contagios
def stepSIR_s(inits, simstep, totpop, theta=0, npass=0, bi=None, bp=None,
              values=None, model=None, dist='poisson') -> tuple:
    """
    Defines a stochastic SIR model and advances it by one time step.

    - inits = (E, I, S), the state produced by the previous step (ignored
      when simstep == 0, in which case the state is read from ``bi``); E is
      carried in the tuple but is not part of the SIR dynamics
    - totpop = total population N of the site
    - theta = infectious individuals from neighbor sites
    - npass = added to N in the denominator of the force of infection
    - bi / bp = dicts with initial values / parameters; every key is looked
      up as ``str`` first and as ``bytes`` second.  Only beta, alpha, r and
      b are read here.
    - dist = 'poisson' or 'negbin', the distribution of the new cases
    - values, model = accepted for signature compatibility, not used here

    Returns ``([0, Ipos, Spos], Lpos, migInf)``.
    Raises ValueError when ``dist`` is not a supported distribution name.
    """
    if simstep == 0:  # get initial values
        E, I, S = (bi.get('e', bi.get(b'e')),
                   bi.get('i', bi.get(b'i')),
                   bi.get('s', bi.get(b's')))
    else:
        E, I, S = inits
    N = totpop
    beta = bp.get('beta', bp.get(b'beta'))
    alpha = bp.get('alpha', bp.get(b'alpha'))
    r = bp.get('r', bp.get(b'r'))
    b = bp.get('b', bp.get(b'b'))

    Lpos_esp = float(beta) * S * ((I + theta) / (N + npass))**alpha  # Number of new cases

    if dist not in ('poisson', 'negbin'):
        # Previously an unknown name fell through and failed later with an
        # unbound-variable error.
        raise ValueError("dist must be 'poisson' or 'negbin', got %r" % (dist,))
    if dist == 'poisson' or I <= 0:
        # The negative binomial uses I as its size parameter, which is
        # undefined for I <= 0 (the old code hit 0/0 or a numpy ValueError).
        # In that case fall back to a Poisson draw with the same mean.
        Lpos = poisson(Lpos_esp)
    else:
        prob = I / (I + Lpos_esp)  # converting between parameterizations
        Lpos = negative_binomial(I, prob)
    Lpos = min(S, Lpos)  # to avoid underflow

    # Model (the recovered compartment is implicit: R + r * I)
    Ipos = (1 - r) * I + Lpos
    Spos = S + b - Lpos

    # Migrating infectious
    migInf = Ipos
    return [0, Ipos, Spos], Lpos, migInf
def survives(t, λ_α, λ_β, μ_α, μ_β, ρ):
    """Recursively simulate one lineage and report whether it survives.

    Returns True as soon as this individual, or any of its simulated
    descendants, outlives its remaining time and passes a uniform draw
    against ``ρ``; returns False otherwise.

    NOTE: ``λ_α``, ``λ_β``, ``μ_α`` and ``μ_β`` are updated in place with
    ``+=`` and are shared by every recursive call, so the draws of later
    individuals depend on everything simulated before them.
    (presumably these are length-1 numpy arrays holding gamma-type
    posterior parameters for the birth and death rates -- TODO confirm
    against the caller.)
    """
    # Draw this individual's lifetime from the current death-rate parameters.
    Δ = pareto(μ_α[0]) * μ_β[0]
    # Record the observed lifetime in the shared death-rate parameters.
    μ_α += 1
    μ_β += Δ
    if Δ > t:
        # The lifetime exceeds the remaining time t: survives with probability ρ.
        if uniform(0., 1.) < ρ:
            return True
        # Otherwise only the first t time units of the lifetime are used below.
        Δ = t
    t_end = t - Δ
    # Number of offspring produced during the lifetime Δ.
    s = negative_binomial(λ_α[0], λ_β[0] / (λ_β[0] + Δ))
    # Record the offspring count and exposure in the shared birth-rate parameters.
    λ_α += s
    λ_β += Δ
    for i in range(s):
        # Each offspring starts at a uniform time within the parent's lifetime.
        τ = uniform(t_end, t)
        if survives(τ, λ_α, λ_β, μ_α, μ_β, ρ):
            return True
    return False
def generate_demands(periods, high_var):
    """Return two random demand series of length ``periods`` and their distribution.

    ``high_var`` selects negative-binomial demands (n=20, p=2/3); otherwise
    the demands are binomial (n=20, p=0.5).  The result is
    ``([series_1, series_2], distribution_object)``.
    """
    n = 20
    if high_var:
        p = 2 / 3
        dist = neg_binomial(n, p)
        draw = rand.negative_binomial
    else:
        p = 0.5
        dist = binomial(n, p)
        draw = rand.binomial

    # one demand series per retailer
    random = [draw(n, p, periods) for _ in range(2)]
    return random, dist
def sample_binomial_frag_len(frag_mean=200, frag_variance=100):
    """
    Sample a fragment length from a binomial distribution parameterized with a
    mean and variance.

    If frag_variance > frag_mean, use a Negative-Binomial distribution.

    The mean and the variance must differ by more than 1 (checked with an
    ``assert``, as before).  In the negative-binomial case the derived
    parameters are printed.
    """
    assert(abs(frag_mean - frag_variance) > 1)
    if frag_variance < frag_mean:
        # mean = n * p and variance = n * p * (1 - p)
        p = 1 - (frag_variance/float(frag_mean))
        # N = mu/(1-(sigma^2/mu))
        n = float(frag_mean) / (1 - (float(frag_variance)/float(frag_mean)))
        # numpy truncates a float n, so a value such as 69.99999999999999
        # would silently become 69; use the nearest integer instead.
        return binomial(int(round(n)), p)
    else:
        r = -1 * (power(frag_mean, 2)/float(frag_mean - frag_variance))
        p = frag_mean / float(frag_variance)
        # Single-argument print calls give the same output on Python 2 and 3
        # (the former print statements were a SyntaxError on Python 3).
        print("Sampling frag_mean= %s  frag_variance= %s" % (frag_mean, frag_variance))
        print("r:  %s  p:  %s" % (r, p))
        return negative_binomial(r, p)
def stepSEIR_s(inits, simstep, totpop, theta=0, npass=0, bi=None, bp=None,
               values=None, dist='poisson'):
    """
    Defines a stochastic SEIR model and advances it by one time step.

    - inits = (E, I, S), the state produced by the previous step (ignored
      when simstep == 1, in which case the state is read from ``bi``)
    - totpop = total population N of the site
    - theta = infectious individuals from neighbor sites
    - npass = added to N in the denominator of the force of infection
    - bi / bp = dicts with initial values / parameters; only beta, alpha,
      e, r and b are read here
    - dist = 'poisson' or 'negbin', the distribution of the new cases
    - values = accepted for signature compatibility, not used here

    Returns ``([Epos, Ipos, Spos], Lpos, migInf)``.
    Raises ValueError when ``dist`` is not a supported distribution name.
    """
    if simstep == 1:  # get initial values
        E, I, S = (bi['e'], bi['i'], bi['s'])
    else:
        E, I, S = inits
    N = totpop
    beta = bp['beta']
    alpha = bp['alpha']
    e = bp['e']
    r = bp['r']
    b = bp['b']

    Lpos_esp = float(beta) * S * ((I + theta) / (N + npass)) ** alpha  # Number of new cases

    if dist not in ('poisson', 'negbin'):
        # Previously an unknown name fell through and failed later with an
        # unbound-variable error.
        raise ValueError("dist must be 'poisson' or 'negbin', got %r" % (dist,))
    if dist == 'poisson' or I <= 0:
        # The negative binomial uses I as its size parameter, which is
        # undefined for I <= 0 (the old code hit 0/0 or a numpy ValueError).
        # In that case fall back to a Poisson draw with the same mean.
        Lpos = poisson(Lpos_esp)
    else:
        prob = I / (I + Lpos_esp)  # converting between parameterizations
        Lpos = negative_binomial(I, prob)
    # New cases cannot exceed the susceptibles they are drawn from;
    # without this clamp S could become negative.
    Lpos = min(S, Lpos)

    Epos = (1 - e) * E + Lpos
    Ipos = e * E + (1 - r) * I
    Spos = S + b - Lpos
    # R is implicit: N - (Spos + Epos + Ipos)

    # Migrating infectious
    migInf = Ipos
    return [Epos, Ipos, Spos], Lpos, migInf
def stepSIRS_s(inits, simstep, totpop, theta=0, npass=0, bi=None, bp=None,
               values=None, dist='poisson'):
    """
    Defines a stochastic SIRS model and advances it by one time step.

    - inits = (E, I, S), the state produced by the previous step (ignored
      when simstep == 1, in which case the state is read from ``bi``); E is
      carried in the tuple but is not part of the SIRS dynamics
    - totpop = total population N of the site
    - theta = infectious individuals from neighbor sites
    - npass = added to N in the denominator of the force of infection
    - bi / bp = dicts with initial values / parameters; only beta, alpha,
      r, b and w are read here (w * R is added back to the susceptibles)
    - dist = 'poisson' or 'negbin', the distribution of the new cases
    - values = accepted for signature compatibility, not used here

    Returns ``([0, Ipos, Spos], Lpos, migInf)``.
    Raises ValueError when ``dist`` is not a supported distribution name.
    """
    if simstep == 1:  # get initial values
        E, I, S = (bi['e'], bi['i'], bi['s'])
    else:
        E, I, S = inits
    N = totpop
    R = N - (E + I + S)
    beta = bp['beta']
    alpha = bp['alpha']
    r = bp['r']
    b = bp['b']
    w = bp['w']

    Lpos_esp = float(beta) * S * ((I + theta) / (N + npass)) ** alpha  # Number of new cases

    if dist not in ('poisson', 'negbin'):
        # Previously an unknown name fell through and failed later with an
        # unbound-variable error.
        raise ValueError("dist must be 'poisson' or 'negbin', got %r" % (dist,))
    if dist == 'poisson' or I <= 0:
        # The negative binomial uses I as its size parameter, which is
        # undefined for I <= 0 (the old code hit 0/0 or a numpy ValueError).
        # In that case fall back to a Poisson draw with the same mean.
        Lpos = poisson(Lpos_esp)
    else:
        prob = I / (I + Lpos_esp)  # converting between parameterizations
        Lpos = negative_binomial(I, prob)
    # New cases cannot exceed the susceptibles they are drawn from;
    # without this clamp S could become negative.
    Lpos = min(S, Lpos)

    # Model
    Ipos = (1 - r) * I + Lpos
    Spos = S + b - Lpos + w * R

    # Migrating infectious
    migInf = Ipos
    return [0, Ipos, Spos], Lpos, migInf
def sample_gen(self, num=1):
    """Draw ``num`` samples per dimension; returns a (num, dim) ndarray.

    For each dimension ``i``:
    - a NaN in ``self.x[i]`` yields a column of NaN;
    - otherwise each entry is 0 with probability ``w`` and a
      negative-binomial draw (n = x[i] + kappa, p = r[i] / (r[i] + 1))
      otherwise, where ``w`` is 0 unless ``x[i] == 0``.
    """
    sample = np.zeros((num, self.dim))
    x_ = np.array(self.x)
    r_ = np.array(self.r)

    for col in range(self.dim):
        if isnan(x_[col]):
            # missing observation: the whole column is NaN
            sample[:, col] = np.nan
            continue

        # probability of a structural zero; only possible when x is zero
        w = 0.0 if x_[col] != 0.0 else 1.0 / (
            1.0 + self.eta * gamma(self.kappa) / (r_[col]**self.kappa))

        for row in range(num):
            if rd.rand() < w:
                sample[row, col] = 0
                continue
            sample[row, col] = rd.negative_binomial(
                n=x_[col] + self.kappa, p=r_[col] / (r_[col] + 1))
    return sample
**kwds) def nb_fit(y): import numpy y = numpy.array([[yy] for yy in y]) X = numpy.array([[1.0] for yy in y]) mod = NBin(y, X) res = mod.fit() return tuple(res.params) from numpy.random import poisson, negative_binomial y = list(poisson(100, 100)) y = list(negative_binomial(1, 0.9, 100)) from matplotlib import pylab pylab.hist(y) pylab.show() print y print nb_fit(y) # y = numpy.array( # [[ 4.0], # [ 9.0], # [ 3.0], # [ 9.0], # [ 1.0]]) # X = numpy.array( # [[ 1.0],
def contact_trace_delay(self):
    """Return a random contact-tracing delay of at least 1.

    The delay is one plus a negative-binomial draw with n=1 and success
    probability ``self.prob_of_successful_contact_trace_today``.
    """
    success_prob = self.prob_of_successful_contact_trace_today
    failures_before_success = npr.negative_binomial(1, success_prob)
    return 1 + failures_before_success
def generate_network(num_dv, devices, shared_folders):
    """Populate ``devices`` and ``shared_folders`` with a random bipartite network.

    ``num_dv`` devices are created and stored in ``devices`` (keyed by
    device id); the number of shared folders is derived from ``num_dv`` and
    they are stored in ``shared_folders`` (keyed by folder id).  Both
    containers are filled in place and nothing is returned.  Degrees are
    sampled as one plus a negative-binomial draw.

    NOTE(review): ``sf_pop.pop()`` in the first assignment pass raises
    IndexError if fewer folder edges than devices were sampled, and the
    final loop has no exit if a leftover folder is already linked to every
    device that still has edges -- confirm whether either can happen with
    the hard-coded parameters.
    """
    # shared folders per device - negative_binomial (s, mu)
    DV_DG = [0.470, 1.119]
    # device per shared folder - negative_binomial (s, mu)
    SF_DG = [0.231, 0.537]

    # derive the expected number of shared folders using the negative_binomials
    # this piece is just converting the parameterization of the
    # negative_binomials from (s, mu) to "p". Then, we use the rate between
    # the means to estimate the expected number of shared folders
    # from the given number of devices
    dv_s = DV_DG[0]
    dv_m = DV_DG[1]
    dv_p = dv_s / (dv_s + dv_m)
    # expected device degree (the +1 matches the +1 added to every sample below)
    nd = 1 + (dv_s * (1.0 - dv_p) / dv_p)
    sf_s = SF_DG[0]
    sf_m = SF_DG[1]
    sf_p = sf_s / (sf_s + sf_m)
    # expected shared-folder degree
    dn = 1 + (sf_s * (1.0 - sf_p) / sf_p)

    # the number of shared folders is finally derived
    num_sf = int(num_dv * nd / dn)

    # sample the number of devices per shared folder (shared folder degree)
    sf_dgr = [x + 1 for x in random.negative_binomial(sf_s, sf_p, num_sf)]
    # sample the number of shared folders per device (device degree)
    dv_dgr = [x + 1 for x in random.negative_binomial(dv_s, dv_p, num_dv)]

    # create the population of edges leaving shared folders
    # (folder i appears once per edge, capped at num_dv edges per folder)
    l = [i for i, j in enumerate(sf_dgr) for k in range(min(j, num_dv))]
    random.shuffle(l)
    sf_pop = deque(l)

    # create empty shared folders
    for sf_id in range(num_sf):
        shared_folders[sf_id] = SharedFolder(sf_id)

    # first we pick a random shared folder for each device
    for dv_id in range(num_dv):
        devices[dv_id] = Device(dv_id)
        sf_id = sf_pop.pop()
        devices[dv_id].add_shared_folder(shared_folders[sf_id])
        shared_folders[sf_id].add_device(devices[dv_id])

    # then we complement the shared folder degree
    # we skip devices with degree 1 in a first pass, since they just got 1 sf
    r = 1
    # we might have less edges leaving devices than necessary
    while sf_pop:
        # create the population of edges leaving devices
        l = [i for i, j in enumerate(dv_dgr) for k in range(min(j - r, num_sf))]
        random.shuffle(l)
        dv_pop = deque(l)
        # if we need to recreate the population, we use devices w/ degree 1 too
        r = 0
        while sf_pop and dv_pop:
            dv = dv_pop.pop()
            sf = sf_pop.pop()
            # we are lazy and simply skip the unfortunate repetitions
            if not shared_folders[sf] in devices[dv].my_shared_folders:
                devices[dv].add_shared_folder(shared_folders[sf])
                shared_folders[sf].add_device(devices[dv])
            else:
                # put the folder edge back so it is retried with the next device
                sf_pop.append(sf)
def rvs(self, size=None):
    """Draw negative-binomial variates with this object's ``n`` and ``p``.

    ``size`` is forwarded unchanged to the sampler.
    """
    n_successes, success_prob = self.n, self.p
    draws = random.negative_binomial(n_successes, success_prob, size=size)
    return draws
def intialize(self):
    '''Initialize parameters for MCMC.

    Sets ``self.c`` to a negative-binomial draw (``self.r``, ``self.u``)
    plus 2, then ``self.s`` to ``self._sample_seeds(self.c)``.
    '''
    draw = npr.negative_binomial(self.r, self.u, 1)
    self.c = draw[0] + 2
    self.s = self._sample_seeds(self.c)
timecost.append([mid_time-start_time,time.time()-mid_time]) #f start_time=time.time() a=dsg.f(4,5,times) mid_time=time.time() b=nr.f(4,5,times) timecost.append([mid_time-start_time,time.time()-mid_time]) #negative_binomial start_time=time.time() a=dsg.negative_binomial(5,0.5,times) mid_time=time.time() b=nr.negative_binomial(5,0.5,times) timecost.append([mid_time-start_time,time.time()-mid_time]) #zipf start_time=time.time() a=dsg.zipf(1.25,times) mid_time=time.time() b=nr.poisson(1.25,times) timecost.append([mid_time-start_time,time.time()-mid_time]) #power start_time=time.time() a=dsg.power(1.5,times) mid_time=time.time()
def main():
    """Generate the synthetic Hi-C test data set under ``TEST_PATH``.

    Steps, in order: pick the parameters (smaller when ``QUICK`` is set),
    seed both random generators, draw compartments, peaks and loops, write
    random read pairs to a SAM file, convert and normalize it with external
    ``samtools`` / ``tadbit`` commands, then save the contact matrix (PDF
    plot and pickle) and BED files with the peaks and compartments.

    The results depend on the exact order of the random draws below.
    """
    # some hardcoded defaults of course....
    seed_num = 7
    # number of read pairs to generate
    if QUICK:
        nrnd = 100
    else:
        nrnd = 1000000
    # success probability of the negative binomial used for pair distances
    bin_prob = 0.005
    # bin size used to convert bin indices into SAM coordinates
    reso = 10000
    # chromosome name -> number of bins
    if QUICK:
        chroms = OrderedDict([('1', 50), ('2', 30)])
        npeaks = 8
        # probability that an interaction comes from a loop
        loop_prob = 1
    else:
        chroms = OrderedDict([('1', 500), ('2', 300), ('3', 200)])
        npeaks = 20
        # probability that an interaction comes from a loop
        loop_prob = 0.5
    #############################################
    genome_size = sum(chroms.values())
    sections, bins, weighted_chroms = load_genome(chroms)
    seed(seed_num)
    np.random.seed(seed_num)

    # compartment boundaries and "bad" columns per chromosome
    cmprts_pos = {}
    bad_cols = {}
    prob = 0.2
    step = 10
    for c in chroms:
        bad_cols[c] = set()
        if not QUICK:
            for _ in range(chroms[c] // 10):
                bad_cols[c].add(int(random() * chroms[c]))
        cmprts_pos[c] = []
        end = 0
        beg = 0
        # walk along the chromosome in steps, closing a segment with probability prob
        while end < chroms[c]:
            if random() < prob:
                cmprts_pos[c].append((beg, end))
                beg = end
            end += step
        cmprts_pos[c].append((beg, end))

    # odd-indexed segments form compartment A, even-indexed ones compartment B
    cmprts = {}
    for c in cmprts_pos:
        cmprts[c] = {
            'A': set(p for i, (beg, end) in enumerate(cmprts_pos[c])
                     for p in range(beg, end) if i % 2),
            'B': set(p for i, (beg, end) in enumerate(cmprts_pos[c])
                     for p in range(beg, end) if not i % 2)
        }

    # random peak bins, split into two groups
    peaks = set()
    peaks1 = set()
    peaks2 = set()
    for c in range(npeaks):
        bin1 = int(random() * (genome_size - 2))
        if random() < 0.4:
            peaks1.add(bin1)
        else:
            peaks2.add(bin1)
        peaks.add(bin1)

    # loops: symmetric set of (bin, bin) pairs between peaks
    if not QUICK:
        loops = set()
        for bin1 in peaks:
            for bin2 in peaks:
                if random() < 0.1:
                    continue
                # peaks of the first group span 3 bins, the others 2
                if bin1 in peaks1:
                    range1 = 3
                else:
                    range1 = 2
                if bin2 in peaks1:
                    range2 = 3
                else:
                    range2 = 2
                for i in range(range1):
                    for j in range(range2):
                        loops.add((bin1 + i, bin2 + j))
                        loops.add((bin2 + j, bin1 + i))
    else:
        loops = set()
        for bin1 in peaks1:
            for bin2 in peaks2:
                loops.add((bin1, bin2))
                loops.add((bin2, bin1))

    print('generating SAM')
    Popen('mkdir -p {}/tmp'.format(TEST_PATH), shell=True).communicate()
    Popen('mkdir -p {}/data'.format(TEST_PATH), shell=True).communicate()
    out = open(os_join(TEST_PATH, 'tmp', 'fake.sam'), 'w')
    # SAM header: one @SQ line per chromosome
    out.write('@HD\tVN:1.5\tSO:coordinate\n')
    for c in chroms:
        out.write('@SQ\tSN:%s\tLN:%d\n' % (c, chroms[c] * reso - 1))
    matrix = [[0 for _ in range(sum(chroms.values()))]
              for _ in range(sum(chroms.values()))]
    # pre-drawn distances between the two ends of an intra-chromosomal pair
    nbs = iter(negative_binomial(1, bin_prob, size=nrnd))
    count = 0
    while count < nrnd:
        c1 = weighted_chroms[int(random() * len(weighted_chroms))]
        pos1 = int(random() * chroms[c1])
        if random() > (float(chroms[c1]) / genome_size)**0.8:
            # second end anywhere in the genome
            c2 = weighted_chroms[int(random() * len(weighted_chroms))]
            pos2 = int(random() * chroms[c2])
        else:
            # second end on the same chromosome, at a negative-binomial distance
            c2 = c1
            pos2 = -1
            while pos2 < 0 or pos2 >= chroms[c2]:
                try:
                    if random() < 0.15:
                        # force the second end into the same compartment as the first
                        wanted_cmprt = 'A' if pos1 in cmprts[c1]['A'] else 'B'
                        while pos2 not in cmprts[c2][wanted_cmprt]:
                            pos2 = pos1 + (next(nbs) * (-1 if random() > 0.5 else 1))
                    else:
                        pos2 = pos1 + (next(nbs) * (-1 if random() > 0.5 else 1))
                except StopIteration:
                    # ran out of pre-drawn distances: draw a new batch
                    nbs = iter(negative_binomial(1, bin_prob, size=nrnd))
        # pairs touching a bad column are dropped half of the time
        if pos1 in bad_cols[c1] or pos2 in bad_cols[c2]:
            if random() < 0.5:
                continue
        bin1 = sections[c1] + pos1
        bin2 = sections[c2] + pos2
        # with probability loop_prob only pairs that fall on a loop are kept
        if random() <= loop_prob:
            if (bin1, bin2) not in loops:
                continue
        # write both mates of the pair
        out.write(
            'SRR.{0}\t1024\t{1}\t{2}\t1\t75P\t{3}\t{4}\t75\t*\t*\n'.format(
                count, c1, int(reso / 2 + pos1 * reso),
                c2, int(reso / 2 + pos2 * reso)))
        out.write(
            'SRR.{0}\t1024\t{1}\t{2}\t1\t75P\t{3}\t{4}\t75\t*\t*\n'.format(
                count, c2, int(reso / 2 + pos2 * reso),
                c1, int(reso / 2 + pos1 * reso)))
        matrix[bin1][bin2] += 1
        matrix[bin2][bin1] += 1
        count += 1
    out.close()

    print('generating BAM')
    Popen('samtools sort -@ 8 -O BAM {} > {}'.format(
        os_join(TEST_PATH, 'tmp', 'fake.sam'),
        os_join(TEST_PATH, 'data', 'fake.bam')), shell=True).communicate()
    Popen('rm -f {}'.format(os_join(TEST_PATH, 'tmp', 'fake.sam')),
          shell=True).communicate()
    Popen('samtools index -@ 8 {}'.format(
        os_join(TEST_PATH, 'data', 'fake.bam')), shell=True).communicate()
    Popen(('tadbit normalize -w {}/tmp --bam {} -r {} --min_count {} '
           '--normalize_only').format(TEST_PATH,
                                      os_join(TEST_PATH, 'data', 'fake.bam'),
                                      reso, 0 if QUICK else 100),
          shell=True).communicate()
    # keep only the biases produced by the normalization, then drop the work dir
    Popen(("mv {0}/tmp/04_normalization/biases_*pickle "
           "{0}/data/biases.pickle").format(TEST_PATH),
          shell=True).communicate()
    Popen('rm -rf {}/tmp'.format(TEST_PATH), shell=True).communicate()

    # plot the contact matrix with chromosome borders and peak positions
    if QUICK:
        plt.figure(figsize=(10, 7))
    else:
        plt.figure(figsize=(61, 45))
    plt.imshow(np.log2(matrix), interpolation='None', origin='lower')
    total = 0
    xs = []
    ys = []
    for k in chroms:
        xs.append(total)
        xs.append(total)
        ys.append(total)
        total += chroms[k]
        ys.append(total)
    plt.plot(xs, ys, color='k', ls='--')
    plt.plot(ys, xs, color='k', ls='--')
    plt.plot([0, total], [0, total], color='k', alpha=0.5)
    plt.hlines(list(peaks1), 0, list(peaks1), colors='r')
    plt.vlines(list(peaks1), list(peaks1), len(matrix), colors='r')
    plt.hlines(list(peaks2), 0, list(peaks2), colors='b')
    plt.vlines(list(peaks2), list(peaks2), len(matrix), colors='b')
    # annotate the counts in a 9 x 9 window around peak pairs of different groups
    for p1 in peaks:
        for p2 in peaks:
            if p1 >= p2:
                continue
            if not ((p1 in peaks1 and p2 in peaks2) or
                    (p1 in peaks2 and p2 in peaks1)):
                continue
            for k in range(9):
                for l in range(9):
                    plt.text(p1 + k - 4, p2 + l - 4,
                             matrix[p1 + k - 4][p2 + l - 4],
                             size=2, va='center', ha='center')
    plt.xlim(0, total)
    plt.ylim(0, total)
    plt.colorbar()
    plt.savefig(os_join(TEST_PATH, 'data', 'matrix.pdf'), format='pdf')

    print('saving matrix as pickle')
    out = open(os_join(TEST_PATH, 'data', 'matrix.pickle'), 'wb')
    dump(matrix, out)
    out.close()

    print('Saving BEDs')
    # peak coordinates get a random offset below 1000 on both ends
    out = open(os_join(TEST_PATH, 'data', 'peaks_protA.bed'), 'w')
    out.write(''.join(
        '{0}\t{1}\t{2}\n'.format(bins[p][0],
                                 bins[p][1] * reso + int(random() * 1000),
                                 bins[p][1] * reso + int(random() * 1000))
        for p in sorted(peaks1)))
    out.close()
    out = open(os_join(TEST_PATH, 'data', 'peaks_protB.bed'), 'w')
    out.write(''.join(
        '{0}\t{1}\t{2}\n'.format(bins[p][0],
                                 bins[p][1] * reso + int(random() * 1000),
                                 bins[p][1] * reso + int(random() * 1000))
        for p in sorted(peaks2)))
    out.close()
    # merged and sorted peak file
    Popen(('cat {0}/data/peaks_protA.bed {0}/data/peaks_protB.bed | '
           'sort -k1n -k2n > {0}/data/peaks_prot.bed').format(TEST_PATH),
          shell=True).communicate()
    # one line per bin: positive score for compartment A, negative for B
    out = open(os_join(TEST_PATH, 'data', 'compartments.bed'), 'w')
    out.write(''.join(
        '{}\t{}\t{}\t{}\n'.format(c, p * reso, p * reso + reso,
                                  (1 if p in cmprts[c]['A'] else -1) *
                                  (0.2 + 0.8 * random()))
        for c in chroms for p in range(chroms[c])))
    out.close()
def service_time(self):
    """Return one random service time: a negative-binomial draw (n=119, p=0.24878)."""
    n_successes = 119
    success_prob = 0.24878
    return negative_binomial(n_successes, success_prob)
def run_read_simulation_multi(
    INFILE,
    COV,
    READLEN,
    INSERLEN,
    NBINOM,
    A1,
    A2,
    MINLENGTH,
    MUTATE,
    MUTRATE,
    AGE,
    DAMAGE,
    GEOM_P,
    THEMIN,
    THEMAX,
    PROCESS,
):
    """Simulate sequencing reads for one genome using ``PROCESS`` workers.

    Prints the run parameters, (re)initialises the module-level quality and
    Markov-model globals, draws negative-binomial insert lengths, prepares
    fragments and per-read error profiles and hands them to ``multi_run``.

    Returns ``(result, [nread * INSERLEN, INSERLEN, COV, DAMAGE])`` where
    ``result`` is whatever ``multi_run`` returned.
    """
    global READSIZE
    global MARKOV_ORDER
    global QUALIT_FWD
    global MARKOV_SEED_FWD
    global MARKOV_START_FWD
    global MARKOV_DICT_FWD
    global QUALIT_REV
    global MARKOV_SEED_REV
    global MARKOV_START_REV
    global MARKOV_DICT_REV

    print("===================\n===================")
    for label, value in (
        ("Genome: ", INFILE),
        ("Coverage: ", COV),
        ("Read length: ", READLEN),
        ("Mean Insert length: ", INSERLEN),
        ("n parameter for Negative Binomial insert length distribution: ", NBINOM),
        ("Adaptor 1: ", A1),
        ("Adaptor 2: ", A2),
        ("Mutation rate (bp/year):", MUTRATE),
        ("Age (years):", AGE),
        ("Deamination:", DAMAGE),
    ):
        print(label, value)

    READSIZE = READLEN
    basename = get_basename(INFILE)
    fasta = read_fasta(INFILE)
    # fasta[1] is used as the genome length here and for the mutation rate
    genome_length = fasta[1]

    nread = int((genome_length / INSERLEN) * COV)
    print("Number of reads: ", nread)
    print("-------------------")

    # quality profiles and the Markov models built from them
    MARKOV_ORDER = 10
    QUALIT_FWD = get_fwd_qual()
    QUALIT_REV = get_rev_qual()
    MARKOV_SEED_FWD = mk.generate_kmer(
        qualities=QUALIT_FWD, order=MARKOV_ORDER, readsize=READLEN
    )
    MARKOV_SEED_REV = mk.generate_kmer(
        qualities=QUALIT_REV, order=MARKOV_ORDER, readsize=READLEN
    )
    MARKOV_START_FWD = MARKOV_SEED_FWD[0]
    MARKOV_START_REV = MARKOV_SEED_REV[0]
    MARKOV_DICT_FWD = MARKOV_SEED_FWD[1]
    MARKOV_DICT_REV = MARKOV_SEED_REV[1]

    # negative_binomial parameters: mean insert length INSERLEN, size NBINOM
    prob = NBINOM / (NBINOM + INSERLEN)
    fragment_lengths = npr.negative_binomial(NBINOM, prob, nread)

    # Define Mutation rate
    correct_mutrate = (MUTRATE * AGE) / genome_length if MUTATE else 0

    # Prepare fragments and errors
    all_fragments = sf.random_insert(fasta, fragment_lengths, READLEN, MINLENGTH)
    n_fragments = len(all_fragments)
    fwd_illu_err = markov_multi_fwd(process=PROCESS, nreads=n_fragments)
    rev_illu_err = markov_multi_rev(process=PROCESS, nreads=len(all_fragments))
    runlist = sf.prepare_run(
        all_frag=all_fragments, all_fwd_err=fwd_illu_err, all_rev_err=rev_illu_err
    )

    result = multi_run(
        iterables=runlist,
        name=basename,
        mutate=MUTATE,
        mutrate=correct_mutrate,
        damage=DAMAGE,
        geom_p=GEOM_P,
        themin=THEMIN,
        themax=THEMAX,
        fwd_adaptor=A1,
        rev_adaptor=A2,
        read_length=READLEN,
        process=PROCESS,
    )
    summary = [nread * INSERLEN, INSERLEN, COV, DAMAGE]
    return result, summary
gp = 10 r = 1 test_M = 1000 test_N = test_M * gp test_x = np.zeros(shape = (test_N, K)) test_x[:,0:2] = np.random.multivariate_normal(normal_mean, normal_cov, test_N) test_x[:, 2:4] = [np.random.binomial(1, p = bin_p) for i in range(test_N)] test_g = [val for val in range(test_M) for _ in range(gp)] test_a = np.random.normal(loc = 0, scale = sigma, size=test_M) test_log_m = alpha + test_a[test_g].T + np.matmul(test_x, beta) test_m = np.array([math.exp(x) for x in test_log_m]) test_g = [(val+1) for val in range(test_M) for _ in range(gp)] test_p = (test_m / r) / (1 + (test_m / r)) # test_y = np.random.poisson(lam = test_m) test_y = npr.negative_binomial(r, test_p) # In[26]: def plmm(size, algo): M = size N = M * gp train_true = {} train_true['K'] = K train_true['M'] = M train_true['N'] = N train_true['alpha'] = alpha train_true['sigma'] = sigma train_true['beta'] = beta
def _rvs(self, n, p):
    """Draw ``self._size`` negative-binomial variates with parameters ``n`` and ``p``."""
    shape = self._size
    variates = mtrand.negative_binomial(n, p, shape)
    return variates
def negative_binomial(self, n, p):
    '''
    Draw ``self.size`` samples from a negative binomial distribution.

    Parameters:
        n: int, >0.
        p: float in range [0, 1].
    '''
    sample_shape = self.size
    samples = r.negative_binomial(n, p, sample_shape)
    return samples
def generate_buffer(self, n):
    """Return ``n`` random counts: zero, or one plus a negative-binomial draw.

    Each entry is non-zero with probability ``self.p``.  Its mean parameter
    is uniform on [``self.mean_low``, ``self.mean_high``) and the
    negative-binomial parameters are derived from that mean and
    ``self.CV2``.  All ``n`` negative-binomial draws are made, including
    those later multiplied by zero.
    """
    # which entries are non-zero at all
    active = npr.binomial(1, self.p, size=n)
    # one mean parameter per entry
    means = npr.uniform(low=self.mean_low, high=self.mean_high, size=n)

    # convert (mean, CV2) to the (n, p) parameters of the sampler
    success_probs = means / self.CV2 / (means + 1)**2.
    success_counts = means * success_probs / (1. - success_probs)

    shifted_draws = 1 + npr.negative_binomial(success_counts, success_probs)
    return active * shifted_draws