def de_fun(state, control, params, noise):
    '''
    Advance the two-season population model by one year.

    Inputs:
        state: pair [x, y]; x (y) is the population after the breeding
            (non-breeding) period
        control: control parameter that is to be varied; used as the
            breeding-season rate rb
        params: list of parameter values [kb, knb, rnb, a]
        noise: pair [eps1, eps2]; unpacked but not otherwise used here
    Output:
        array [xnew, ynew] for the subsequent state

    The negative binomial size parameter ``ke`` is read from module scope.
    '''
    x, y = state
    kb, knb, rnb, a = params
    eps1, eps2 = noise  # unpacking enforces a two-element noise argument
    rb = control

    def draw_population(expected):
        # Negative binomial draw with size ke and mean `expected`
        return nbinom.rvs(ke, ke / (ke + expected))

    # Population after the breeding period of season t+1
    xnew = draw_population(y * np.exp((rb - a * x) * (1 - y / kb)))
    # Population after the non-breeding period of season t+1
    ynew = draw_population(xnew * np.exp(rnb * (1 - xnew / knb)))
    return np.array([xnew, ynew])
def c19_nbinom_rvs(r0, k, size=0):
    """Generate negative binomial random variates.

    ``r0`` and ``k`` are converted to scipy's ``(n, p)`` pair by
    ``c19_nbinom_transform``. An array of ``size`` draws is returned when
    ``size`` is greater than 1; otherwise a single draw is returned.
    """
    n, p = c19_nbinom_transform(r0, k)
    size_kwargs = {"size": size} if size > 1 else {}
    return nbinom.rvs(n, p, **size_kwargs)
def simulate(self, length: int) -> pd.DataFrame:
    r"""Simulate an endemic weekly case-count time series.

    The expected count for week ``w`` is
    ``exp(baseline_frequency + trend * w + _seasonality(w))``. Counts are
    Poisson when ``dispersion == 1``; otherwise they are negative binomial
    with that mean and variance ``dispersion * mean``.

    Parameters
    ----------
    length
        Number of weeks to model.

    Returns
    -------
    A ``DataFrame`` of an endemic time series where each row contains the
    case counts of this week (``n_cases``) and a 1-based ``timestep``.
    """
    # A falsy seed (None or 0) leaves NumPy's global RNG untouched.
    if self.seed:
        np.random.seed(self.seed)
    mu_s = [
        np.exp(self.baseline_frequency + self.trend * week +
               self._seasonality(week)) for week in range(length)
    ]
    if self.dispersion == 1:
        cases = [poisson.rvs(mu, size=1)[0] for mu in mu_s]
    else:
        cases = []
        for mu in mu_s:
            # ``np.float`` was removed in NumPy 1.24; the builtin ``float``
            # is the exact replacement.
            # NOTE(review): dispersion < 1 makes r negative, which is not a
            # valid negative binomial size.
            r = float(mu / (self.dispersion - 1))
            p = r / (r + mu)
            cases.append(nbinom.rvs(r, p, size=1)[0])
    frame = pd.DataFrame({"n_cases": cases})
    return frame.pipe(add_date_time_index_to_frame).assign(
        timestep=list(range(1, length + 1)))
def sample(self, n):
    """Draw ``n`` values from a two-component mixture shifted to start at 1.

    Each value comes from the Poisson component (mean ``self.lambda_0``) with
    probability ``self.pi`` and from the negative binomial component
    (``n=self.r``, ``p=1 - self.p``) otherwise. Both components use ``loc=1``.
    Returns an ``int32`` array of length ``n``.
    """
    draws = np.zeros(n, dtype=np.int32)
    from_poisson = uniform.rvs(size=n) <= self.pi
    n_poisson = np.sum(from_poisson)
    draws[from_poisson] = poisson.rvs(mu=self.lambda_0, loc=1, size=n_poisson)
    draws[~from_poisson] = nbinom.rvs(
        n=self.r, p=1 - self.p, loc=1, size=n - n_poisson)
    return draws
def randnegbinom(self, mu, sd, size):
    """Draw ``size`` negative binomial variates with mean ``mu`` and
    standard deviation ``sd``.

    The moments are converted to scipy's ``(n, p)`` parameters by the method
    of moments; this requires ``sd**2 > mu``.
    """
    mean = float(mu)
    spread = float(sd)
    n_param = (mean * mean) / (spread * spread - mean)
    p_param = 1 - mean / (n_param + mean)
    return nbinom.rvs(n_param, p_param, size=size)
def count_regression(n):
    """Simulate a count regression and sample its coefficient with an MH
    block sampler.

    ``n`` covariates are drawn from a normal distribution, counts are drawn
    from a negative binomial whose mean is ``exp(x)``, the prior density is
    plotted, and a histogram of 1000 sampled ``theta`` values is shown.
    Returns ``None``.
    """
    X = [norm.rvs(.4, 1 / 9) for i in range(n)]
    U = [np.exp(1 * x) for x in X]
    p = 1 / 2
    # With n = p / (1 - p) * u, scipy's nbinom has mean u.
    Y = [nbinom.rvs(p / (1 - p) * u, p) for u in U]
    data = [[y, x] for y, x in zip(Y, X)]

    def g(x, theta):
        # Moment function for one data row x = [count, covariate]. It must
        # use its own argument rather than the outer covariate list X.
        return (x[0] - np.exp(x[1] * theta)) * x[1]

    def prior(theta):
        return t.pdf(x=theta, df=2.5, loc=0, scale=5)

    x_list = np.arange(-3, 3, .01)
    y_list = [prior(x) for x in x_list]
    plt.plot(x_list, y_list)
    plt.show()

    mh_block_sampler = MH_block_sampler(data, 1, 1, g, prior)
    mh_block_sampler.sample(1000)
    plt.hist([
        mh_block_sampler.thetas[i]
        for i in range(len(mh_block_sampler.thetas))
    ], density=True, bins=30)
    plt.show()
    return
def evaluate_iterated_game(genomes):
    """Score the first genome by playing it against every other genome.

    When using this evaluation function, a list of lists of genomes should be
    provided to the main 'evaluate' function.

    Returns the first agent's average payoff per round minus
    ``INTEL_PENALTY`` times the number of neurons in its network.
    """
    agents = []
    for genome in genomes:
        network = NEAT.NeuralNetwork()
        genome.BuildPhenotype(network)
        agents.append(NeuralNetworkAgent(network))

    focal = agents[0]
    rounds_played = 0.0
    for opponent in agents[1:]:
        # NOTE(review): the third positional argument of scipy's rvs is
        # `loc`, not `size`, so this draws a scalar shifted by 1 and the
        # match lasts at least 2 rounds - confirm that is intended.
        n_rounds = nbinom.rvs(1, 0.02, 1) + 1
        focal.flush()
        opponent.flush()
        focal_payoff = 0
        opponent_payoff = 0
        for _ in range(n_rounds):
            focal_move = focal.get_action([focal_payoff, opponent_payoff])
            opponent_move = opponent.get_action([opponent_payoff, focal_payoff])
            focal_payoff = PAYOFFS[focal_move][opponent_move]
            opponent_payoff = PAYOFFS[opponent_move][focal_move]
            focal.add_payoff(focal_payoff)
            rounds_played += 1.0

    average_payoff = focal.get_total_payoff() / rounds_played
    return average_payoff - INTEL_PENALTY * (len(focal.net.neurons))
def RandSFCorr(NumShow, SF=None, sfp=None, date=None, CalcNumDuck=True, Randomize=True):
    '''RandSFCorr(NumShow, SF=None, sfp=None,date=None,CalcNumDuck=True)
    Make a random-correction to the number of shows in order to get
    the number of geoducks.

    *SF is the show-factor, zero to one.  It takes precedence over the
     show-factor plot.  A list or ndarray of show-factors yields a list
     with one result per show-factor.
    *sfp is an instance of SFplot.  It represents a show-factor plot or a
     list of show-factor plots.
    *date is only relevant if SF is undefined.
    *CalcNumDuck indicates that the number of geoducks in the show-factor
     plot(s) needs to be re-calculated
    *Randomize selects a random correction (True) or the deterministic
     NumShow/SF (False).
    '''
    # Handle sequences first: comparing an ndarray with 1 or None is
    # elementwise and cannot be used as an `if` condition.
    if isinstance(SF, (list, ndarray)):
        return [RandSFCorr(NumShow, SF=sf, CalcNumDuck=CalcNumDuck,
                           Randomize=Randomize)
                for sf in SF]

    if SF is None:
        # Ignore the show-factor effect if there is no indication of the
        # value to use.
        if sfp is None:
            return NumShow
        # Get the show-factor from a set of show-factor data
        sf = sfp.RandSF(date=date, CalcNumDuck=CalcNumDuck)
        return RandSFCorr(NumShow, SF=sf, CalcNumDuck=CalcNumDuck,
                          Randomize=Randomize)

    if SF == 1:
        return NumShow

    # A single value for the show-factor
    if not Randomize:
        return float(NumShow) / SF

    # Probabilistic: add the number of non-showing animals, drawn as the
    # failures seen before NumShow successes with success probability SF.
    return NumShow + nbinom.rvs(NumShow, SF)
def neg_bin(mean, var):
    """Yield an endless stream of negative binomial draws with the given
    mean and variance.

    scipy's ``nbinom(n, p)`` has mean ``n(1-p)/p`` and variance
    ``n(1-p)/p**2``; solving for the parameters gives ``p = mean / var`` and
    ``n = mean * p / (1 - p)``.
    """
    success_prob = mean / var
    size_param = mean * success_prob / (1 - success_prob)
    while True:
        yield nbinom.rvs(size_param, success_prob)
def Mod_NB(x):
    """Draw negative binomial noise around the increments of ``Mod``.

    The last entry of ``x`` is the inverse of the negative binomial size
    parameter; the remaining entries are passed to ``Mod`` together with the
    module-level ``time_f``. The successive differences of the ``Mod`` output
    are used as the means of the draws.
    """
    size_param = 1 / x[-1]
    increments = np.diff(Mod(x[:-1], time_f))
    success_prob = size_param / (increments + size_param)
    return nbinom.rvs(size_param, success_prob)
def generate_negbin(N, r, prior):
    """Draw a success probability from ``prior`` and then ``N`` negative
    binomial samples with size ``r`` and that probability.

    Returns the pair ``(theta, x)``.
    """
    success_prob = prior.rvs()
    samples = nbinom.rvs(r, success_prob, size=N)
    return success_prob, samples
def modelrvs(self, spec):
    """Simulate seed data under the model for species ``spec``.

    For every entry of ``self.sim_data[spec][self.sc]`` the matching entry of
    ``self.data[spec][self.sc]`` is overwritten: with a negative binomial
    draw (``n=r_loc[i]``, ``p=self.p0``) when a uniform draw exceeds
    ``self.q0``, and with 0 otherwise.
    """
    r_loc = self.get_r(spec, sim=True)
    n_obs = len(self.sim_data[spec][self.sc])
    for i in range(n_obs):
        is_nonzero = np.random.random() > self.q0
        self.data[spec][self.sc][i] = (
            nbinom.rvs(n=r_loc[i], p=self.p0) if is_nonzero else 0
        )
def randnegbinom(self, mu, sd, size):
    """Return ``size`` negative binomial draws whose mean is ``mu`` and whose
    standard deviation is ``sd``.

    scipy's ``(n, p)`` parameters are obtained by the method of moments,
    which requires ``sd**2 > mu``.
    """
    mean = float(mu)
    std = float(sd)
    size_param = (mean * mean) / (std * std - mean)
    success_prob = 1 - mean / (size_param + mean)
    return nbinom.rvs(size_param, success_prob, size=size)
def simulator(home_mean, away_mean, niterations):
    """Estimate the probability of a home team win.

    Home and away scores are drawn from negative binomials with size 4 and
    the given means. Tied games are discarded and replayed until
    ``niterations`` decided games have been simulated. Returns the fraction
    of those games won by the home team.
    """
    home_wins = 0
    decided = 0
    while decided < niterations:
        home_score = nbinom.rvs(n=4.0, p=4.0 / (4.0 + home_mean), size=1)[0]
        away_score = nbinom.rvs(n=4.0, p=4.0 / (4.0 + away_mean), size=1)[0]
        if home_score == away_score:
            continue  # a tie does not count as an iteration
        if home_score > away_score:
            home_wins += 1
        decided += 1
    return home_wins / niterations
def samples_deaths(self, new_infections, fatality_ratio, time_to_death, niu, k):
    r"""
    Draw samples for the number of deaths at time step :math:`k`.

    The draws come from a negative binomial distribution with parameters
    ``n = niu * mean`` and ``p = niu / (1 + niu)``, where ``mean`` is the
    value returned by ``self.mean_deaths(fatality_ratio, time_to_death, k,
    new_infections)``. Under scipy's parameterisation this has mean ``mean``
    and variance ``mean * (1 + 1 / niu)``.

    NOTE(review): if the intended variance is ``mean * (niu + 1)``, the
    parameters would need to be ``n = mean / niu`` and ``p = 1 / (1 + niu)``
    - confirm against the model specification.

    Parameters
    ----------
    new_infections
        Number of new infections, passed through to ``mean_deaths``.
    fatality_ratio
        Fatality ratios, passed through to ``mean_deaths``.
    time_to_death
        Probabilities of death d days after infection, passed through to
        ``mean_deaths``.
    niu
        Dispersion factor for the negative binomial distribution.
    k
        Index of the day for which deaths are sampled.

    Returns
    -------
    Random draws of the number of deaths, shaped like the output of
    ``mean_deaths``.
    """
    self._check_time_step_format(k)

    expected_deaths = self.mean_deaths(
        fatality_ratio, time_to_death, k, new_infections)
    return nbinom.rvs(n=niu * expected_deaths, p=niu / (1 + niu))
def draw_nbinom_dataset(draw, n_samples):
    """
    Generate random samples from a negative binomial model.

    ``draw`` is a sample from the model parameter space (probably posterior
    but could be prior); its 1st element is taken as the burst rate and its
    2nd element as the mean burst size.

    Returns the unique sampled values together with their counts.
    """
    burst_rate = draw[0]
    mean_burst_size = draw[1]
    pp_samples = neg_binom.rvs(
        burst_rate, (1 + mean_burst_size) ** (-1), size=n_samples)
    return np.unique(pp_samples, return_counts=True)
def simulate_ge(self, negative_binomial):
    """Simulate gene expression counts from the latent representation.

    Draws a loading matrix ``self.W`` and offsets ``self.beta`` from normal
    distributions, computes the means ``self.mu = exp(z @ W.T + beta)``
    clipped to ``[0, 1e5]``, and stores the simulated counts in ``self.X``.

    Parameters
    ----------
    negative_binomial
        If true, counts are negative binomial with parameters obtained from
        ``convert_params_NB(mu, alpha)``; otherwise ``poisson(self.mu)`` is
        used and stored as float64.

    When ``self.vis`` is true a histogram of ``self.X`` is displayed.
    """
    # self.dim is the dimension of the initial space (i.e. number of genes)
    self.W = np.random.normal(loc=0, scale=0.5, size=(self.dim, self.latent))
    self.beta = np.random.normal(loc=0, scale=0.5, size=self.dim)
    self.mu = np.clip(a=np.exp(self.z @ self.W.T + self.beta),
                      a_min=0,
                      a_max=1e5)

    if negative_binomial:
        print('=== Negative Binomial simulations ===')
        r, p = convert_params_NB(mu=self.mu, alpha=self.alpha)
        self.X = nbinom.rvs(n=r, p=p)
    else:
        self.X = np.asarray(poisson(self.mu), dtype=np.float64)

    if self.vis:
        _, axes = plt.subplots(
            1,
            1,
            figsize=(14, 8),
            sharey=True,
        )
        bins = np.arange(0, 30, 5)
        # matplotlib.cm.get_cmap was removed in Matplotlib 3.9;
        # pyplot.get_cmap is available in both old and new releases.
        cm = plt.get_cmap('RdYlBu_r')

        _, binss, patches = axes.hist(
            self.X,
            bins=bins,
            edgecolor='black',
        )
        # set color of patches: scale bin centers to the interval [0, 1]
        bin_centers = 0.5 * (binss[:-1] + binss[1:])
        col = bin_centers - min(bin_centers)
        col /= max(col)

        for c, patch in zip(col, patches):
            plt.setp(patch, 'facecolor', cm(c))
        axes.set_title('Histogram of simulated gene expression data')
        plt.ylabel('Counts')
        plt.xlabel('Gene Expression value')
        plt.legend(['gene_' + str(i) for i in list(range(self.dim))],
                   loc='best')
        plt.show()
def C1(self):
    """Subsample ``self.snv`` with negative binomial noise and cache it.

    ``self.snv`` is reshaped to ``(3, 3, -1, self.p, self.samples)`` and used
    as the mean of a negative binomial with size ``self.size``. The draw is
    stored in ``self._C1`` as a TensorFlow constant and returned.
    """
    print('Subsampling C1.')
    mean_counts = self.snv.reshape(3, 3, -1, self.p, self.samples)
    success_prob = self.size / (self.size + mean_counts)
    self._C1 = tf.constant(nbinom.rvs(self.size, success_prob),
                           dtype=self.dtype)
    if self.verbose:
        print('C1:', self._C1.shape)
    return self._C1
def corner_spread(home_corners, away_corners, corner_mean, niterations):
    """Estimate how often simulated home + away corners exceed a simulated
    total.

    Each iteration draws home, away and total corner counts from negative
    binomials with size 4 and the given means. Iterations where the sum
    equals the drawn total are discarded and replayed. Returns the fraction
    of the ``niterations`` kept iterations in which home + away is larger.
    """
    # NOTE(review): scipy draws from NumPy's global RNG, so this seed only
    # makes the result reproducible if `random` is numpy.random - confirm.
    random.seed(1234)
    n_count = 4.0

    def draw(mean):
        return nbinom.rvs(n=n_count, p=n_count / (n_count + mean), size=1)[0]

    overs = 0
    decided = 0
    while decided < niterations:
        home = draw(home_corners)
        away = draw(away_corners)
        line = draw(corner_mean)
        home_plus_away = home + away
        if home_plus_away == line:
            continue  # pushes are not counted
        if home_plus_away > line:
            overs += 1
        decided += 1
    return overs / float(niterations)
def sample_usage():
    """Exercise the NBinom estimators on simulated data.

    Draws 100 negative binomial counts with ``m = 10`` and success
    probability ``t / (t + theta)`` for ``t = 1`` and ``theta = 0.7``, calls
    the log-likelihood and numeric gradient, fits ``NBinom2`` by gradient
    descent and returns whether the summed parameter error is below 0.1.
    """
    m, theta = 10, .7
    t_arr = np.ones(100)
    n_arr = nbinom.rvs(m, t_arr / (t_arr + theta))
    NBinom.loglik(n_arr, t_arr, m, theta)
    NBinom.numeric_grad(n_arr, t_arr, m, theta)
    params = NBinom2(n_arr, t_arr).gradient_descent(verbose=True)
    # NOTE(review): this sums signed errors, so estimates far below the true
    # values also pass - confirm whether absolute errors were intended.
    signed_error = sum(params - np.array([m, theta]))
    return signed_error < 1e-1
def observationGenerator(states, parameters):
    """Generate two negative binomial observations per state.

    ``states`` is 2D (numStates x dimObs) and only its first column is used;
    ``parameters`` is 1D and only its first entry is used. For each state the
    success probability is ``1 / (1 + parameters[0] * state)``, clipped to
    ``[1e-7, 1 - 1e-7]``, and the size is ``floor(state * p / (1 - p))`` with
    a minimum of 1.

    Returns a float array of shape ``(numStates, 2)``.
    """
    obs = zeros((states.shape[0], 2))
    for i in range(states.shape[0]):
        p = 1 / (1 + parameters[0] * states[i, 0])
        p = minimum(p, 1 - 1e-7)
        p = maximum(p, 1e-7)
        n = maximum(1, floor(states[i, 0] * p / (1 - p))).astype(int32)
        # `size` must be passed by keyword: the third positional argument of
        # scipy's rvs is `loc`, which would shift a single draw by 2 and
        # copy it into both columns.
        obs[i, :] = nbinom.rvs(n, p, size=2)
    return obs
def C2(self):
    """Subsample ``self.other`` with negative binomial noise and cache it.

    NaN entries of ``self.other`` are replaced by 0 in place, and a 0/1
    indicator of the non-NaN entries is stored in ``self.C2_nans``. The
    cleaned array is then used as the mean of a negative binomial with size
    ``self.size``; the draw is stored in ``self._C2`` and returned.
    """
    missing = np.isnan(self.other)
    sub_set = np.ones_like(self.other)
    sub_set[missing] = 0
    self.other[missing] = 0
    self.C2_nans = tf.constant(sub_set, dtype=self.dtype)
    print('Subsampling C2')
    success_prob = self.size / (self.size + self.other)
    self._C2 = tf.constant(nbinom.rvs(self.size, success_prob),
                           dtype=self.dtype)
    if self.verbose:
        print('C2:', self._C2.shape)
    return self._C2
def __init__(self):
    """Create a simulated user with a random number of sessions.

    The user always has one session; with the probability that a uniform
    draw exceeds ``sessions_zero_inflation`` a negative binomial number of
    extra sessions is added. Session start times are produced by
    ``fuzz_time``.
    """
    # total sessions this user will have:
    has_extra_sessions = int(uniform.rvs() > sessions_zero_inflation)
    extra_sessions = nbinom.rvs(4, beta.rvs(12, 10))
    self.num_sessions = 1 + has_extra_sessions * extra_sessions

    self.first_session = fuzz_time(local_epoch)
    later_sessions = [
        fuzz_time(self.first_session) for _ in range(self.num_sessions - 1)
    ]
    self.session_starts = later_sessions + [self.first_session]
    self.next_session = self.first_session
    self.guid = uuid4()
    self._current_cart = 0  # num items currently in cart
def gentestcase2(nsg=10):
    '''
    The second testcase: a baseline plus the four samples described by a
    fixed 4x3 design matrix.

    Returns a SimCaseSimple whose mu, var, nb_p, nb_r and nb_count each hold
    one row for the baseline followed by one row per sample, with nsg
    entries per row.
    '''
    vark = 0.01
    desmat = np.matrix([[1, 0, 0], [0, 1, 0], [0, 1, 1], [1, 1, 1]])
    nsample, nbeta = desmat.shape
    # basic parameters
    sks = SimCaseSimple()
    sks.prefix = 'sample2'
    sks.design_mat = desmat
    sks.beta0 = [random.uniform(3, 10) for _ in range(nsg)]  # these are the base
    sks.beta1 = [random.random() * 5 for _ in range(nbeta)]  # treatments; size: nbeta
    print('beta_0:' + '\t'.join([decformat(b) for b in sks.beta0]))
    print('beta_1:' + '\t'.join([decformat(b) for b in sks.beta1]))
    # mean and variance
    sample_effects = [
        row[0] for row in (desmat * np.matrix(sks.beta1).getT()).tolist()
    ]  # size: nsample
    sks.mu = [[math.exp(b) for b in sks.beta0]]  # baseline row, size: nsg
    for effect in sample_effects:
        sks.mu.append([math.exp(b + effect) for b in sks.beta0])
    sks.var = [[m + vark * (m * m) for m in row] for row in sks.mu]
    # including 1 base and n samples
    for i, (mu_row, var_row) in enumerate(zip(sks.mu, sks.var)):
        print('mu_:' + str(i) + '\t'.join([decformat(v) for v in mu_row]))
        print('var_:' + str(i) + '\t'.join([decformat(v) for v in var_row]))
    # parameters for generating NB counts
    sks.nb_p = [[m / v for m, v in zip(mu_row, var_row)]
                for mu_row, var_row in zip(sks.mu, sks.var)]
    sks.nb_r = [[m * m / (v - m) for m, v in zip(mu_row, var_row)]
                for mu_row, var_row in zip(sks.mu, sks.var)]
    sks.nb_count = [[nbinom.rvs(r, p) for r, p in zip(r_row, p_row)]
                    for r_row, p_row in zip(sks.nb_r, sks.nb_p)]
    return (sks)
def gentestcase3(nsg=10, desmat=None):
    '''
    The third testcase, with sgRNA efficiency.

    nsg is the number of sgRNAs; desmat is an optional design matrix
    (samples x betas) that defaults to a fixed 4x3 matrix. The treatment
    effect is only applied to sgRNAs flagged as efficient.

    Returns a SimCaseSimple whose mu, var, nb_p, nb_r and nb_count each hold
    one row for the baseline followed by one row per sample.
    '''
    vark = 0.01
    effiprob = 0.5  # the probability that a sgRNA is efficient
    # `is None` is required: comparing a matrix with `== None` is
    # elementwise and cannot be used as an `if` condition.
    if desmat is None:
        desmat = np.matrix([[1, 0, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]]).getT()
    (nsample, nbeta) = desmat.shape
    # basic parameters
    sks = SimCaseSimple()
    sks.prefix = 'sample3'
    sks.design_mat = desmat
    sks.beta0 = [random.uniform(5, 10) for i in range(nsg)]  # these are the base
    sks.beta1 = [(random.random() * 2 - 1) * 5 for i in range(nbeta)]  # treatments; size: nbeta
    print('beta_0:' + '\t'.join([decformat(x) for x in sks.beta0]))
    print('beta_1:' + '\t'.join([decformat(x) for x in sks.beta1]))
    # efficiency
    # NOTE(review): this flags an sgRNA as efficient with probability
    # 1 - effiprob; identical at the current 0.5 - confirm before changing
    # effiprob.
    sks.isefficient = [1 if random.random() >= effiprob else 0
                       for i in range(nsg)]
    # mean and variance
    mu0 = [math.exp(t) for t in sks.beta0]  # size: nsg
    tprod = desmat * np.matrix(sks.beta1).getT()  # size: nsample*1
    tprodlist = [x[0] for x in tprod.tolist()]  # size: nsample*1
    sks.mu = [mu0]
    for nr in range(nsample):
        sgi = [math.exp(sks.beta0[ti] + tprodlist[nr] * sks.isefficient[ti])
               for ti in range(nsg)]
        sks.mu += [sgi]
    sks.var = [[t + vark * (t * t) for t in tl] for tl in sks.mu]
    for i in range(nsample + 1):  # including 1 base and n samples
        print('mu_:' + str(i) + '\t'.join([decformat(x) for x in sks.mu[i]]))
        print('var_:' + str(i) + '\t'.join([decformat(x) for x in sks.var[i]]))
    # parameters for generating NB counts
    sks.nb_p = [[sks.mu[t][i] / sks.var[t][i] for i in range(nsg)]
                for t in range(nsample + 1)]
    sks.nb_r = [[sks.mu[t][i] * sks.mu[t][i] / (sks.var[t][i] - sks.mu[t][i])
                 for i in range(nsg)]
                for t in range(nsample + 1)]
    sks.nb_count = [[nbinom.rvs(sks.nb_r[t][i], sks.nb_p[t][i])
                     for i in range(nsg)]
                    for t in range(nsample + 1)]
    print('efficient: ' + ' '.join([str(x) for x in sks.isefficient]))
    return (sks)
def get_num_random_interactions(age, random_network_params_dict,
                                child_upper_ix, adult_upper_ix):
    """Draw the number of random interactions for a person of a given age.

    The age selects the 'CHILD', 'ADULT' or 'ELDERLY' entry of
    ``random_network_params_dict``; its 'mu' and 'sigma' are converted to
    negative binomial parameters by the method of moments and one value is
    drawn.
    """
    if age <= child_upper_ix:
        group = 'CHILD'
    elif age <= adult_upper_ix:
        group = 'ADULT'
    else:
        group = 'ELDERLY'
    mean = random_network_params_dict[group]['mu']
    sd = random_network_params_dict[group]['sigma']
    variance = sd * sd
    success_prob = mean / variance
    size_param = mean * mean / (variance - mean)
    return nbinom.rvs(size_param, success_prob)
def bin_neg_simulation(k, p, n=10000, odd=99):
    """
    Return a trimmed mean of a simulated negative binomial distribution for
    the given parameters.

    ``n`` values are drawn with size ``k`` and success probability ``p``; the
    mean of the draws lying between two percentiles of the sample is
    returned, truncated to an int. Returns None when ``k <= 0``.
    """
    if k <= 0:
        return None
    lower_pct = 100 - odd
    # NOTE(review): this always evaluates to 100, so only the lower tail is
    # trimmed - confirm whether `odd` was intended as the upper percentile.
    upper_pct = odd + (100 - odd)
    draws = nbinom.rvs(k, p, size=n)
    limits = (
        scipy.stats.scoreatpercentile(draws, lower_pct),
        scipy.stats.scoreatpercentile(draws, upper_pct),
    )
    return int(scipy.stats.tmean(draws, limits))
def negbinomial_dist(n, mu, max_value=10, min_value=0, num_values=10000,
                     integers=False):
    """
    Generate a negative binomial sample and rescale it.

    ``num_values`` values are drawn with size ``n`` and mean ``mu`` and then
    passed to ``scale_a_distribution`` together with ``integers``,
    ``max_value`` and ``min_value``.
    """
    success_prob = 1 / ((mu / n) + 1)
    draws = nbinom.rvs(n=n, p=success_prob, size=num_values)
    return scale_a_distribution(
        draws, integers=integers, max_value=max_value, min_value=min_value)
def _random_noise(df, noise_factor):
    r"""
    Generates random noise on an observable by a Negative Binomial :math:`NB`.
    References to the negative binomial can be found
    `here <https://ncss-wpengine.netdna-ssl.com/wp-content/themes/ncss/pdf/Procedures/NCSS/Negative_Binomial_Regression.pdf>`_
    .

    .. math::
        O &\sim NB(\mu=datapoint,\alpha)

    A low alpha keeps the variance small, so that it stays approximately the
    size of the mean. Entries that are zero or NaN are left untouched.

    Parameters
    ----------
    df : new_cases , pandas.DataFrame
        Observable on which we want to add the noise; it is modified in
        place.

    noise_factor: :math:`\alpha`
        Alpha factor for the random number generation

    Returns
    -------
    The same ``df`` with every column replaced by its noisy version.
    """
    # Apply noise on every column
    for column in df:
        values = df[column].to_numpy()
        for i, observed in enumerate(values):
            if (observed == 0) or (np.isnan(observed)):
                continue
            log.debug(f"Data {observed}")
            # Convert (mu, alpha) to scipy's (n, p) parameterisation
            r = 1 / noise_factor
            p = 1 - observed / (observed + r)
            log.info(f"n {r}, p {p}")
            mean, var = nbinom.stats(r, p, moments="mv")
            log.debug(f"mean {mean} var {var}")
            values[i] = nbinom.rvs(r, p)
            log.debug(f"Drawn {values[i]}")
        df[column] = values
    return df
def play(self, player1, player2, n_rounds=None):
    """Play an iterated game between two players.

    Both players are reset, asked for an initial action in the first round
    and afterwards for an action given both players' previous payoff and
    action. Payoffs are looked up in ``self.payoff_matrix``.

    Returns ``(avg_payoffs, traces)``: a ``(1, 2)`` array of per-round average
    payoffs and a pair with each player's list of actions.
    """
    if n_rounds is None:
        # NOTE(review): the third positional argument of scipy's rvs is
        # `loc`, not `size`, so this is a scalar draw shifted by 1 and the
        # game lasts at least 2 rounds - confirm that is intended.
        n_rounds = nbinom.rvs(1,0.2,1) + 1

    player1.reset()
    player2.reset()

    total_payoffs = np.zeros((1, 2))
    p1_payoff = p2_payoff = 0
    p1_action = p2_action = None
    # The same two dicts are updated and handed to the players every round.
    p1_results, p2_results = {}, {}
    p1_trace, p2_trace = [], []

    for round_index in range(n_rounds):
        if round_index == 0:
            p1_action = player1.get_initial_action()
            p2_action = player2.get_initial_action()
        else:
            p1_results.update(payoff=p1_payoff, action=p1_action)
            p2_results.update(payoff=p2_payoff, action=p2_action)
            p1_action = player1.get_action([p1_results, p2_results])
            p2_action = player2.get_action([p2_results, p1_results])

        p1_trace.append(p1_action)
        p2_trace.append(p2_action)
        p1_payoff = self.payoff_matrix[p1_action, p2_action]
        p2_payoff = self.payoff_matrix[p2_action, p1_action]
        total_payoffs += [[p1_payoff, p2_payoff]]

    return total_payoffs / float(n_rounds), (p1_trace, p2_trace)
def inversion(self):
    """Reverse a randomly placed segment of ``self.sequence`` in place.

    The start position is random (0 for sequences of length 0 or 1) and the
    segment length is a negative binomial draw governed by the
    ``genetics.mutation_length`` setting.
    """
    n_items = len(self.sequence)
    start = 0 if n_items <= 1 else random.randint(0, n_items - 1)

    rate = AppSettings().genetics.mutation_length
    stop = start + nbinom.rvs(1, rate / (1 + rate))

    head = self.sequence[:start]
    segment = self.sequence[start:stop]
    tail = self.sequence[stop:]
    self.sequence = head + segment[::-1] + tail
def update_state(self, state, rolling_time):
    """Advance the shopping funnel by one step.

    States: land_homepage, land_item, add_to_cart, enter_checkout,
    enter_address, enter_ccard, complete.

    A uniform draw picks the transition for the current ``state``. When a
    transition fires, ``rolling_time`` is advanced by one minute plus a
    negative binomial number of minutes. Returns ``(new_state,
    rolling_time)``; ``new_state`` is ``''`` when no transition fires.
    """
    # state -> ordered rules:
    # (upper bound on r, next state, nbinom n, nbinom p, needs items in cart)
    transitions = {
        'land_homepage': (
            (.8, 'land_item', 1, .5, False),
        ),
        'land_item': (
            (.3, 'land_item', 2, .5, False),
            (.7, 'add_to_cart', 2, .5, False),
            (.8, 'enter_checkout', 4, .5, True),
        ),
        'add_to_cart': (
            (.6, 'enter_checkout', 1, .5, False),
            (.9, 'land_item', 4, .5, False),
        ),
        'enter_checkout': (
            (.98, 'enter_address', 1, .8, False),
        ),
        'enter_address': (
            (.97, "enter_ccard", 1, .9, False),
        ),
        'enter_ccard': (
            (.9, 'complete', 1, .8, False),
        ),
    }
    r = uniform.rvs()
    for upper, target, nb_n, nb_p, needs_cart in transitions.get(state, ()):
        if needs_cart and not self._current_cart > 0:
            continue
        if r < upper:
            rolling_time += timedelta(minutes=1 + nbinom.rvs(nb_n, nb_p))
            return target, rolling_time
    return '', rolling_time
def gentestcase1(nsg=10):
    '''
    The first testcase, 2 samples, control and treatment.

    Returns a SimCaseSimple whose mu, var, nb_p, nb_r and nb_count each hold
    two rows (control, treatment) of nsg entries.
    '''
    vark = 0.01
    # basic parameters
    sks = SimCaseSimple()
    sks.beta0 = [random.uniform(3, 10) for i in range(nsg)]  # these are the base
    sks.beta1 = [random.random() * 5]  # treatment
    print('beta_0:' + '\t'.join([decformat(x) for x in sks.beta0]))
    print('beta_1:' + decformat(sks.beta1[0]))
    # mean and variance: exactly one control row and one treatment row.
    # (The treatment row used to be appended once per entry of beta0, which
    # left mu with nsg + 1 rows while every other attribute had 2.)
    sks.mu = [
        [math.exp(t) for t in sks.beta0],
        [math.exp(t + sks.beta1[0]) for t in sks.beta0],
    ]
    sks.var = [[t + vark * (t * t) for t in sks.mu[i]] for i in range(2)]
    # parameters for generating NB counts
    sks.nb_p = [[sks.mu[j][i] / sks.var[j][i] for i in range(nsg)]
                for j in range(2)]
    sks.nb_r = [[
        sks.mu[j][i] * sks.mu[j][i] / (sks.var[j][i] - sks.mu[j][i])
        for i in range(nsg)
    ] for j in range(2)]
    sks.nb_count = [[
        nbinom.rvs(sks.nb_r[j][i], sks.nb_p[j][i]) for i in range(nsg)
    ] for j in range(2)]
    # design matrix
    sks.design_mat = np.matrix([[1]])
    return (sks)
def gentestcase2(nsg=10):
    '''
    The second testcase: a baseline plus the four samples described by a
    fixed 4x3 design matrix.

    Returns a SimCaseSimple whose mu, var, nb_p, nb_r and nb_count each hold
    one row for the baseline followed by one row per sample, with nsg
    entries per row.
    '''
    vark = 0.01
    desmat = np.matrix([[1, 0, 0], [0, 1, 0], [0, 1, 1], [1, 1, 1]])
    nsample, nbeta = desmat.shape
    # basic parameters
    sks = SimCaseSimple()
    sks.prefix = 'sample2'
    sks.design_mat = desmat
    sks.beta0 = [random.uniform(3, 10) for _ in range(nsg)]  # these are the base
    sks.beta1 = [random.random() * 5 for _ in range(nbeta)]  # treatments; size: nbeta
    print('beta_0:' + '\t'.join([decformat(b) for b in sks.beta0]))
    print('beta_1:' + '\t'.join([decformat(b) for b in sks.beta1]))
    # mean and variance
    sample_effects = [
        row[0] for row in (desmat * np.matrix(sks.beta1).getT()).tolist()
    ]  # size: nsample
    sks.mu = [[math.exp(b) for b in sks.beta0]]  # baseline row, size: nsg
    for effect in sample_effects:
        sks.mu.append([math.exp(b + effect) for b in sks.beta0])
    sks.var = [[m + vark * (m * m) for m in row] for row in sks.mu]
    # including 1 base and n samples
    for i, (mu_row, var_row) in enumerate(zip(sks.mu, sks.var)):
        print('mu_:' + str(i) + '\t'.join([decformat(v) for v in mu_row]))
        print('var_:' + str(i) + '\t'.join([decformat(v) for v in var_row]))
    # parameters for generating NB counts
    sks.nb_p = [[m / v for m, v in zip(mu_row, var_row)]
                for mu_row, var_row in zip(sks.mu, sks.var)]
    sks.nb_r = [[m * m / (v - m) for m, v in zip(mu_row, var_row)]
                for mu_row, var_row in zip(sks.mu, sks.var)]
    sks.nb_count = [[nbinom.rvs(r, p) for r, p in zip(r_row, p_row)]
                    for r_row, p_row in zip(sks.nb_r, sks.nb_p)]
    return (sks)
def nextGenStoch(self,inf=True,BFOD=True):
    # Advance the stochastic seed-bank / plant community model by one
    # generation and return (self.species, self.numSeeds).
    #   inf  : when true, seeds are split into infected/uninfected before
    #          germination and the new seed crop is thinned by self.infMat.
    #   BFOD : when true, the seed bank is first thinned by self.BFODmat.
    # Every step is a random draw, so the statement order determines the
    # random stream.
    # this function is currently deprecated
    # some of these seeds die from BFOD
    if BFOD:
        self.numSeeds = binom.rvs(n=self.numSeeds,p=self.BFODmat)[:]
    # some of remaining seeds get pathogen infected
    germ = []
    if inf:
        infSeeds = binom.rvs(n=self.numSeeds,p=self.pInf)[:]
        unInfSeeds = np.subtract(self.numSeeds,infSeeds)[:]
        # some of seeds germinate
        germ = binom.rvs(p=self.germMatUninf,n=unInfSeeds)[:]
        germ = np.add(germ,binom.rvs(p=self.germMatInf,n=infSeeds)[:])[:]
    else:
        germ = binom.rvs(p=self.germMatUninf,n=self.numSeeds)[:]
    # subtract out germinated seeds from total number of seeds to keep in seed bank
    self.numSeeds = np.subtract(self.numSeeds,germ)[:]
    # incorporate perennials that survived
    self.species = np.add(
        binom.rvs(p=self.perSurv,n=self.species.astype(int)).astype(int),
        binom.rvs(p=self.annuals,n=germ.astype(int).astype(int))).astype(int)[:]
    # competition
    compMat = np.exp(-1.*np.dot(self.compParams,self.species))
    num_seeds_this_gen = np.zeros(self.numSpec)
    # each species' seed output is the sum of one negative binomial draw per
    # individual
    for i in range(self.numSpec):
        num_seeds_this_gen[i] += np.sum(
            nbinom.rvs(n=self.get_r(compMat,i),p=self.negBinP[i],size=self.species[i]))
    # infection
    if inf:
        num_seeds_this_gen = binom.rvs(p=self.infMat,n=num_seeds_this_gen.astype(int))[:]
    # add in new seeds to model
    self.numSeeds = np.add(self.numSeeds,num_seeds_this_gen.astype(int))[:]
    # add seedlings from perennials to seedlings, and ones that survived from last year to adults
    # NOTE(review): only germ[0] and germ[1] are carried over as seedlings and
    # the array is fixed at 5 entries - presumably 5 species with the first
    # two perennial; confirm.
    self.seedlings = np.array([germ[0],germ[1],0,0,0])[:]
    self.species = np.add(
        self.species,
        binom.rvs(
            p=self.perTrans[3]/(1.+(self.seedlings[0]+self.seedlings[1])*self.perTrans[0]+(self.species[0]+self.species[1])*self.perTrans[2] +(self.species[2]+self.species[3]+self.species[4])*self.perTrans[1]),
            n=self.seedlings.astype(int)).astype(int))[:]
    return (self.species,self.numSeeds)
def gentestcase1(nsg=10):
    '''
    The first testcase, 2 samples, control and treatment.

    Returns a SimCaseSimple whose mu, var, nb_p, nb_r and nb_count each hold
    two rows (control, treatment) of nsg entries.
    '''
    vark=0.01
    # basic parameters
    sks=SimCaseSimple()
    sks.beta0=[random.uniform(3,10) for i in range(nsg)] # these are the base
    sks.beta1=[random.random()*5] # treatment
    print('beta_0:'+'\t'.join([decformat(x) for x in sks.beta0]))
    print('beta_1:'+decformat(sks.beta1[0]))
    # mean and variance: exactly one control row and one treatment row.
    # (The treatment row used to be appended once per entry of beta0, which
    # left mu with nsg + 1 rows while every other attribute had 2.)
    sks.mu=[
        [math.exp(t) for t in sks.beta0],
        [math.exp(t+sks.beta1[0]) for t in sks.beta0],
    ]
    sks.var=[[t+vark*(t*t) for t in sks.mu[i]] for i in range(2)]
    # parameters for generating NB counts
    sks.nb_p=[[sks.mu[j][i]/sks.var[j][i] for i in range(nsg)] for j in range(2)]
    sks.nb_r=[[sks.mu[j][i]*sks.mu[j][i]/(sks.var[j][i]-sks.mu[j][i])
               for i in range(nsg)] for j in range(2)]
    sks.nb_count=[[nbinom.rvs(sks.nb_r[j][i],sks.nb_p[j][i])
                   for i in range(nsg)] for j in range(2)]
    # design matrix
    sks.design_mat=np.matrix([[1]])
    return (sks)
def sample_negative_binomial(p, r):
    """Return one negative binomial draw with size ``r`` and success
    probability ``p`` as a Python int.

    Note the argument order: ``p`` comes first here, while scipy expects the
    size first.
    """
    draw = nbinom.rvs(r, p)
    return int(draw)
def meanVar(_files, _gff_file , _output):
    """Count reads per gene, fit a mean-variance trend and simulate NB counts.

    Steps, as implemented below:
      1. Build the gene set from ``_gff_file`` according to the module-level MODE.
      2. Count reads for every entry of ``_files`` via ``countSam``.
      3. Write the raw counts to ``<_output>.rawcounts``.
      4. Fit ``var ~ mean`` with R's loess (through rpy2) on genes with mean > 0.
      5. Replace each gene's counts with NREPS negative-binomial draws and
         write them to ``<_output>.nbcounts``.

    Relies on names defined outside this function: MODE, GENE, TX, EXON, NREPS,
    countSam, HTSeq, robjects, r, np, math, nbinom.

    Returns the dictionary mapping gene name -> list of simulated counts.
    Exits the process when only one input file is given.
    """
    NFILE=len(_files)
    if NFILE == 1:
        sys.stderr.write("Need at least two samples for each group.\n")
        sys.exit(1)
    #####
    _dict_counts = dict() ## dictionary of gene counts
    _genes = HTSeq.GenomicArrayOfSets("auto",stranded=False)
    idx=0  # column of the per-gene count list that the current input file fills
    if MODE == "all-genes":
        for feature in _gff_file:
            if feature.type in GENE:
                _dict_counts[ feature.name ] = [0]*NFILE
                _genes[feature.iv] += feature.name
            if feature.type in TX:
                # transcript whose gene has not been registered yet
                if feature.attr["geneID"] not in _dict_counts:
                    _dict_counts[feature.attr["geneID"]] = [0]*NFILE
                    _genes[feature.iv] += feature.attr["geneID"]
    if MODE == "AS-genes":
        ## Bug: Does not report last gene in gff if it has at least two transcript
        transcript= set()
        cur_line = None
        last_gene_id = None
        for feature in _gff_file:
            if feature.type in GENE:
                # a new gene line closes the previous gene: keep it only if it
                # collected more than one transcript
                if len(transcript) >1:
                    _dict_counts[ cur_line.name ] = [0]*NFILE
                    _genes[cur_line.iv] += cur_line.name
                cur_line = feature
                transcript.clear()
            if feature.type in TX:
                key = None
                if "geneID" in feature.attr:
                    key = "geneID"
                elif "Parent" in feature.attr:
                    key = "Parent"
                else:
                    # NOTE(review): key stays None here, so the lookups below
                    # use feature.attr[None] -- confirm this is intended.
                    sys.stderr.write("transcript line does not have Parent or geneID field\n")
                if last_gene_id == feature.attr[key]:
                    transcript.add(feature.attr["ID"])
                else:
                    if len(transcript) > 1:
                        if feature.attr[key] not in _dict_counts:
                            _dict_counts[feature.attr[key]] = [0]*NFILE
                            _genes[feature.iv] += feature.attr[key]
                    transcript.clear()
                    transcript.add(feature.attr["ID"])
                    last_gene_id = feature.attr[key]
            if feature.type in EXON:
                transcript.add(feature.attr["Parent"])
    print "num of genes to simulate: ", len(_dict_counts)
    _file_raw_count = open(_output+'.rawcounts','w')
    _file_nb_count = open(_output+'.nbcounts','w')
    ## This loop read through the input list and call countSam for each input file
    for f in _files:
        sam_file=HTSeq.SAM_Reader(f)
        _dict_counts=countSam(sam_file, _genes,_dict_counts, idx)
        # NOTE(review): f is the same object handed to HTSeq.SAM_Reader; if
        # _files holds path strings this call fails -- confirm what callers pass.
        f.close()
        idx += 1
        sys.stderr.write("library %d has generated.\n" % idx)
    ## Print raw counts in file specified by <out>
    for key, value in sorted(_dict_counts.iteritems()):
        _file_raw_count.write(key+"\t"+"\t".join(map(str,value))+"\n")
    _file_raw_count.close()
    ## calculate group mean and variance
    list_mean = list()
    list_var = list()
    for key, value in sorted(_dict_counts.iteritems()):
        list_mean.append(np.mean(np.array(value)))
        list_var.append(np.var(np.array(value)))
    ## compute loess estimates
    ## The following code is using rpy2 module
    a = robjects.FloatVector(list_mean)
    b = robjects.FloatVector(list_var)
    df = robjects.DataFrame({"mean": a, "var": b})
    non0_df=df.rx(df.rx2("mean").ro > 0, True) ## subsetting if mean > 0
    loess_fit = r.loess("var ~ mean", data=non0_df, degree=2)
    '''
    #good-of-fit test:
    variance=r.predict(loess_fit, 1000)
    print variance[0]
    print (1000*1000)/(variance[0]-1000)
    '''
    # predictions are requested for every gene, including those with mean 0
    # that were excluded from the fit
    var_pred = r.predict(loess_fit, a)
    # This loop overwrites the entries of _dict_counts with the newly simulated count data
    count_idx = 0
    for key, value in sorted(_dict_counts.iteritems()):
        # NOTE(review): no guard for var_pred == mean (division by zero) or for
        # a non-finite prediction before int() -- confirm inputs rule this out.
        n = math.pow(list_mean[count_idx],2)/(var_pred[count_idx]-list_mean[count_idx])
        n = int(n) # n: number of failures
        if n<=0:
            _dict_counts[key] = [0]*NREPS
        else:
            p = n/float(n+list_mean[count_idx]) # p: prob of success
            _dict_counts[key] = nbinom.rvs(n, p, size=NREPS).tolist()
        count_idx += 1
    #var_pred = r.predict(loess_fit, a)
    for key, value in sorted(_dict_counts.iteritems()):
        _file_nb_count.write(key+"\t"+"\t".join(map(str,value))+"\n")
    _file_nb_count.close()
    # the raw-count file was already closed above; this second close is redundant
    _file_raw_count.close()
    return _dict_counts
import statsmodels.api as sm # Data np.random.seed(141) # set seed to replicate example nobs= 2500 # number of obs in model x1 = binom.rvs(1, 0.6, size=nobs) # categorical explanatory variable x2 = uniform.rvs(size=nobs) # real explanatory variable theta = 0.303 X = sm.add_constant(np.column_stack((x1, x2))) beta = [1.0, 2.0, -1.5] xb = np.dot(X, beta) # linear predictor exb = np.exp(xb) nby = nbinom.rvs(exb, theta) mydata = {} # build data dictionary mydata['N'] = nobs # sample size mydata['X'] = X # predictors mydata['Y'] = nby # response variable mydata['K'] = len(beta) # Fit stan_code = """ data{ int N; int K; matrix[N,K] X; int Y[N];
def draw_from_negative_binomial(mu, phi):
    """Draw one negative-binomial variate given a mean and a dispersion size.

    ``phi`` is used as the size parameter and the success probability is
    ``1 / (1 + mu / phi)``; both are passed straight to ``nbinom.rvs``.
    """
    success_prob = 1/(1+mu/phi)
    return nbinom.rvs(phi, success_prob)
print"##INFO=<ID=%s,Number=.,Type=%s,Description=\"%s\">" % (INFO.id, INFO.type, INFO.desc) for fmat in FORMAT: print"##FORMAT=<ID=%s,Number=.,Type=%s,Description=\"%s\">" % (fmat.id, fmat.type, fmat.desc) print '##analysis=simulate_dp.py --lambda %f --epsilon %f --dispersion_mean %f --dispersion_sd %f --seed %s' % (args.lamb, args.epsilon, args.dmean, args.dsd, str(args.seed)) print line continue fields = line.split() genotypes = fields[9:] samples = [] for gt in genotypes: # scipy nbinom takes (n, p) as arguments. # Convolution: sum_{i=1}{x}nbinom(n, p) = nbinom(xn, p). if gt.count('0')==0: dp_ref = 0 else: dp_ref = nbinom.rvs(lamb * gt.count('0') / ((d - 1) * 2), 1 / d) if gt.count('1')==0: dp_alt = 0 else: dp_alt = nbinom.rvs(lamb * gt.count('1') / ((d - 1) * 2), 1 / d) dp = dp_ref + dp_alt pl = calculate_pl(dp_ref, dp_alt) if pl: sample = "%s:%i,%i:%i:%i,%i,%i" % (gt, dp_ref, dp_alt, dp, pl[0], pl[1], pl[2]) samples.append(sample) else: samples.append(gt) info = '%s;Dispersion=%s' % (fields[7], '{0:.3f}'.format(d)) output = fields[:7] + [info] + ['GT:AD:DP:PL'] + samples print '\t'.join(output)
def main():
    """Run the differential testing workflow.

    Parses the command line, prepares the output directory, and then either
      * (``CFG['debug']``) builds a simulated count matrix, runs the test on it
        and drops into the debugger, or
      * loads (or computes) gene expression counts, and for every event type in
        ``CFG['event_types']`` quantifies the events, filters them by coverage,
        runs the test and writes a tab-separated result file into the output
        directory.
    """

    ### get command line options
    options = parse_options(sys.argv)

    ### parse parameters from options object
    CFG = settings.parse_args(options, identity='test')
    CFG['use_exon_counts'] = False

    ### generate output directory
    outdir = os.path.join(options.outdir, 'testing')
    if options.timestamp == 'y':
        outdir = '%s_%s' % (outdir, str(datetime.datetime.now()).replace(' ', '_'))
    if CFG['diagnose_plots']:
        CFG['plot_dir'] = os.path.join(options.outdir, 'plots')
        if not os.path.exists(CFG['plot_dir']):
            os.makedirs(CFG['plot_dir'])

    # the label suffix is only added when both labels differ from these values
    if options.labelA != 'condA' and options.labelB != 'condB':
        outdir = '%s_%s_vs_%s' % (outdir, options.labelA, options.labelB)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    if CFG['debug']:

        print "Generating simulated dataset"

        npr.seed(23)
        CFG['is_matlab'] = False
        #cov = npr.permutation(20000-20).astype('float').reshape(999, 20)
        #cov = sp.r_[cov, sp.c_[sp.ones((1, 10)) *10, sp.ones((1, 10)) * 500000] + npr.normal(10, 1, 20)]
        #sf = sp.ones((cov.shape[1], ), dtype='float')

        setsize = 50
        ### diff event counts
        cov = sp.zeros((500, 2 * setsize), dtype='int')
        # the first 10 rows differ between the two halves of the columns
        for i in range(10):
            cov[i, :setsize] = nbinom.rvs(30, 0.8, size=setsize)
            cov[i, setsize:] = nbinom.rvs(10, 0.8, size=setsize)
        for i in range(10, cov.shape[0]):
            cov[i, :] = nbinom.rvs(30, 0.8, size=2*setsize)

        ### diff gene expression
        cov2 = sp.zeros((500, 2 * setsize), dtype='int')
        # the first 20 rows differ between the two halves of the columns
        for i in range(20):
            cov2[i, :setsize] = nbinom.rvs(2000, 0.2, size=setsize)
            cov2[i, setsize:] = nbinom.rvs(2000, 0.3, size=setsize)
        for i in range(20, cov2.shape[0]):
            cov2[i, :] = nbinom.rvs(2000, 0.3, size=2*setsize)

        # event columns first, gene expression columns second
        cov = sp.c_[cov, cov2] * 10000

        tidx = sp.arange(setsize)

        sf = npr.uniform(0, 5, 2*setsize)
        sf = sp.r_[sf, sf]

        #dmatrix0 = sp.ones((cov.shape[1], 3), dtype='bool')
        dmatrix1 = sp.zeros((cov.shape[1], 4), dtype='float')
        dmatrix1[:, 0] = 1
        dmatrix1[tidx, 1] = 1
        #dmatrix1[tidx, 2] = 1
        dmatrix1[tidx + (2*setsize), 2] = 1
        dmatrix1[(2*setsize):, 3] = 1
        #dmatrix1[:, 4] = sp.log(sf)
        # the null design drops column 1 of the full design
        dmatrix0 = dmatrix1[:, [0, 2, 3]]

        cov = cov * sf
        #sf = sp.ones((cov.shape[1], ), dtype='float')

        pvals = run_testing(cov, dmatrix0, dmatrix1, sf, CFG)
        pvals_adj = adj_pval(pvals, CFG)
        # debug mode ends in an interactive debugger session
        pdb.set_trace()
    else:
        val_tag = ''
        if CFG['validate_splicegraphs']:
            val_tag = '.validated'

        if CFG['is_matlab']:
            CFG['fname_genes'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.mat' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
            CFG['fname_count_in'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.count.mat' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
        else:
            CFG['fname_genes'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.pickle' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
            CFG['fname_count_in'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.count.hdf5' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag))

        condition_strains = None
        CFG['fname_exp_hdf5'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.gene_exp.hdf5' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
        # reuse previously stored expression counts when the file exists
        if os.path.exists(CFG['fname_exp_hdf5']):
            if CFG['verbose']:
                print 'Loading expression counts from %s' % CFG['fname_exp_hdf5']
            IN = h5py.File(CFG['fname_exp_hdf5'], 'r')
            gene_counts = IN['raw_count'][:]
            gene_strains = IN['strains'][:]
            gene_ids = IN['genes'][:]
            IN.close()
        else:
            if options.subset_samples == 'y':
                # subset-specific file name, keyed by a hash of the strains used
                condition_strains = sp.unique(sp.r_[sp.array(CFG['conditionA']), sp.array(CFG['conditionB'])])
                CFG['fname_exp_hdf5'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.gene_exp.%i.hdf5' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag, hash(tuple(sp.unique(condition_strains))) * -1))
            if os.path.exists(CFG['fname_exp_hdf5']):
                if CFG['verbose']:
                    print 'Loading expression counts from %s' % CFG['fname_exp_hdf5']
                IN = h5py.File(CFG['fname_exp_hdf5'], 'r')
                gene_counts = IN['raw_count'][:]
                gene_strains = IN['strains'][:]
                gene_ids = IN['genes'][:]
                IN.close()
            else:
                gene_counts, gene_strains, gene_ids = get_gene_expression(CFG, fn_out=CFG['fname_exp_hdf5'], strain_subset=condition_strains)

        # keep only the part after the first ':' of each strain name, if present
        gene_strains = sp.array([x.split(':')[1] if ':' in x else x for x in gene_strains])

        ### estimate size factors for library size normalization
        sf_ge = get_size_factors(gene_counts, CFG)

        ### get index of samples for difftest
        idx1 = sp.where(sp.in1d(gene_strains, CFG['conditionA']))[0]
        idx2 = sp.where(sp.in1d(gene_strains, CFG['conditionB']))[0]

        ### for TESTING
        #setsize = 100
        #idx1 = sp.arange(0, setsize / 2)
        #idx2 = sp.arange(setsize / 2, setsize)

        ### subset expression counts to tested samples
        gene_counts = gene_counts[:, sp.r_[idx1, idx2]]
        sf_ge = sf_ge[sp.r_[idx1, idx2]]
        #sf = sp.r_[sf, sf]

        ### test each event type individually
        for event_type in CFG['event_types']:

            if CFG['verbose']:
                print 'Testing %s events' % event_type

            CFG['fname_events'] = os.path.join(CFG['out_dirname'], 'merge_graphs_%s_C%i.counts.hdf5' % (event_type, CFG['confidence_level']))

            ### quantify events
            (cov, gene_idx, event_idx, event_ids, event_strains) = quantify.quantify_from_counted_events(CFG['fname_events'], sp.r_[idx1, idx2], event_type, CFG)

            ### estimate size factors
            sf_ev = get_size_factors(sp.vstack(cov), CFG)

            sf = sp.r_[sf_ev, sf_ge]

            assert(sp.all(gene_strains == event_strains))

            ### map gene expression to event order
            curr_gene_counts = gene_counts[gene_idx, :]

            ### filter for min expression
            # an event is kept when the fraction of zero counts stays below
            # CFG['max_0_frac'] in the combinations tested below
            if event_type == 'intron_retention':
                k_idx = sp.where((sp.mean(cov[0] == 0, axis=1) < CFG['max_0_frac']) | \
                                 (sp.mean(cov[1] == 0, axis=1) < CFG['max_0_frac']))[0]
            else:
                k_idx = sp.where(((sp.mean(cov[0] == 0, axis=1) < CFG['max_0_frac']) | \
                                  (sp.mean(cov[1] == 0, axis=1) < CFG['max_0_frac'])) & \
                                 (sp.mean(sp.c_[cov[0][:, :idx1.shape[0]], cov[1][:, :idx1.shape[0]]] == 0, axis=1) < CFG['max_0_frac']) & \
                                 (sp.mean(sp.c_[cov[0][:, idx2.shape[0]:], cov[1][:, idx2.shape[0]:]] == 0, axis=1) < CFG['max_0_frac']))[0]

            if CFG['verbose']:
                print 'Exclude %i of %i %s events (%.2f percent) from testing due to low coverage' % (cov[0].shape[0] - k_idx.shape[0], cov[0].shape[0], event_type, (1 - float(k_idx.shape[0]) / cov[0].shape[0]) * 100)
            if k_idx.shape[0] == 0:
                print 'All events of type %s were filtered out due to low coverage. Please try re-running with less stringent filter criteria' % event_type
                continue
           # k_idx = sp.where((sp.mean(sp.c_[cov[0], cov[1]], axis=1) > 2))[0]
           # k_idx = sp.where((sp.mean(cov[0], axis=1) > 2) & (sp.mean(cov[1], axis=1) > 2))[0]
            cov[0] = cov[0][k_idx, :]
            cov[1] = cov[1][k_idx, :]
            curr_gene_counts = curr_gene_counts[k_idx, :]
            event_idx = event_idx[k_idx]
            gene_idx = gene_idx[k_idx]
            event_ids = [x[k_idx] for x in event_ids]

            # append the gene expression counts as extra columns of each matrix
            cov[0] = sp.around(sp.hstack([cov[0], curr_gene_counts]))
            cov[1] = sp.around(sp.hstack([cov[1], curr_gene_counts]))
            cov = sp.vstack(cov)
            event_ids = sp.hstack(event_ids)

            tidx = sp.arange(idx1.shape[0])

            #if CFG['debug']:
            #    for i in range(cov.shape[0]):
            #        fig = plt.figure(figsize=(8, 6), dpi=100)
            #        ax = fig.add_subplot(111)
            #        ax.hist(cov[i, :] * sf, 50, histtype='bar', rwidth=0.8)
            #        #ax.plot(sp.arange(cov.shape[1]), sorted(cov[i, :]), 'bo')
            #        ax.set_title('Count Distribution - Sample %i' % i )
            #        plt.savefig('count_dist.%i.pdf' % i, format='pdf', bbox_inches='tight')
            #        plt.close(fig)

            ### build design matrix for testing
            dmatrix1 = sp.zeros((cov.shape[1], 4), dtype='bool')
            dmatrix1[:, 0] = 1                      # intercept
            dmatrix1[tidx, 1] = 1                   # delta a
            dmatrix1[tidx, 2] = 1                   # delta g
            dmatrix1[tidx + (idx1.shape[0] + idx2.shape[0]), 2] = 1         # delta g
            dmatrix1[(idx1.shape[0] + idx2.shape[0]):, 3] = 1         # is g
            # the null design drops column 1 ("delta a") of the full design
            dmatrix0 = dmatrix1[:, [0, 2, 3]]

            ### make event splice forms unique to prevent unnecessary tests
            event_ids, u_idx, r_idx = sp.unique(event_ids, return_index=True, return_inverse=True)
            if CFG['verbose']:
                print 'Consider %i unique event splice forms for testing' % u_idx.shape[0]

            ### run testing
            # the active call tests every row of cov; the variant restricted to
            # the unique rows (u_idx / r_idx) is the commented-out line
            #pvals = run_testing(cov[u_idx, :], dmatrix0, dmatrix1, sf, CFG, r_idx)
            pvals = run_testing(cov, dmatrix0, dmatrix1, sf, CFG)
            pvals_adj = adj_pval(pvals, CFG)

            ### write output
            out_fname = os.path.join(outdir, 'test_results_C%i_%s.tsv' % (options.confidence, event_type))
            if CFG['verbose']:
                print 'Writing test results to %s' % out_fname
            # rows are written in order of increasing adjusted p-value
            s_idx = sp.argsort(pvals_adj)
            header = sp.array(['event_id', 'gene', 'p_val', 'p_val_adj'])
            event_ids = sp.array(['%s_%i' % (event_type, i + 1) for i in event_idx], dtype='str')
            if CFG['is_matlab']:
                data_out = sp.c_[event_ids[s_idx], gene_ids[gene_idx[s_idx], 0], pvals[s_idx].astype('str'), pvals_adj[s_idx].astype('str')]
            else:
                data_out = sp.c_[event_ids[s_idx], gene_ids[gene_idx[s_idx]], pvals[s_idx].astype('str'), pvals_adj[s_idx].astype('str')]
            data_out = sp.r_[header[sp.newaxis, :], data_out]
            sp.savetxt(out_fname, data_out, delimiter='\t', fmt='%s')
from scipy.stats import norm, uniform, nbinom

# Simulated data: negative binomial response with a random intercept per group
np.random.seed(1656)                 # set seed to replicate example
N = 2000                             # number of obs in model
NGroups = 10

x1 = uniform.rvs(size=N)
x2 = uniform.rvs(size=N)

# group label of every observation: 200 consecutive observations per group
Groups = np.repeat(np.arange(NGroups), 200)
a = norm.rvs(loc=0, scale=0.5, size=NGroups)   # one random intercept per group
eta = 1 + 0.2 * x1 - 0.75 * x2 + a[Groups]     # linear predictor

mu = np.exp(eta)
y = nbinom.rvs(mu, 0.5)

# Code 8.23 Random intercept negative binomial model in Python using Stan
import pystan

X = sm.add_constant(np.column_stack((x1,x2)))
K = X.shape[1]

# data dictionary handed to the model
model_data = {
    'Y': y,
    'X': X,
    'K': K,
    'N': N,
    'NGroups': NGroups,
}
def generate_count(options):
    """Simulate negative-binomial read counts for two conditions and write them to a file.

    For every gene a mean count is drawn from a negative binomial
    distribution, a dispersion is derived from the mean (or loaded from
    ``options.dispFile``), and optionally a subset of genes gets different
    means in condition A and B.  One tab-separated line per gene is written
    to ``options.output``.

    Attributes read from ``options``: numEntry, numSampleConA, numSampleConB,
    nParamNB, pParamNB, beta1, beta2, output, dispFile, addDisperError,
    numDiff, diffFile, shapeGamma, scaleGamma.

    Exits the process (``sys.exit``) when the number of dispersions in
    ``dispFile`` does not match the number of genes, or when more
    differential genes than genes are requested.
    """
    numGene = options.numEntry
    numSampleConA = options.numSampleConA
    numSampleConB = options.numSampleConB
    nParamNB = options.nParamNB
    pParamNB = options.pParamNB
    beta1 = options.beta1
    beta2 = options.beta2
    output = options.output

    # First generate the mean read count for each gene. Assume this mean value follows NB distribution (Observed from real data).
    # rvs() returns an integer array for discrete distributions; convert it to float so the
    # fractional fold-changed means written into muA/muB below are not silently truncated.
    mu = nbinom.rvs(nParamNB, pParamNB, loc=0.0, size=numGene).astype(float)

    # If the mean of certain genes are 0, change them as 1.
    mu[mu == 0.0] = 1.0

    # Generate dispersions for all genes.
    if not options.dispFile:
        # Generate dispersions as a function of mean count for all genes.
        disper = beta1 / mu + beta2
    else:
        # Load the dispersions to generate the count.
        disper = np.loadtxt(options.dispFile, dtype=float, skiprows=0, usecols=(0,))
        if disper.size != numGene:
            sys.stderr.write('\nError: The number of specified dispersions is not the same with number of genes!\n\n')
            sys.exit()

    # Add Gaussian distributed noise to log(dispersion).
    if options.addDisperError:
        std = options.addDisperError
        errorNorm = norm.rvs(loc=0.0, scale=std, size=numGene)
        disper = np.exp(np.log(disper) + errorNorm)

    muA = mu.copy()
    muB = mu.copy()

    # Index collections of the up- and down-regulated genes (empty when no differential genes are set).
    idxUp = []
    idxDn = []

    # For some genes, generate read count with different mean value in different conditions.
    if options.numDiff or options.diffFile:
        if options.diffFile:
            # Fold change genes are chosen as indicated by file: 2 marks "up", 1 marks "down".
            diffInfo = np.loadtxt(options.diffFile, dtype=int, skiprows=0, usecols=(0,))
            idxUp = (diffInfo==2).nonzero()[0]
            idxDn = (diffInfo==1).nonzero()[0]
            numDiffUp = idxUp.size
            numDiffDn = idxDn.size
            numDiff = numDiffUp + numDiffDn
        else:
            numDiff = options.numDiff
            # The number of genes showing increased and decreased mean count is equal or 1 less.
            # Floor division keeps this an integer under both Python 2 and Python 3.
            numDiffUp = numDiff // 2
            numDiffDn = numDiff - numDiffUp

        # Validate before sampling: random.sample() would otherwise fail first.
        if numDiff > numGene:
            print('numDiff should be smaller than numGene!')
            sys.exit()

        if not options.diffFile:
            # Fold change genes are randomly selected.
            idx = random.sample(range(numGene), numDiff)
            idxUp = random.sample(idx, numDiffUp)
            idxDn = np.setdiff1d(idx, idxUp)

        shapeParam = options.shapeGamma
        scaleParam = options.scaleGamma

        # Assume fold changes of mean count of different genes follow gamma distribution. If fold change value of increased gene set is x, the decreased set is 1/x.
        foldDiffUp = gamma.rvs(a=shapeParam, scale=scaleParam, loc=1.0, size=numDiffUp)
        foldDiffDn = 1.0 / gamma.rvs(a=shapeParam, scale=scaleParam, loc=1.0, size=numDiffDn)

        # Change the mean count of condition A and condition B without changing the overall mean count across the two conditions.
        # (MeanCountA + MeanCountB) / 2 = MeanCountOrigin & MeanCountA * FoldChange = MeanCountB
        # Assume there is a negative correlation between mean count and fold change.
        idxUpMem = np.searchsorted(np.sort(mu[idxUp]), mu[idxUp])
        idxDnMem = np.searchsorted(np.sort(mu[idxDn]), mu[idxDn])

        muAnewUp = 2 * np.sort(mu[idxUp]) / (np.sort(foldDiffUp)[::-1] + 1)
        muBnewUp = muAnewUp * np.sort(foldDiffUp)[::-1]
        muA[idxUp] = muAnewUp[idxUpMem]
        muB[idxUp] = muBnewUp[idxUpMem]

        muAnewDn = 2 * np.sort(mu[idxDn]) / (np.sort(foldDiffDn) + 1)
        muBnewDn = muAnewDn * np.sort(foldDiffDn)
        muA[idxDn] = muAnewDn[idxDnMem]
        muB[idxDn] = muBnewDn[idxDnMem]

    n = 1.0 / disper
    pA = n / (n + muA)
    pB = n / (n + muB)

    # Sets give constant-time membership tests in the per-gene loop below.
    idxUpSet = set(int(k) for k in idxUp)
    idxDnSet = set(int(k) for k in idxDn)

    numDigits = len(str(numGene))
    with open(output, 'w') as FileOut:
        FileOut.write('Entry\t' + 'conditionA\t'*numSampleConA + 'conditionB\t'*numSampleConB + 'Dispersion\t' + 'MeanCondA\t' + 'MeanCondB\t' + 'MeanFoldChange\t' + 'SetAsDiff\n')
        for i in range(numGene):
            # Zero-pad the gene number so all names have the same width.
            z = numDigits - len(str(i+1))
            name = 'G' + '0'*z + str(i+1)
            # The dispersion parameter (1/n) is the same for both conditions. The probability parameters are different if there is fold change in mean count for different conditions.
            countListA = nbinom.rvs(n[i], pA[i], size=numSampleConA).tolist()
            countListB = nbinom.rvs(n[i], pB[i], size=numSampleConB).tolist()
            countList = countListA + countListB
            countString = '\t'.join(str(element) for element in countList)
            # NOTE(review): with options.diffFile set and options.numDiff unset every gene is
            # reported as '-1'; also the file uses 2 for "up" while '1' is written for "up"
            # here -- confirm both are intended.
            if not options.numDiff:
                setAsDiff = '-1'
            elif i in idxUpSet:
                setAsDiff = '1'
            elif i in idxDnSet:
                setAsDiff = '2'
            else:
                setAsDiff = '0'
            FileOut.write(name + '\t' + countString + '\t' + str(disper[i]) + '\t' + str(np.mean(countListA)) + '\t' + str(np.mean(countListB)) + '\t' + str((np.mean(countListB)+1e-5)/(np.mean(countListA)+1e-5)) + '\t' + setAsDiff + '\n')
def evaluate(pop, intel_penalty=0.01, game=None):
    """Score a population by round-robin play and classify each strategy.

    Every pair of individuals plays one game; each individual's fitness is set
    to ``[mean payoff, get_intelligence()]`` and its cooperation rate is
    stored in ``rate_of_coop``.  Afterwards every individual and a set of
    reference strategies play against the same random move traces, and each
    individual is tagged with the index of the closest reference strategy
    (``closest_strategy``) and the distances to all of them
    (``strategy_dists``).

    Parameters
    ----------
    pop : sequence
        Individuals.  Each one is wrapped in ``NNPlayer`` and must provide
        ``reset()``, ``initial_move``, ``activate(payoffs)``,
        ``get_intelligence()`` and a ``fitness`` object with a ``values``
        attribute.
    intel_penalty : float
        Currently unused; only referenced by the commented-out fitness
        formula below.
    game : object
        Must provide ``play(player_a, player_b, n_rounds)`` and
        ``play_against_trace(player, trace)``.

    Returns
    -------
    The same ``pop`` object, with the attributes above set on each member.
    """
    n = len(pop)
    total_payoffs = np.zeros((1, n))
    rate_of_coop = np.zeros((1, n))

    class NNPlayer(games.Player):
        # Adapter that lets a network from the population act as a player.
        def __init__(self, nnet):
            self.nnet = nnet

        def reset(self):
            self.nnet.reset()

        def get_initial_action(self):
            return self.nnet.initial_move

        def get_action(self, prev_results):
            prev_payoffs = [r['payoff'] for r in prev_results]
            output = self.nnet.activate(prev_payoffs)
            if type(output) == np.ndarray:
                output = output[0, 0]
            # the network output is used as the probability of playing 1
            if np.random.rand() < output:
                return 1
            else:
                return 0

    for i in range(n):
        for j in range(i+1, n):
            # NOTE(review): the third positional argument of scipy's rvs() is
            # `loc`, not `size`, so this is a scalar shifted by 1 (plus the
            # explicit +1) -- confirm that is the intended round count.
            n_rounds = nbinom.rvs(1, 0.2, 1) + 1
            payoffs, traces = game.play(NNPlayer(pop[i]), NNPlayer(pop[j]), n_rounds)
            total_payoffs[0, i] += payoffs[0, 0]
            total_payoffs[0, j] += payoffs[0, 1]
            rate_of_coop[0, i] += np.mean(traces[0])
            rate_of_coop[0, j] += np.mean(traces[1])

    # every individual played n-1 games
    total_payoffs /= float(n-1)
    rate_of_coop /= float(n-1)

    for i in range(n):
        #pop[i].fitness.values = [total_payoffs[0,i] - intel_penalty*pop[i].get_intelligence()]
        pop[i].fitness.values = [total_payoffs[0, i], pop[i].get_intelligence()]
        pop[i].rate_of_coop = rate_of_coop[0, i]

    # Examine the strategies in the population.
    class AlwaysCooperatePlayer(games.Player):
        def get_initial_action(self):
            return 1

        def get_action(self, prev_payoffs):
            return 1

    class AlwaysDefectPlayer(games.Player):
        def get_initial_action(self):
            return 0

        def get_action(self, prev_results):
            return 0

    class TitForTatPlayer(games.Player):
        def get_initial_action(self):
            return 1

        def get_action(self, prev_results):
            # Default cooperate, but defect if opponent defected.
            if prev_results[1]['action'] == 0:
                return 0
            else:
                return 1

    class TitForTwoTatsPlayer(games.Player):
        def __init__(self):
            self.opponent_defected = False

        def reset(self):
            self.opponent_defected = False

        def get_initial_action(self):
            return 1

        def get_action(self, prev_results):
            # Default cooperate, but defect if opponent defected twice
            # in a row.
            if prev_results[1]['action'] == 0 and self.opponent_defected:
                return 0
            elif prev_results[1]['action'] == 0:
                self.opponent_defected = True
                return 1
            else:
                self.opponent_defected = False
                return 1

    class PavlovPlayer(games.Player):
        def get_initial_action(self):
            return 1

        def get_action(self, prev_results):
            # NOTE(review): this simply repeats the opponent's previous
            # action; for 0/1 actions that matches TitForTatPlayer -- confirm.
            return prev_results[1]['action']

    class ProbabilisticPlayer(games.Player):
        def __init__(self, prob):
            # prob -> probability of cooperating
            self.prob = prob

        def get_initial_action(self):
            # the first move is drawn like any other move
            return self.get_action(None)

        def get_action(self, prev_results):
            # cooperate (1) with probability self.prob, otherwise defect (0);
            # same comparison as the random traces generated below
            if np.random.rand() < self.prob:
                return 1
            else:
                return 0

    probs = [0.0, 0.25, 0.5, 0.75, 1.0]
    n_games = 5
    n_rounds = 20
    n_total = len(probs)*n_games*n_rounds
    # the position in this list is the value stored in closest_strategy
    test_players = [AlwaysCooperatePlayer(), AlwaysDefectPlayer(),
                    TitForTatPlayer(), TitForTwoTatsPlayer(), PavlovPlayer()]
    test_player_moves = np.zeros((len(test_players), n_total))
    pop_moves = np.zeros((len(pop), n_total))
    opp_moves = np.zeros(len(probs)*n_games*n_rounds)
    start_idx = 0
    for p in probs:
        for _ in range(n_games):
            stop_idx = start_idx + n_rounds
            # random opponent that plays 1 with probability p
            random_trace = [np.random.rand() < p for _ in range(n_rounds)]
            random_trace = np.array(random_trace, dtype='float')
            opp_moves[start_idx:stop_idx] = random_trace
            for (j, player) in enumerate(test_players):
                _, traces = game.play_against_trace(player, random_trace)
                test_player_moves[j, start_idx:stop_idx] = traces[0]
            for (j, indiv) in enumerate(pop):
                _, traces = game.play_against_trace(NNPlayer(indiv), random_trace)
                pop_moves[j, start_idx:stop_idx] = traces[0]
            start_idx += n_rounds

    # Mean squared distance between every individual and every reference
    # strategy, expanded as (X - Y)^2 = X^2 - 2XY + Y^2
    pop_squared = np.square(pop_moves).sum(axis=1)
    test_squared = np.square(test_player_moves).sum(axis=1)
    pop_test = np.dot(pop_moves, test_player_moves.T)
    sq_dists = pop_squared[:, np.newaxis] - 2 * pop_test + test_squared[np.newaxis, :]
    sq_dists /= n_total
    closest_strat = sq_dists.argmin(axis=1)
    for (i, indiv) in enumerate(pop):
        indiv.closest_strategy = closest_strat[i]
        indiv.strategy_dists = sq_dists[i, :]

    return pop