def generate_unconditioned_example():
    """Return a list of (x, y) pairs built from two separate multinomial draws.

    Each coordinate sequence comes from its own ``R.multinomial(num_data, ...)``
    draw over a freshly created distribution and is expanded with
    ``convert_multinomial_sample``; the second sequence is passed through
    ``shuffle`` before the two are zipped together.
    """
    # Draw and expand the x sequence first so the random draws happen in the
    # same order as before.
    x_counts = R.multinomial(num_data, create_multinomial())
    xs = convert_multinomial_sample(x_counts)

    y_counts = R.multinomial(num_data, create_multinomial())
    # NOTE(review): this relies on `shuffle` returning the shuffled sequence
    # (an in-place shuffle returning None would break the zip) -- confirm.
    ys = shuffle(convert_multinomial_sample(y_counts))

    return list(zip(xs, ys))
def sample_random_read(gene, true_psi, read_len, overhang_len):
    """Sample one random read from the isoforms of ``gene``.

    An isoform is chosen with probability proportional to its Psi value times
    the number of positions a read of length ``read_len`` can start at; the
    start position is then drawn uniformly within that isoform.  The overhang
    is not used while sampling, only when aligning the read.

    Returns a tuple ``(alignment, [read_start, read_end], category,
    chosen_iso)`` where the coordinates are isoform-local and ``alignment`` /
    ``category`` come from ``gene.align_read``.
    """
    # Number of valid start positions per isoform.
    start_counts = array([isoform.len - read_len + 1 for isoform in gene.isoforms])
    # Probability of one particular start position -- assumed uniform for now.
    per_position_prob = [1 / float(count) for count in start_counts]

    normaliser = sum(start_counts * array(true_psi))
    fragment_probs = [
        (count * psi) / normaliser
        for count, psi in zip(start_counts, true_psi)
    ]

    # Choose the isoform to sample the read from.
    isoform_draw = multinomial(1, fragment_probs)
    chosen_iso = list(isoform_draw).index(1)

    # Choose the start position inside the chosen isoform.
    position_probs = ones(start_counts[chosen_iso]) * per_position_prob[chosen_iso]
    position_draw = multinomial(1, position_probs)
    sampled_read_start = list(position_draw).index(1)
    sampled_read_end = sampled_read_start + read_len - 1

    # Convert the isoform-local coordinates to genomic ones and align.
    isoform = gene.isoforms[chosen_iso]
    genomic_read_start, genomic_read_end = isoform.isoform_coords_to_genomic(
        sampled_read_start, sampled_read_end)
    alignment, category = gene.align_read(
        genomic_read_start, genomic_read_end, overhang=overhang_len)

    return (tuple(alignment),
            [sampled_read_start, sampled_read_end],
            category,
            chosen_iso)
def sample(self, T, s_init=None, x_init=None, y_init=None):
    """Simulate ``T`` steps of the switching linear dynamical system.

    Inputs:
        T:      number of time steps to simulate
        s_init: optional initial switch state; when omitted it is drawn from
                the stationary distribution of the transition matrix ``self.Z``
        x_init: optional initial continuous state; when omitted it is drawn
                from a normal with ``self.mus[s]`` / ``self.Sigmas[s]``
        y_init: accepted for interface compatibility; not used
    Outputs:
        xs: Hidden continuous states, shape (T, x_dim)
        Ss: Hidden switch states, shape (T,) (stored as floats)
    """
    import numpy as np  # local import: only used for the eigenvector selection

    xs = zeros((T, self.x_dim))
    Ss = zeros(T)

    # Initial switch state.
    if s_init is None:
        # The stationary distribution is the left eigenvector belonging to
        # the eigenvalue closest to 1; eig() does not order eigenvalues, so
        # select it explicitly and normalise it into a probability vector.
        eigvals, vl = linalg.eig(self.Z, left=True, right=False)
        stationary = np.abs(np.real(vl[:, np.argmin(np.abs(eigvals - 1.0))]))
        pi = stationary / stationary.sum()
        s = int(nonzero(multinomial(1, pi))[0][0])
    else:
        s = int(s_init)
    Ss[0] = s

    # Initial continuous state.
    if x_init is None:
        xs[0] = multivariate_normal(self.mus[s], self.Sigmas[s])
    else:
        xs[0] = x_init

    # Time updates: `s` is kept as a Python int so it can be used as an index.
    for t in range(0, T - 1):
        xs[t + 1] = multivariate_normal(dot(self.As[s], xs[t]) + self.bs[s],
                                        self.Qs[s])
        s = int(nonzero(multinomial(1, self.Z[s]))[0][0])
        Ss[t + 1] = s

    return (xs, Ss)
def generate_conditioned_example():
    """Return a list of (x, y) pairs where y is drawn conditionally on x.

    A multinomial draw of ``num_data`` items decides how many examples each x
    value receives; for every x a fresh distribution is created and that many
    y values are drawn from it.
    """
    pairs = []
    per_x_counts = R.multinomial(num_data, create_multinomial())
    for x, num in enumerate(per_x_counts):
        # A new y-distribution is created for every x value.
        y_counts = R.multinomial(num, create_multinomial())
        for y in convert_multinomial_sample(y_counts):
            pairs.append((x, y))
    return pairs
def generate_corpus_vocab(args):
    """Generate a synthetic two-class corpus and its vocabulary.

    The vocabulary (``args.num_words`` words named ``word_<i>``) is split into
    a background span followed by a "larger" and a "smaller" span.  Every
    document draws ``args.words_per_doc`` words: a background share from the
    background span and the remainder split between the two other spans
    according to ``args.gen_bias``.  From the halfway document onwards the two
    non-background draw sizes are swapped.

    Returns ``(corpus, vocab)`` where ``corpus`` is a list of dicts with keys
    ``id``, ``label``, ``words`` (vocabulary indices) and ``counts``.
    """
    vocab = Vocab()
    for i in range(1, args.num_words + 1):
        vocab.add_word('word_' + str(i))

    # Lengths of the background vs. non-background portions of the vocab.
    background_vocab = int(len(vocab) * args.background_vocab_prop)
    non_back_vocab = len(vocab) - background_vocab
    # Span lengths of the two non-background portions (offsets added below).
    larger_v_span = int(non_back_vocab * .5)
    smaller_v_span = non_back_vocab - larger_v_span

    # How many words per document to draw from each portion.
    back_words_per_doc = int(args.words_per_doc * args.generated_background_prop)
    nb_words_per_doc = args.words_per_doc - back_words_per_doc
    larger_num = int(nb_words_per_doc * args.gen_bias)
    smaller_num = nb_words_per_doc - larger_num

    def uniform_probs(span):
        # An empty span gets no probabilities (and is skipped when drawing)
        # instead of raising ZeroDivisionError.
        return span * [1.0 / float(span)] if span > 0 else []

    background_p = uniform_probs(background_vocab)
    larger_p = uniform_probs(larger_v_span)
    smaller_p = uniform_probs(smaller_v_span)

    left_doc_draw = larger_num
    right_doc_draw = smaller_num
    corpus = []
    for di in range(args.num_docs):
        # Swap the draw sizes at the halfway point.
        if di == int(args.num_docs / 2):
            left_doc_draw = smaller_num
            right_doc_draw = larger_num
        doc = {'id': 'doc_' + str(di), 'label': None, 'words': [], 'counts': []}
        counter = defaultdict(int)
        # (number of words, span probabilities, vocabulary offset of the span)
        draws = (
            (back_words_per_doc, background_p, 0),
            (left_doc_draw, larger_p, background_vocab),
            (right_doc_draw, smaller_p, background_vocab + larger_v_span),
        )
        for n_words, probs, offset in draws:
            if not probs:
                continue
            for index, count in enumerate(multinomial(n_words, probs)):
                if count > 0:
                    counter[index + offset] = count
        for word, count in counter.items():
            doc['words'].append(word)
            doc['counts'].append(count)
        corpus.append(doc)
    return (corpus, vocab)
def RandIntVec(self, ListSize, ListSumValue, Distribution='Normal'):
    """Return random non-negative integers that sum to a given value.

    Inputs:
        ListSize = the size of the list to return
        ListSumValue = the sum of the list values
        Distribution = 'uniform' for a uniform distribution, 'normal' for a
            normal distribution ~ N(0,1) over a +/- 3 sigma range (default),
            or a list/tuple of probabilities of size 'ListSize' or
            'ListSize - 1' for an empirical (arbitrary) distribution.  With
            'ListSize - 1' entries the last outcome receives the remaining
            probability mass.
    Output:
        An array of random integers of length 'ListSize' whose sum is
        'ListSumValue'.
    Raises:
        ValueError if the distribution is unknown or has the wrong size.
    """
    if isinstance(Distribution, (list, tuple)):
        probs = list(Distribution)
        if len(probs) == ListSize - 1:
            # Make the implicit last outcome explicit so that the result
            # really has ListSize entries.
            probs.append(max(0.0, 1.0 - sum(probs)))
        elif len(probs) != ListSize:
            raise ValueError(
                'Cannot create desired vector: distribution has %d entries, '
                'expected %d or %d' % (len(probs), ListSize, ListSize - 1))
    elif Distribution.lower() == 'uniform':
        # Float division: with integer division every probability would be 0
        # and the whole sum would land in the last element.
        probs = [1.0 / ListSize for _ in range(ListSize)]
    elif Distribution.lower() == 'normal':
        # Bins are spread evenly over a +/- 3 sigma range of N(0, 1).  Change
        # low_sigma / high_sigma to explore a different range.
        low_sigma = -3
        high_sigma = 3
        if (float(ListSize) - 1) == 0:
            step = 0
        else:
            step = 1 / (float(ListSize) - 1)
        z_values = [low_sigma * (1 - i * step) + (i * step) * high_sigma
                    for i in range(int(ListSize))]

        def cdf(z):
            # Standard normal cumulative distribution function.
            return 0.5 * math.erfc(-z / math.sqrt(2))

        probs = []
        last = len(z_values) - 1
        for i, z in enumerate(z_values):
            if i == 0:
                probs.append(cdf(z))
            elif i == last:
                # Mirrors the first bin; multinomial() assigns whatever
                # probability remains to the last outcome anyway.
                probs.append(probs[0])
            else:
                probs.append(cdf(z) - cdf(z_values[i - 1]))
    else:
        raise ValueError('Cannot create desired vector')

    return multinomial(ListSumValue, probs, size=1)[0]
def getColor(self):
    """Return this object's colour (``self.color``)."""
    # NOTE(review): the original body continued after this return with an
    # unreachable (and syntactically invalid) block that sampled between
    # history/emotion consistency and then returned pos_color / neg_color /
    # neutral_color.  It never executed, so it has been removed; restore it
    # from version control if the sampling behaviour is wanted again.
    return self.color
def categorical(p, size=None):
    """Draw from a categorical distribution (Cat K = Multi 1 K).

    ``p`` is the vector of category probabilities (must sum to 1 within 1%).
    With ``size=None`` a single category index is returned; with an int an
    array of that many indices; with a tuple an array of that shape.

    Raises AssertionError when ``p`` does not sum to 1.
    """
    # Raised explicitly so the check survives `python -O`.
    if not 0.99 < sum(p) < 1.01:
        raise AssertionError('probabilities must sum to 1')

    def draw_one():
        return list(sample.multinomial(1, p)).index(1)

    if size is None:
        return draw_one()
    if isinstance(size, int):
        return array([draw_one() for _ in range(size)])

    shape = tuple(size)
    total = 1
    for dim in shape:
        total *= dim
    # Draw in row-major order and reshape; any number of dimensions works.
    return array([draw_one() for _ in range(total)]).reshape(shape)
def generator(self):
    """Simulate state sequences and observations from the sticky HDP-HMM.

    Draws ``self.N`` initial states from a Dirichlet-distributed state
    distribution, evolves each chain for ``self.T`` steps using the rows of
    ``self.PI`` and emits one normal observation per state using
    ``self.clusterPars[state]`` as (mean, standard deviation).  The results
    are stored as arrays in ``self.state`` and ``self.data``.
    """
    # Initial states: one one-hot row per chain; the column index is the state.
    initial_one_hot = multinomial(1, dirichlet(self.beta), self.N)
    self.state = [list(np.where(initial_one_hot)[1])]

    # Transitions: the next state is drawn from the row of the previous state.
    for _ in range(1, self.T):
        next_states = []
        for previous in self.state[-1]:
            one_hot = multinomial(1, self.PI[previous, :])
            next_states.append(np.where(one_hot)[0][0])
        self.state.append(next_states)

    # Emissions.
    for step in range(self.T):
        observations = []
        for cluster in self.state[step]:
            loc, scale = self.clusterPars[cluster][0], self.clusterPars[cluster][1]
            observations.append(normal(loc, scale))
        self.data.append(observations)

    self.state = np.array(self.state)
    self.data = np.array(self.data)
def simulate_combat(self, allowed_time, ant_0_scoring = ConservativeScore, ant_1_scoring = ConservativeScore, log = None):
    """Run randomised combat roll-outs for ``allowed_time`` seconds.

    Every roll-out restores the initial situation, samples one action per ant
    from a Dirichlet draw over its policy weights, plays one turn and then
    reinforces the chosen action of every ant with its owner's score.  When
    the time is up, one final action per ant is sampled from the learned
    weights.

    Returns a dict mapping each ant in ``self.policy`` to the chosen action.
    """
    start = time.time()
    score_0 = ant_0_scoring(self, 0)
    score_1 = ant_1_scoring(self, 1)
    self.allowed_policies()
    init_poses = dict((a, a.pos) for a in self.ants)

    def restore(killed_ants):
        # Bring back the ants killed in the previous roll-out and move every
        # ant back to its starting position.
        for k in killed_ants:
            self.add_ant(k)
        for a, p in init_poses.items():
            a.pos = p

    killed = []
    steps = 0
    while (time.time() - start) < allowed_time:
        steps += 1
        action = {}
        restore(killed)
        for ant in self.ants:
            ps = dirichlet(self.policy[ant])
            i = multinomial(1, ps).nonzero()[0][0]
            if not (self.move_direction(ant, self.actions[i])):
                print("CAZZZ")
            action[ant] = i
        killed = self.step_turn()
        # Reinforce the action each ant took with its owner's score.
        for a, p in self.policy.items():
            if a.owner == 0:
                p[action[a]] += score_0(self)
            else:
                p[action[a]] += score_1(self)
    restore(killed)

    retpolicy = {}
    for a, p in self.policy.items():
        ps = dirichlet(p)
        i = multinomial(1, ps).nonzero()[0][0]
        retpolicy[a] = self.actions[i]
    if log is not None:
        log.info("Number of steps: %d", steps)
    else:
        # Two spaces: matches the output of the former print statement.
        print("Number of steps:  %s" % (steps,))
    return retpolicy
def sample_rho(v0_range, v1_range, v0_num_grid, v1_num_grid, K, num_1_vec, num_0_vec, p):
    """Sample (rho0, rho1) from a gridded posterior over (v0, v1).

    The value returned by ``compute_rho_posterior`` is evaluated on a
    ``v0_num_grid`` x ``v1_num_grid`` grid, exponentiated (after subtracting
    the maximum) and normalised; one grid cell is then drawn from the
    resulting probabilities.

    Returns ``(rho0, rho1, posterior_grid)`` where ``posterior_grid`` is the
    normalised grid.
    """
    v0_grid = np.linspace(v0_range[0], v0_range[1], v0_num_grid)
    v1_grid = np.linspace(v1_range[0], v1_range[1], v1_num_grid)

    log_posterior = np.zeros((v0_num_grid, v1_num_grid))
    for row, v0_candidate in enumerate(v0_grid):
        for col, v1_candidate in enumerate(v1_grid):
            cand_rho0, cand_rho1 = transform_var_poly(v0_candidate, v1_candidate, p)
            log_posterior[row, col] = compute_rho_posterior(
                cand_rho0, cand_rho1, K, num_1_vec, num_0_vec)

    # Subtract the maximum before exponentiating to avoid overflow.
    weights = np.exp(log_posterior - log_posterior.max())
    posterior_grid = weights / weights.sum()

    # Draw one flattened (row-major) cell and map it back to grid coordinates.
    flat_index = np.where(multinomial(1, posterior_grid.reshape(-1)))[0][0]
    v0 = v0_grid[int(flat_index // v1_num_grid)]
    v1 = v1_grid[int(flat_index % v1_num_grid)]

    rho0, rho1 = transform_var_poly(v0, v1, p)
    return rho0, rho1, posterior_grid
def SamplesFromGaussianMixture(Probs, Means, CovarianceMatrices, SampleCount, TrivialCovariances=False, Precision=np.float64, ChoicesPrecision=np.int_):
    """Draw samples from a Gaussian mixture.

    Parameters
    ----------
    Probs : array, shape (K,)
        Mixture weights.
    Means : array, shape (K, d)
        Component means.
    CovarianceMatrices : array, shape (K, d, d)
        Component covariances; ignored when ``TrivialCovariances`` is true,
        in which case every component uses identity covariance.
    SampleCount : int
        Total number of samples to draw.
    TrivialCovariances : bool
        Skip the Cholesky factorisation and the covariance transform.
    Precision, ChoicesPrecision : dtype
        dtypes of the returned sample and component-index arrays.
        ``Precision`` defaults to ``np.float64`` (what the removed
        ``np.float_`` alias referred to).

    Returns
    -------
    (ResultSet, Choices) : ``ResultSet`` has shape (d, SampleCount) with the
    samples grouped by component; ``Choices[i]`` is the component index of
    column ``i``.
    """
    MixtureCount = Probs.shape[0]
    Dimension = Means.shape[1]

    CholeskyMatrices = CovarianceMatrices
    if not TrivialCovariances:
        CholeskyMatrices = np.linalg.cholesky(CovarianceMatrices)  # K x d x d

    ResultSet = np.empty(shape=(Dimension, SampleCount), dtype=Precision)  # d x N
    Choices = np.zeros(SampleCount, dtype=ChoicesPrecision)

    # How many samples each component contributes.
    MixturesToSample = multinomial(SampleCount, Probs)

    GeneratedSamples = 0
    for MixtureInd in range(MixtureCount):
        Count = MixturesToSample[MixtureInd]
        ZMatrix = normal(size=(Dimension, Count))
        if not TrivialCovariances:
            # Colour the standard-normal draws with the component covariance.
            ZMatrix = np.dot(CholeskyMatrices[MixtureInd], ZMatrix)
        block = slice(GeneratedSamples, GeneratedSamples + Count)
        mean_column = Means[MixtureInd].reshape(Means[MixtureInd].shape[0], 1)
        ResultSet[:, block] = ZMatrix + mean_column
        Choices[block] = MixtureInd
        GeneratedSamples += Count

    return ResultSet, Choices
def get_next_sample(self, mask, test_mask=None):
    """Pick the highest-scoring unmasked entry of one sampled reconstruction.

    One component is drawn according to ``self.p_weights``; its
    reconstruction is computed, entries flagged in ``mask`` (and in
    ``test_mask`` when given) are set to ``MIN_VAL``, and the index of the
    largest remaining value is returned as an index tuple.
    """
    component = multinomial(1, self.p_weights).argmax()
    scores = self._reconstruct(self.E[component], self.R[component], False)

    # Entries flagged with 1 must never be selected.
    already_used = mask[:self.n_pure_relations] == 1
    scores[already_used] = MIN_VAL
    if test_mask is not None:
        held_out = test_mask == 1
        scores[held_out] = MIN_VAL

    best_flat_index = scores.argmax()
    return np.unravel_index(best_flat_index, scores.shape)
def sample_last(zt, wt, yt, n_mat, ysum, ycnt, beta_vec, beta_new, kappa_vec, kappa_new, alpha0, gamma0, sigma0, mu0, sigma0_pri, rho0, rho1, K):
    """Resample the state ``zt`` and indicator ``wt`` of the last time point.

    The last observation is first removed from the transition counts
    (``n_mat``) and emission statistics (``ysum``, ``ycnt``), a new
    ``(zt, wt)`` pair is drawn from its posterior, the model is grown by one
    state if a new state was chosen, and the statistics are updated again.

    Returns ``(zt, wt, n_mat, ysum, ycnt, beta_vec, kappa_vec, beta_new,
    kappa_new, K)``.
    """
    last = len(zt) - 1
    prev_state = zt[last - 1]

    # Remove the last time point from the sufficient statistics.
    if wt[last] == 0:
        n_mat[prev_state, zt[last]] -= 1
    ysum[zt[last]] -= yt[last]
    ycnt[zt[last]] -= 1

    # Prior over the next state given the previous one.
    row_total = n_mat[prev_state].sum()
    zt_dist = (alpha0 * beta_vec + n_mat[prev_state]) / (alpha0 + row_total)
    knew_dist = alpha0 * beta_new / (alpha0 + row_total)

    # Marginal likelihood of the observation under each existing state and
    # under a brand-new state.
    obs_var = sigma0 ** 2
    prior_var = sigma0_pri ** 2
    varn = 1 / (1 / prior_var + ycnt / obs_var)
    mun = ((mu0 / prior_var) + (ysum / obs_var)) * varn
    yt_dist = ss.norm.pdf(yt[last], mun, np.sqrt(obs_var + varn))
    yt_knew_dist = ss.norm.pdf(yt[last], mu0, np.sqrt(obs_var + prior_var))

    # Posterior cases: [w=1 and stay in prev_state | w=0 and existing state k
    # | w=0 and new state].
    stay_weight = kappa_vec[prev_state]
    move_weight = 1 - kappa_vec[prev_state]
    post_cases = np.hstack((
        stay_weight * yt_dist[prev_state],
        move_weight * zt_dist * yt_dist,
        move_weight * knew_dist * yt_knew_dist,
    ))
    post_cases = post_cases / (post_cases.sum())

    sample_rlt = np.where(multinomial(1, post_cases))[0][0]
    if sample_rlt < 1:
        zt[last], wt[last] = [prev_state, 1]
    else:
        zt[last], wt[last] = [sample_rlt - 1, 0]

    # A new state was chosen: extend beta_vec, kappa_vec and the statistics.
    if zt[last] == K:
        b = beta(1, gamma0, size=1)
        beta_vec = np.hstack((beta_vec, b * beta_new))
        kappa_vec = np.hstack((kappa_vec, kappa_new))
        beta_new = (1 - b) * beta_new
        kappa_new = beta(rho0, rho1, size=1)
        n_mat = np.hstack((n_mat, np.zeros((K, 1))))
        n_mat = np.vstack((n_mat, np.zeros((1, K + 1))))
        ysum = np.hstack((ysum, 0))
        ycnt = np.hstack((ycnt, 0))
        K += 1

    # Add the last time point back with its new assignment.
    if wt[last] == 0:
        n_mat[prev_state, zt[last]] += 1
    ysum[zt[last]] += yt[last]
    ycnt[zt[last]] += 1

    return zt, wt, n_mat, ysum, ycnt, beta_vec, kappa_vec, beta_new, kappa_new, K
def get_sampled_dict_counts(ssize_list, dcounts):
    """Draw multinomial samples of several sizes from a dict of counts.

    Often, we use a dict to keep counts of categories, classes, traits.
    Given a dict where objects to count are keys and counts are values, take
    one sample per size in ``ssize_list`` (objects are drawn with probability
    proportional to their count; objects with a zero count are left out).

    :param ssize_list: iterable of sample sizes
    :param dcounts: dict of { object: count }
    :return: dict with { ssize: { object: count_in_sample }} for all ssize in
        ssize_list
    :raises ValueError: if a requested sample size exceeds the total count
    """
    total = sum(dcounts.values())

    # The sampling distribution does not depend on the sample size, so it is
    # built once rather than once per requested size.
    traits = [trait for trait, count in dcounts.items() if count > 0]
    prob = [float(dcounts[trait]) / float(total) for trait in traits]

    result = dict()
    for ssize in ssize_list:
        if ssize > total:
            raise ValueError("sample size requested: %s is larger than the population: %s" % (ssize, total))
        sampled_counts = npr.multinomial(ssize, prob, size=1)
        count_list = sampled_counts.tolist()
        result[ssize] = dict(zip(traits, count_list[0]))
    return result
def grow(self):
    """Grow the population to carrying capacity

    The final population size is determined based on the proportion of
    producers present (interpolating between ``capacity_min`` and
    ``capacity_max``).  The new abundances are drawn from a multinomial with
    the probability of each genotype proportional to its abundance times its
    fitness.  Nothing happens for an empty or undiluted population, or when
    all growth probabilities are zero.
    """
    if self.is_empty():
        return
    if not self.diluted:
        return

    landscape = self.metapopulation.fitness_landscape
    # multinomial() needs an integer number of trials; the interpolation
    # generally yields a float, so truncate it explicitly.
    final_size = int(self.capacity_min +
                     (self.capacity_max - self.capacity_min) *
                     self.prop_producers())

    grow_probs = self.abundances * (landscape / nsum(landscape))
    if nsum(grow_probs) > 0:
        norm_grow_probs = grow_probs / nsum(grow_probs)
        self.abundances = multinomial(final_size, norm_grow_probs, 1)[0]
def sample_topic_freqs(word_topic_rates, mask):
    """Sample how often each topic generated a word.

    Word ``w`` is produced via topic ``i`` according to a Poisson with rate
    ``word_topic_rates[w, i]``.  ``mask`` is 1 for words whose frequency was
    observed (taken from the module-level ``observed_word_freqs``) and 0 for
    held-out words.  Returns a sample from the posterior over the number of
    times any word was generated from each topic.
    """
    # Held-out words (mask == 0): each contributes a Poisson count per topic,
    # and a sum of Poissons is Poisson, so one draw per topic suffices.
    held_out_rates = np.sum((1 - mask)[:, np.newaxis] * word_topic_rates, 0)
    topic_freqs = poisson(held_out_rates)

    # Observed words (mask == 1): split each observed frequency across topics
    # with probabilities proportional to that word's topic rates.  Words that
    # did not occur at all are skipped for efficiency.
    needs_draw = np.array(mask * observed_word_freqs, dtype=bool)
    for word in np.arange(vocab_size)[needs_draw]:
        rates = word_topic_rates[word, :]
        topic_probs = rates / sum(rates)
        topic_freqs += multinomial(observed_word_freqs[word], topic_probs)

    return topic_freqs
def sample_assignments(W, B, grid_counts, z_curr, is_log=False):
    """Resample how each cell's count is split across basis surfaces.

    W is a K by Nplayer matrix of weights (per player)
    B is the K by Vtiles matrix of (positive) bases
    grid_counts is the Nplayer by V matrix of counts
    z_curr is the Nplayer by V tile by K 3D array of current assignments;
        it is overwritten in place and returned
    is_log: W and B hold log weights / log bases (more numerically stable)
    """
    K = W.shape[0]  # number of basis surfaces
    N = W.shape[1]  # number of players or time steps
    V = B.shape[1]  # number of spatial tiles

    for n in range(N):
        for v in range(V):
            N_nv = grid_counts[n, v]
            if N_nv > 0:
                if is_log:
                    # Subtract the maximum before exponentiating: the
                    # normalised result is unchanged, but large log values no
                    # longer overflow to inf (and then NaN).
                    log_lam = W[:, n] + B[:, v]
                    Lam = np.exp(log_lam - log_lam.max())
                else:
                    Lam = W[:, n] * B[:, v]
                Lam = Lam / Lam.sum()
                z_curr[n, v, :] = np.random.multinomial(N_nv, Lam, size=1)[0]
            else:
                z_curr[n, v, :] = np.zeros(K)
    return z_curr
def make_schedule(n_cat, n_total, max_repeat):
    """Generate an event schedule subject to a repeat constraint.

    Returns a list of ``n_total`` category indices in ``range(n_cat)``.
    Categories are drawn uniformly, except that once the same category has
    occurred ``max_repeat`` times in a row the next draw is uniform over the
    other categories.
    """
    # Uniform distribution over all categories (float division on purpose).
    ideal_tmat = [1.0 / n_cat] * n_cat

    # Distributions used once a category has hit the repeat limit: uniform
    # over every other category.
    const_mat_list = []
    for i in range(n_cat):
        row = [1.0 / (n_cat - 1)] * n_cat
        row[i] = 0
        const_mat_list.append(row)

    def draw(dist):
        # Index of the single success in a one-trial multinomial draw.
        return int(multinomial(1, dist).argmax())

    schedule = []
    for _ in range(n_total):
        tail = schedule[-max_repeat:]
        # Only constrained when the last `max_repeat` events exist and are
        # all the same category (a shorter tail has not hit the limit yet).
        if len(tail) >= max_repeat and len(set(tail)) == 1:
            tdist = const_mat_list[tail[-1]]
        else:
            tdist = ideal_tmat
        schedule.append(draw(tdist))
    return schedule
def barabasiAlbert(n, d):
    """Generate an undirected Barabasi-Albert random graph.

    A Barabasi-Albert (a.k.a. preferential attachment) random graph adds
    nodes sequentially; each new node is connected to existing nodes chosen
    with probability proportional to their current degree.
    en.wikipedia.org/wiki/Barab%C3%A1si%E2%80%93Albert_model

    n: number of nodes
    d: degree of each new node
    """
    nodes = range(n)
    edges = set()
    degrees = np.zeros(n)

    for node in nodes:
        # The new node's own entry is set to 1 before it starts attaching.
        degrees[node] += 1
        fresh_edges = set()
        # Keep attaching until this entry exceeds d or the node's own index.
        while not (degrees[node] > d or degrees[node] > node):
            draw = multinomial(1, degrees / degrees.sum())
            neighbor = list(draw).index(1)
            candidate = (node, neighbor)
            # Reject duplicate edges and self-loops, then redraw.
            if candidate in fresh_edges or node == neighbor:
                continue
            fresh_edges.add(candidate)
            degrees[neighbor] += 1
            degrees[node] += 1
        edges |= fresh_edges

    return Graphs.UndirectedGraph(nodes, edges)
def total_zeros_multinomial_test(hit_table, ws, num_bootstraps=1000):
    """Bootstrap test on the summed zeros-multinomial distance of a hit table.

    The statistic is the sum of ``calculate_zeros_multinomial_distance(row,
    ws)`` over the rows of ``hit_table``.  Its null distribution is built by
    redrawing every row from a multinomial with the row's total and
    probabilities proportional to ``ws``.

    Returns ``(observed, bootstrap_mean, 2 * bootstrap_std, pvalue)`` where
    the p-value is the add-one-smoothed fraction of bootstrap statistics that
    are at least as large as the observed one.
    """
    ps = ws * 1.0 / ws.sum()
    ntots = hit_table.sum(axis=1)

    observed_prob = 0
    for i in range(len(ntots)):
        observed_prob += calculate_zeros_multinomial_distance(hit_table[i], ws)

    bootstrapped_probs = []
    for _ in range(num_bootstraps):
        bootstrapped_prob = 0
        for ntot in ntots:
            ns = multinomial(ntot, ps)
            bootstrapped_prob += calculate_zeros_multinomial_distance(ns, ws)
        bootstrapped_probs.append(bootstrapped_prob)
    bootstrapped_probs = numpy.array(bootstrapped_probs)

    pvalue = ((bootstrapped_probs >= observed_prob).sum() + 1.0) / (len(bootstrapped_probs) + 1.0)
    return (observed_prob, bootstrapped_probs.mean(),
            bootstrapped_probs.std() * 2, pvalue)
def resampleOptionally(self):
    """Resample the tau ensemble when its effective size has become small.

    The effective dimension is ``1 / sum(weights**2)``.  If it is larger than
    ``resample_threshold * N`` nothing happens.  Otherwise ancestors are
    drawn in proportion to their weights and each new log-tau is drawn from a
    normal centred between the ancestor's log-tau and the ensemble mean with
    standard deviation ``_h * sqrt(ensemble variance)``.  The resulting taus
    are stored sorted and the weights are reset to uniform.
    """
    N = len(self.taus)
    effective_dimension = 1.0 / dot(self.weights, self.weights)
    if effective_dimension > self.resample_threshold * N:
        return

    print('RESAMPLING')
    # How many offspring each current tau gets.
    old_taus_chosen = multinomial(N, self.weights)
    new_taus = array([])
    mu, Xi = self.ensembleLogMeanVar()
    Xi = self._h * sqrt(Xi)

    # Main loop -- remember we are working with log-taus.
    for idx, Nchosen in enumerate(old_taus_chosen):
        if Nchosen == 0:
            continue
        log_tau_i = log(self.taus[idx])
        # Shrink the ancestor's log-tau towards the ensemble mean.
        mu_i = self._a * log_tau_i + (1 - self._a) * mu
        new_taus_i = normal(loc=mu_i, scale=Xi, size=Nchosen)
        new_taus = r_[new_taus, new_taus_i]

    # Remember to exponentiate the draws.
    self.taus = sort(exp(new_taus))
    self.weights = ones_like(self.taus) / self.Ntaus()
def sendInfectedPass(self, infgroup, pdests):
    '''
    If there are infected people, distribute them over their destinations.

    infgroup holds the infected counts in the order
    [Ip0pos,Ip2pos,Ip10pos,Ip15pos,Ip20pos,Ip40pos,
     Is0pos,Is2pos,Is10pos,Is15pos,Is20pos,Is40pos]
    pdests is indexed by age class (0-5); each entry maps a destination
    object to the probability of travelling there.

    The sampled counts are added to ``dest.infectedvisiting`` of every
    destination.  Always returns 1.
    '''
    # Age-class index (into pdests) for each position of infgroup.
    a = [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5]
    if pdests:
        for n, i in enumerate(infgroup):  # n indexes the infected group
            destlist = pdests[a[n]]
            # Logical `and`: a bitwise `&` of the two counts is zero for e.g.
            # 2 infected and 1 destination, which silently dropped travellers.
            if int(i) and len(destlist):
                # How many of the infected go to each destination.
                choosedest = multinomial(int(i), list(destlist.values()))
                # Send the infected to their destinations.
                for k, j in enumerate(destlist):
                    j.dest.infectedvisiting[n] += choosedest[k]
    return 1
def SimulateCreditNetwork(CN, price, events, DP, TR, BV, SC):
    """Simulate defaults and transactions on a credit network.

    CN - credit network
    DP - default probability array
    TR - transaction rate matrix
    BV - buy value matrix
    SC - sell cost matrix
    price - function to determine a price from value and cost
    events - number of transactions to simulate

    Nodes first default independently with probability ``DP[n]``; every
    neighbour of a defaulter loses the weight of its edge to it and the
    defaulter is removed.  Then ``events`` buyer/seller pairs are drawn in
    proportion to ``TR`` and routed in random order; a pair that involves a
    removed node or whose payment cannot be routed is skipped.

    Returns a dict mapping each surviving node to its payoff.
    """
    payoffs = dict((n, 0.) for n in CN.nodes)

    # Materialise the defaulters before mutating CN.nodes below.
    defaulters = [n for n in CN.nodes if R.binomial(1, DP[n])]
    for d in defaulters:
        for n in CN.nodes:
            if CN.adjacent(n, d):
                payoffs[n] -= CN.weights[(n, d)]
        CN.removeNode(d)
        del payoffs[d]

    m = R.multinomial(events, array(TR.flat))
    l = TR.shape[0]
    transactors = []
    for i in range(l ** 2):
        # Flat index -> (buyer, seller); divmod keeps both indices integral.
        transactors.extend([divmod(i, l)] * int(m[i]))
    R.shuffle(transactors)

    for b, s in transactors:
        if b not in CN.nodes or s not in CN.nodes:
            continue
        try:
            CN.routePayment(b, s, price(BV[b, s], SC[b, s]))
        except (AssertionError, CreditError):
            continue
        payoffs[b] += BV[b, s]
        payoffs[s] -= SC[b, s]
    return payoffs
def population_coincidence_test(hit_table, ws, num_bootstraps=1000):
    """Bootstrap test on the fraction of rows that contain a coincidence.

    A row has a coincidence when at least one of its entries is 2 or more.
    The null distribution is built by redrawing every row from a multinomial
    with the row's total and probabilities proportional to ``ws``.

    Prints the observed fraction, the bootstrap mean, twice the bootstrap
    standard deviation and the p-value; returns ``(observed_fraction,
    pvalue)``.  The p-value is the add-one-smoothed fraction of bootstrap
    fractions that are at least as large as the observed one.
    """
    ps = ws * 1.0 / ws.sum()
    ntots = hit_table.sum(axis=1)

    coincidences = (hit_table > 1.5).sum(axis=1)
    num_coincidences = (coincidences > 0.5).sum() * 1.0
    num_total = len(coincidences) * 1.0
    observed_fraction = num_coincidences * 1.0 / num_total

    bootstrapped_fractions = []
    for _ in range(num_bootstraps):
        bootstrapped_num_coincidences = 0
        bootstrapped_num_total = 0
        for ntot in ntots:
            ns = multinomial(ntot, ps)
            bootstrapped_num_total += 1
            if (ns > 1.5).sum():
                bootstrapped_num_coincidences += 1
        bootstrapped_fractions.append(
            bootstrapped_num_coincidences * 1.0 / bootstrapped_num_total)
    bootstrapped_fractions = numpy.array(bootstrapped_fractions)

    pvalue = ((bootstrapped_fractions >= observed_fraction).sum() + 1.0) / (len(bootstrapped_fractions) + 1.0)
    print("%s %s %s %s" % (observed_fraction, bootstrapped_fractions.mean(),
                           bootstrapped_fractions.std() * 2, pvalue))
    return observed_fraction, pvalue
def mean(data, num_resamples, percentiles=None):
    """Compute bootstrap estimates of the mean of a data set

    One particular use is to compute bootstrap bounds on the social welfare
    of a mixture if all of the samples are iid draws of welfare from the
    mixture.

    Parameters
    ----------
    data : [float] or ndarray
        The data to bootstrap the mean of; it is flattened first.
    num_resamples : int
        The number of bootstrap samples. Higher will take longer but also
        give better accuracy.
    percentiles : int or [int]
        The percentiles in [0, 100] to compute on the bootstrap means, e.g.
        95 or [2.5, 97.5].  When omitted, the sorted bootstrap means
        themselves are returned.
    """
    values = np.asarray(data, float).ravel()
    count = values.size

    # Resampling `count` points with replacement is a multinomial draw of how
    # often each original point is picked.
    pick_counts = rand.multinomial(count, np.ones(count) / count, num_resamples)
    result = pick_counts.dot(values) / count

    if percentiles is not None:
        return np.percentile(result, percentiles)
    result.sort()
    return result
def clade_grouping_test(hit_table, num_bootstraps=1000):
    """Bootstrap test on the clade-grouping log-likelihood of a hit table.

    The null distribution is built by redrawing every row of ``hit_table``
    from a multinomial with the row's total and the pooled column
    frequencies, and recomputing
    ``calculate_clade_grouping_loglikelihood`` on the redrawn table.

    Prints the observed value, the bootstrap mean, twice the bootstrap
    standard deviation and the p-value; returns ``(observed_loglikelihood,
    pvalue)``.  The p-value is the add-one-smoothed fraction of bootstrap
    values that are at least as large as the observed one.
    """
    observed_loglikelihood = calculate_clade_grouping_loglikelihood(hit_table)

    ps = hit_table.sum(axis=0) * 1.0 / hit_table.sum()
    ntots = hit_table.sum(axis=1)

    bootstrapped_loglikelihoods = []
    for _ in range(num_bootstraps):
        bootstrapped_hit_table = numpy.array(
            [multinomial(ntot, ps) for ntot in ntots])
        bootstrapped_loglikelihoods.append(
            calculate_clade_grouping_loglikelihood(bootstrapped_hit_table))
    bootstrapped_loglikelihoods = numpy.array(bootstrapped_loglikelihoods)

    pvalue = ((bootstrapped_loglikelihoods >= observed_loglikelihood).sum() + 1.0) / (len(bootstrapped_loglikelihoods) + 1.0)
    print("%s %s %s %s" % (observed_loglikelihood,
                           bootstrapped_loglikelihoods.mean(),
                           bootstrapped_loglikelihoods.std() * 2, pvalue))
    return observed_loglikelihood, pvalue
def chooseRand(dist):
    """Return a random index drawn from a discrete probability distribution.

    ``dist`` must be a non-negative vector that sums to 1 (within a small
    floating-point tolerance).  The return value is the index of the chosen
    element.

    Raises ValueError if ``dist`` does not sum to 1 or contains a negative
    entry.
    """
    import math  # local import: only needed for the accurate sum below

    probs = list(dist)
    # Compare with a tolerance: e.g. ten times 0.1 does not sum to exactly 1.
    if abs(math.fsum(probs) - 1.0) > 1e-9:
        raise ValueError('Distribution does not sum to 1')
    # Check every entry; the sign of the product misses pairs of negatives.
    if any(p < 0 for p in probs):
        raise ValueError('Distribution contains a negative probability')

    randstate = random.multinomial(1, probs)
    return int(randstate.argmax())
def mutate(self):
    """Mutate a Population

    Each genotype mutates to another with probability inversely proportional
    to the Hamming distance (# different bits in binary representation)
    between them. The distances between all pairs of genotypes are
    pre-calculated at the beginning of a run and stored in
    metapopulation.mutation_probs.

    Nothing happens for an empty or undiluted population; otherwise
    ``self.abundances`` is replaced by the mutated abundances (uint32).
    """
    if self.is_empty():
        return
    if not self.diluted:
        return

    mutated_population = zeros(self.abundances.size, dtype=np.uint32)
    for i in np.nonzero(self.abundances)[0]:
        offspring = multinomial(int(self.abundances[i]),
                                self.metapopulation.mutation_probs[i],
                                size=1)[0]
        # The draw is int64; cast explicitly, because an in-place add of
        # int64 into a uint32 array is rejected by NumPy's casting rules.
        mutated_population += offspring.astype(np.uint32)
    self.abundances = mutated_population
def genCityCC(cityStateMap, cc2PopMap, city):
    """Pick a county code for ``city``, weighted by county population.

    ``cityStateMap`` maps keys whose text before the first comma is the city
    name to county codes; ``cc2PopMap`` maps county codes to populations.
    Among all entries for ``city`` one county code is drawn with probability
    proportional to its population.  Returns "" when the city is unknown or
    the matching populations sum to zero.
    """
    codes = []
    populations = []
    for key, county_code in cityStateMap.items():
        if key.split(',')[0] != city:
            continue
        codes.append(county_code)
        populations.append(cc2PopMap[county_code])

    total = sum(populations)
    if total == 0:
        return ""

    weights = [pop / total for pop in populations]
    one_hot = random.multinomial(1, weights, size=1)[0]
    chosen = np.nonzero(one_hot)[0][0]
    return codes[chosen]
def initialize_discrete(X, R, depth, K, leaf_path, random=False):
    """Initialise the discrete leaf assignments of every trajectory.

    X = list of continuous latent state arrays (time along the second axis)
    R = hyperplanes
    depth = depth of tree
    K = number of leaf nodes
    leaf_path = paths associated with each leaf (one column per leaf)
    random = sample the leaf from its probability mass function instead of
        taking the most probable leaf

    Returns ``(Z, Path)``: for every trajectory the leaf index per time step
    and the corresponding tree path per time step.
    """
    Z = []
    Path = []
    for idx in range(len(X)):
        n_steps = X[idx][0, :].size
        z = np.zeros(n_steps)
        path = np.zeros((depth, n_steps))
        for t in range(n_steps):
            log_prob = utils.compute_leaf_log_prob(R, X[idx][:, t], K, depth, leaf_path)
            # Exponentiate relative to the maximum for stability, then
            # normalise by the sum so that p is a proper pmf (dividing by the
            # maximum left it unnormalised and unusable for sampling).
            p = np.exp(log_prob - np.max(log_prob))
            p = p / np.sum(p)
            if random:
                # Sample from the pmf defined by p.
                choice = npr.multinomial(1, p, size=1)
                leaf = np.where(choice[0, :] == 1)[0][0]
            else:
                # Initialise with the Bayes classifier (most probable leaf).
                leaf = np.argmax(p)
            path[:, t] = leaf_path[:, leaf].ravel()
            z[t] = leaf
        Z.append(z)
        Path.append(path)
    return Z, Path
def generate_sbm_data(N, K, alpha, a, b, m=None):
    """Generate data from a stochastic block model.

    N is the number of nodes
    K is the number of blocks
    alpha is the concentration parameter
    a and b are the shape parameters
    m is the base measure (uniform over the K blocks when omitted)

    Returns ``(Z, Y)``: the N x K one-hot block assignments and the N x N
    0/1 edge matrix.
    """
    # `is None`: comparing an array-valued m with `==` is elementwise and
    # cannot be used as a condition.
    if m is None:
        m = ones(K) / K  # uniform base measure

    Z = zeros((N, K))  # block assignments

    # sample (global) distribution over blocks
    [theta] = dirichlet(alpha * m, 1)

    # sample between- and within-block edge probabilities
    phi = beta(a, b, (K, K))

    # sample block assignments
    for n in range(N):
        Z[n, :] = multinomial(1, theta)

    # sample edges: an edge exists with the probability given by the blocks
    # of its two endpoints
    Y = (uniform(size=(N, N)) <= dot(dot(Z, phi), Z.T)).astype(int)

    return Z, Y
def __add_customer(self, context, restaurant, word):
    """Seat a customer for ``word`` in ``restaurant``.

    Each existing table reported by ``restaurant.w_tables_info(word)`` is
    chosen with weight ``max(0, mass - 0.2)``; a new table is opened with
    weight ``0.2 + 0.2 * restaurant.total_t``.

    Returns True when the customer joined an existing table and False when a
    new table was created.
    """
    table_info = restaurant.w_tables_info(word)
    if len(table_info) == 0:
        tables_index, prob_mass = ((), ())
    else:
        tables_index, prob_mass = zip(*table_info.items())

    # Discounted weights of the existing tables, then the new-table weight.
    weights = [max(0, mass - 0.2) for mass in prob_mass]
    weights.append(0.2 + 0.2 * restaurant.total_t)
    nmlz = 1 / sum(weights)
    probs = [weight * nmlz for weight in weights]

    random_index = multinomial(1, probs).tolist().index(1)

    if random_index < len(tables_index):
        # Join the chosen existing table.
        chosen_table = restaurant.tables[tables_index[random_index]]
        chosen_table["customer"] += 1
        restaurant.total_c += 1
        return True

    # The last slot was drawn: open a new table serving this word.
    new_index = restaurant.table_next_index
    restaurant.tables[new_index] = {"dish": word, "customer": 1}
    restaurant.table_next_index += 1
    restaurant.total_t += 1
    restaurant.total_c += 1
    return False
def act_(self, state):
    """Choose an action based on current state.

    * ``state is None``: a uniformly random action.
    * ``EXPLORE == 'epsilon'``: a random action with probability ``EPSILON``,
      otherwise the action with the highest Q-value (ties broken randomly).
    * ``EXPLORE == 'soft_probability'``: an action sampled from the softmax
      of the Q-values.

    Raises ValueError for any other exploration strategy.
    """
    def report(message):
        # Same text as the former two-part print statement.
        if self.verbose > 0:
            print(" QAgent:  " + message)

    if state is None:
        # The state cannot be internalised: act randomly.
        idx_action = randint(0, len(self.ACTIONS))
        report("randomly choose action {} (None state).".format(self.ACTIONS[idx_action]))
    elif self.EXPLORE == 'epsilon':
        if rand() < self.EPSILON:
            # Random exploration with "epsilon" prob.
            idx_action = randint(0, len(self.ACTIONS))
            report("randomly choose action (Epsilon).")
        else:
            # Select the best action with "1-epsilon" prob., break tie randomly.
            q_vals = self.lookup_table_(state)
            max_qval = max(q_vals)
            idx_best_actions = [i for i in range(len(q_vals)) if q_vals[i] == max_qval]
            idx_action = idx_best_actions[randint(0, len(idx_best_actions))]
            report("choose best q among {} (Epsilon).".format(
                {self.ACTIONS[i]: q_vals[i] for i in range(len(self.ACTIONS))}
            ))
    elif self.EXPLORE == 'soft_probability':
        q_vals = self.lookup_table_(state)  # state = internal_state
        # Shift by the maximum before exponentiating: the softmax is
        # unchanged, but large Q-values no longer overflow to inf / NaN.
        max_qval = max(q_vals)
        exp_q_vals = exp([q - max_qval for q in q_vals])
        idx_action = multinomial(1, exp_q_vals / sum(exp_q_vals)).nonzero()[0][0]
        report("choose best q among {} (SoftProb).".format(dict(zip(self.ACTIONS, q_vals))))
    else:
        raise ValueError('Unknown keyword for exploration strategy!')
    return self.ACTIONS[idx_action]
def gen_random_labels(X: Union[np.ndarray, int], n_classes: int, pvec=None) -> np.ndarray:
    """
    Returns a random labelling of dataset X.
    Labels are one-hot encoded, with `n_classes` dimensions.

    Accepts an optional vector of probabilities with which to generate each
    class.

    Args:
        X: number of labels to generate, integer or array-like. If
            array-like, `X.shape[0]` labels will be generated.
        n_classes: number of output classes
        pvec: array-like, gives probabilities of each class. If `None`, all
            classes are equiprobable.

    Returns:
        ndarray[N, n_classes], the random, one-hot encoded labels.

    Raises:
        ValueError: if `pvec` does not have `n_classes` entries.
    """
    num = X if isinstance(X, (int, np.integer)) else X.shape[0]

    # Only fall back to the uniform distribution when no pvec was supplied;
    # previously a caller-provided pvec was overwritten.
    if pvec is None:
        pvec = np.ones((n_classes, )) / n_classes
    elif len(pvec) != n_classes:
        raise ValueError(
            "pvec has %d entries, expected n_classes=%d" % (len(pvec), n_classes))

    return npr.multinomial(1, pvec, size=num)
def sample_topic_assignments(self):
    """Resample the user-side (``z_U``) and item-side (``z_V``) topic
    assignments for every rated (user, item) pair.

    For each pair an unnormalised weight per topic is computed from a
    Gaussian-shaped term on the rating residual, normalised, and used for a
    one-trial multinomial draw that is stored in ``z_U`` / ``z_V``.
    """
    # NOTE(review): `X` (ratings) and `I_U` are read as module-level names
    # here while the item loop uses `self.I_V` -- confirm that `I_U` is not
    # meant to be `self.I_U`.
    # NOTE(review): the inner loops enumerate a row/column of z_U / z_V only
    # for its length, and multiply by theta[i, j] (not indexed by topic) --
    # looks like `range(self.K_U)` / `theta[i, k]` may have been intended.
    # NOTE(review): multinomial() returns a one-hot vector, yet z_U / z_V
    # entries are also used as indices into c_ and d -- confirm whether an
    # index (argmax of the draw) should be stored instead.

    # user topics
    for i in range(self.N):
        for j in I_U[i]:
            theta_U_star = np.zeros(self.K_U)
            for k_idx, k in enumerate(self.z_U[i,:]):
                theta_U_star[k_idx] = self.theta_U[i,j]*exp(-(X[(i,j)] - self.chi_0 - self.c_[i, self.z_V[i,j]] - self.d[k_idx, j] - np.dot(self.U_[i,:], self.V_[:,j]))**2/(2*self.sigmaSqd))
            self.z_U[i,j] = multinomial(1, theta_U_star/sum(theta_U_star))

    # item topics
    for j in range(self.M):
        for i in self.I_V[j]:
            theta_V_star = np.zeros(self.K_V)
            for k_idx, k in enumerate(self.z_V[:,j]):
                theta_V_star[k_idx] = self.theta_V[i,j]*exp(-(X[(i,j)] - self.chi_0 - self.c_[i, k_idx] - self.d[self.z_U[i,j], j] - np.dot(self.U_[i,:], self.V_[:,j]))**2/(2*self.sigmaSqd))
            self.z_V[i,j] = multinomial(1, theta_V_star/sum(theta_V_star))
def main():
    """Emit randomly sampled assignments as newline-delimited JSON.

    Reads a configuration and a mixture (both JSON) from the parsed
    command-line arguments, then repeatedly draws, for every role, a
    multinomial split of that role's count across its strategies.  Each
    draw is stored under ``conf['assignment']`` and the whole
    configuration is dumped to ``args.output`` followed by a newline.
    Runs ``args.num_samples`` times, or forever if that is falsy; a
    broken output pipe ends the loop quietly.
    """
    args = create_parser().parse_args()
    conf = json.load(args.configuration)
    roles = conf.pop('roles')
    mix = json.load(args.mixture)

    # One (role, count, strategies, probabilities) record per role.
    role_info = []
    for role_name, strat_probs in mix.items():
        record = (role_name, roles[role_name]) + tuple(zip(*strat_probs.items()))
        role_info.append(record)

    if args.num_samples:
        iterations = itertools.islice(itertools.count(), args.num_samples)
    else:
        iterations = itertools.count()

    try:
        for _ in iterations:
            samp = {}
            for role, c, s, probs in role_info:
                draws = rand.multinomial(c, probs)
                # Strategies that received no players are left out.
                samp[role] = {
                    strat: int(count)
                    for strat, count in zip(s, draws)
                    if count > 0
                }
            conf['assignment'] = samp
            json.dump(conf, args.output)
            args.output.write('\n')
    except BrokenPipeError:
        pass
def makeRandomSpectrum(self, mols, quants, sigma, jP=None, prec_digits=None):
    """Simulate a mixture of isotopic envelopes.

    Parameters
    ----------
    mols : list
        A list of molecular species: tuples containing
        (id, chemical formula string, something, charge, quenched charge)
    quants : list
        A list of total intensities of each molecular species.
    sigma : float
        The standard deviation of the masses of isotopologues -
        theoretical equivalent of the mass resolution.
    jP : float
        The joint probability of the theoretical isotopic envelope.
        Falls back to ``self.jP`` when falsy.
    prec_digits : float
        The number of digits after which the floats get rounded.
        Falls back to ``self.prec_digits`` when falsy.

    Returns
    -------
    spectrum : tuple
        A tuple containing the theoretical spectrum:
        mass over charge values and intensities.
    """
    x0 = sum(quants)
    if not prec_digits:
        prec_digits = self.prec_digits
    if not jP:
        jP = self.jP

    def get_intensity_measure(mols, quants):
        """Yield (m/z values, scaled intensities) for every species."""
        for mol, quant in zip(mols, quants):
            _, atomCnt_str, _, q, g = mol
            # NOTE(review): the envelope is always requested with
            # prec_digits=2, independent of the `prec_digits` argument --
            # confirm this is intentional.
            ave_mz, ave_intensity = self.isoEnvelope(
                atomCnt_str=atomCnt_str, jP=jP, q=q, g=g, prec_digits=2)
            ave_intensity = quant * ave_intensity
            yield ave_mz, ave_intensity

    mz_average, intensity = reduce(merge_runs,
                                   get_intensity_measure(mols, quants))
    probs = intensity / sum(intensity)
    # Distribute the total intensity x0 over the theoretical peaks.
    counts = np.array(multinomial(x0, probs), dtype='int')
    if sigma > 0.0:
        # Blur every peak with Gaussian noise and histogram the rounded masses.
        spectrum = Counter()
        for m_average, cnt in zip(mz_average, counts):
            if cnt > 0:
                m_over_z = np.round(
                    normal(loc=m_average, scale=sigma, size=cnt), prec_digits)
                spectrum.update(m_over_z)
        # Materialize the keys: on Python 3 `np.array(dict.keys())` wraps the
        # view object in a 0-d object array instead of listing the masses.
        masses = list(spectrum)
        spectrum = np.array(masses), np.array(
            [float(spectrum[k]) for k in masses])
    else:
        spectrum = (mz_average, counts)
    return spectrum
def test_JSD():
    """Regression check of ``ndd.estimators.JSDivergence`` on seeded counts."""
    concentration, n_draws, n_bins = 1.0, 100, 20
    random.seed(SEED)
    pk = random.dirichlet([concentration] * n_bins)
    # Four count vectors drawn from the same underlying distribution.
    counts = random.multinomial(n_draws, pk, size=4)
    expected = -0.01804523405829217
    observed = ndd.estimators.JSDivergence()(counts)
    assert numpy.isclose(observed, expected)
def test_KLD():
    """Regression check of ``ndd.kullback_leibler_divergence`` on seeded data."""
    concentration, n_draws, n_bins = 1.0, 100, 20
    random.seed(SEED)
    qk = random.dirichlet([concentration] * n_bins)
    # Counts sampled from qk itself.
    pk = random.multinomial(n_draws, qk)
    expected = -0.04299973796573253
    observed = ndd.kullback_leibler_divergence(pk, qk)
    assert numpy.isclose(observed, expected)
def define_living_place(province):
    """Randomly pick an entry of ``provinces`` for the given province.

    The weight of each candidate is ``province.living_places[name]``
    divided by ``province.living_places['Total']``, taken in the order of
    ``province_names``; the entry of ``provinces`` at the drawn position
    is returned.
    """
    distribution = province.living_places
    total = distribution['Total']

    probabilities = []
    for name in province_names:
        probabilities.append(distribution[name] / total)

    # A single multinomial trial marks exactly one position.
    selection = random.multinomial(1, probabilities)
    for position, picked in enumerate(selection):
        if picked:
            return provinces[position]
def sample_newx(row):
    """
    Sample a tag.

    :param row: index of the current POS (the next tag is one of the
        columns of that row of ``t``)
    :return: column index drawn with the probabilities in ``t[row, :]``
    """
    one_hot = multinomial(1, t[row, :])
    # Position of the single success in the one-trial draw.
    hit_positions = np.nonzero(one_hot == 1)[0]
    return hit_positions[0]
def rvs(self, x=None, size=[], return_xy=False):
    """Draw multinomial count vectors, optionally generating the inputs too.

    If ``x`` is omitted, ``size`` must be an int and ``x`` is drawn as a
    standard-normal array of shape ``(size, self.D_in)``.  Otherwise ``x``
    must be 2-D with ``self.D_in`` columns.  ``self.pi(x)`` supplies the
    probabilities: a 1-D result gives a single draw of ``self.N`` trials,
    a 2-D result gives one draw per row.

    Returns ``y``, or ``(x, y)`` when ``return_xy`` is true.  Raises
    ``NotImplementedError`` for probabilities of any other rank.
    """
    if x is not None:
        assert x.ndim == 2 and x.shape[1] == self.D_in
    else:
        assert isinstance(size, int)
        x = npr.randn(size, self.D_in)

    pi = self.pi(x)
    rank = pi.ndim
    if rank not in (1, 2):
        raise NotImplementedError

    if rank == 1:
        y = npr.multinomial(self.N, pi)
    else:
        per_row = [npr.multinomial(self.N, row) for row in pi]
        y = np.array(per_row)

    if return_xy:
        return (x, y)
    return y
def error(alpha, n):
    """Return the actual error and the estimated uncertainty (normalized).

    A distribution is drawn from ``dirichlet(alpha)`` and ``n`` counts are
    sampled from it; both returned values are divided by the entropy of
    the drawn distribution.
    """
    alphabet_size = len(alpha)
    true_dist = dirichlet(alpha)
    sampled_counts = multinomial(n, true_dist)
    true_entropy = sp_entropy(true_dist)
    estimate, uncertainty = ndd.entropy(sampled_counts, k=alphabet_size,
                                        return_std=True)
    relative_error = (estimate - true_entropy) / true_entropy
    relative_std = uncertainty / true_entropy
    return relative_error, relative_std
def test_JSD():
    """Regression check of ``ndd.divergence.JSDivergence`` on seeded counts."""
    concentration, n_draws, n_bins = 1.0, 100, 20
    random.seed(SEED)
    pk = random.dirichlet([concentration] * n_bins)
    # Four count vectors drawn from the same underlying distribution.
    counts = random.multinomial(n_draws, pk, size=4)
    expected = -0.017281201076104313
    observed = ndd.divergence.JSDivergence()(counts)
    assert numpy.isclose(observed, expected)
def smooth_comb(nsamp):
    """Generate ``nsamp`` samples from the six-component "smooth comb" mixture.

    The sample budget is split across components with one multinomial
    draw using weights ``2**(5-k)/63``; the per-component parameter pairs
    and counts are handed to ``_generate``.
    NOTE(review): each input pair is presumably (location, scale) --
    confirm against ``_generate``.
    """
    inputs = [((65-96)*2**-k/21, 32/63*2**(-2*k)) for k in xrange(6)]
    rates = [2**(5-k)/63 for k in xrange(6)]
    per_component = rand.multinomial(nsamp, rates, size=1)
    counts = per_component[0]
    return _generate(inputs, counts)
def discrete_comb(nsamp):
    """Generate ``nsamp`` samples from the six-component "discrete comb" mixture.

    Three components carry weight 2/7 each and three carry 1/21 each; one
    multinomial draw splits the sample budget and the result is passed to
    ``_generate`` together with the component parameter pairs.
    NOTE(review): each input pair is presumably (location, scale) --
    confirm against ``_generate``.
    """
    wide = [((2*k-15)/7, 2/7) for k in xrange(3)]
    narrow = [(2*k/7, 1/21) for k in xrange(8, 11)]
    inputs = wide + narrow
    rates = [2/7]*3+[1/21]*3
    counts = rand.multinomial(nsamp, rates, size=1)[0]
    return _generate(inputs, counts)
def asym_claw(nsamp):
    """Generate ``nsamp`` samples from the "asymmetric claw" mixture.

    One base component with weight 1/2 plus five components (k = -2..2)
    with weights ``2**(1-k)/31``; a single multinomial draw splits the
    sample budget, which is then passed to ``_generate``.
    NOTE(review): each input pair is presumably (location, scale) --
    confirm against ``_generate``.
    """
    spikes = xrange(-2, 3)
    inputs = [(0, 1)] + [(k+1/2, 2**(-k)/10) for k in spikes]
    rates = [1/2] + [2**(1-k)/31 for k in xrange(-2, 3)]
    per_component = rand.multinomial(nsamp, rates, size=1)
    return _generate(inputs, per_component[0])
def generate_spiral2d(nspiral=1000,
                      ntotal=500,
                      nsample=100,
                      start=0.,
                      stop=1,  # approximately equal to 6pi
                      noise_std=.1,
                      a=0.,
                      b=1.,
                      savefig=True):
    """Build noisy windows of clockwise / counter-clockwise 2-D spirals.

    Args:
        nspiral: number of (trajectory, window) pairs to produce.
        ntotal: number of time stamps on the full trajectory.
        nsample: length of every sampled window.
        start, stop: first and last time stamp.
        noise_std: standard deviation of the Gaussian noise added to windows.
        a, b: offset and slope of the radius as a function of the angle.
        savefig: if true, plot both spirals to ``./ground_truth.png``.

    Returns:
        ``(orig_trajs, samp_trajs, orig_ts, samp_ts)``: the stacked full
        trajectories, the stacked noisy windows, all time stamps and the
        first ``nsample`` time stamps.
    """
    orig_ts = np.linspace(start, stop, num=ntotal)
    samp_ts = orig_ts[:nsample]

    # Clockwise spiral; the angle is offset by 1 so the radius term
    # 50 / angle never divides by zero.
    angle_cw = stop + 1. - orig_ts
    radius_cw = a + b * 50. / angle_cw
    orig_traj_cw = np.stack(
        (radius_cw * np.cos(angle_cw) - 5., radius_cw * np.sin(angle_cw)),
        axis=1)

    # Counter-clockwise spiral.
    angle_cc = orig_ts
    radius_cc = a + b * angle_cc
    orig_traj_cc = np.stack(
        (radius_cc * np.cos(angle_cc) + 5., radius_cc * np.sin(angle_cc)),
        axis=1)

    if savefig:
        plt.figure()
        plt.plot(orig_traj_cw[:, 0], orig_traj_cw[:, 1], label='clock')
        plt.plot(orig_traj_cc[:, 0], orig_traj_cc[:, 1], label='counter clock')
        plt.legend()
        plt.savefig('./ground_truth.png', dpi=500)
        print('Saved ground truth spiral at {}'.format('./ground_truth.png'))

    # Window starts are uniform over the middle of the trajectory: never
    # within `nsample` steps of either end.
    start_pvals = [1. / (ntotal - 2. * nsample)] * (ntotal - int(2 * nsample))

    full_list, window_list = [], []
    for _ in range(nspiral):
        first = np.argmax(npr.multinomial(1, start_pvals)) + nsample

        # Uniformly select the rotation direction.
        if bool(npr.rand() > .5):
            chosen = orig_traj_cc
        else:
            chosen = orig_traj_cw
        full_list.append(chosen)

        window = chosen[first:first + nsample, :]
        window_list.append(window + npr.randn(*window.shape) * noise_std)

    # Batching the windows suits an RNN; the full trajectories are
    # batched only for ease of indexing.
    orig_trajs = np.stack(full_list, axis=0)
    samp_trajs = np.stack(window_list, axis=0)
    return orig_trajs, samp_trajs, orig_ts, samp_ts
def _get_standard_negative_triplets(graph_dataset, nodes, positives, num_negatives, neighbors_to_distances, distances_to_neighbors, max_neighbors, mode, is_val): """ Get negatives for each (node, positive_node) pair by randomly and uniformly sampling nodes which are farther away from a given node than the corresponding positive node. :return: (List[int], List[int], List[int]) representing nodes, positives and negatives respectively. """ filtered_nodes, filtered_positives, filtered_negatives = [], [], [] for idx, (node, pos_candidate) in enumerate(zip(nodes, positives)): pos_distance = neighbors_to_distances[idx][pos_candidate] negative_rs_to_neighbors = { r: elems for r, elems in distances_to_neighbors[idx].items() if r > pos_distance } negative_candidate_counts = [ len(radius_candidates) for radius, radius_candidates in sorted( negative_rs_to_neighbors.items()) ] total_candidates = sum(negative_candidate_counts) if total_candidates == 0: LOG.debug( f'Sampling random nodes as negative candidates for {node}, {pos_candidate}.' 
) negative_candidates = _get_random_negative_nodes( graph_dataset, node, num_negatives, get_mask_from_mode(graph_dataset, mode), neighbors_to_distances[idx], pos_distance) elif is_val: negative_candidates = list( islice(chain.from_iterable(negative_rs_to_neighbors.values()), num_negatives)) else: normalized_candidate_counts = [ count / float(total_candidates) for count in negative_candidate_counts ] sampled_radii = multinomial(min(num_negatives, total_candidates), normalized_candidate_counts) negative_candidates = [] start_radius, end_radius = pos_distance + 1, pos_distance + 1 + len( sampled_radii) for radius, elems_count in zip(range(start_radius, end_radius), sampled_radii): negative_candidates.extend( SAMPLE( negative_rs_to_neighbors[radius], min(elems_count, len(negative_rs_to_neighbors[radius])))) for neg in negative_candidates: filtered_nodes.append(node) filtered_positives.append(pos_candidate) filtered_negatives.append(neg) return filtered_nodes, filtered_positives, filtered_negatives
def sample(self, params, xelems):
    """Draw one output index tuple for the given inputs.

    ``self.probs(params, xelems)`` is flattened, a single multinomial
    trial selects one flat position, and that position is converted back
    to a multi-index over ``self.ydims``.

    Raises:
        ValueError: if ``xelems`` does not contain ``self.xrank`` items.
    """
    n_given = len(xelems)
    if n_given != self.xrank:
        raise ValueError(f'{self.xrank} inputs should be given; '
                         f'{n_given} given instead!')

    flat_probs = self.probs(params, xelems).ravel()
    one_hot = rnd.multinomial(1, flat_probs)
    flat_index = one_hot.argmax()
    return np.unravel_index(flat_index, self.ydims)
def sample(self, *xs):
    """Draw one output element tuple conditioned on the index tuple ``xs``.

    ``self.probs[xs]`` is flattened and a single multinomial trial picks
    one flat position; the position is unravelled over ``self.ydims`` and
    each resulting index is mapped through the matching entry of
    ``self.yspaces`` via ``elem``.
    """
    self.logger.debug(f'sample() \t; x={xs}')
    assert len(xs) == self.nx

    flat_probs = self.probs[xs].ravel()
    winner = rnd.multinomial(1, flat_probs).argmax()
    yidxs = np.unravel_index(winner, self.ydims)
    return tuple(
        space.elem(index) for space, index in zip(self.yspaces, yidxs)
    )
def _sample(p): ''' Sample with probability vector p from a multinomial distribution :param p: list List of probabilities representing probability vector for the multinomial distribution :return: int index of randomly selected output ''' return [i for i, entry in enumerate(multinomial(1, p)) if entry != 0][0]
def get_prob_user():
    """Pick a username at random, favouring users with fewer work times.

    Each user's weight is the inverse of their share of the total
    ``work_times`` (zeros are treated as 1 so the inverse is defined);
    the weights are normalized and one user is drawn from them.

    Returns:
        str: the ``username`` of the drawn row of the frame returned by
        ``show_info_of_user()``.
    """
    df = show_info_of_user()
    s1 = df['work_times'].astype(int)
    # Avoid division by zero for users that have not worked yet.
    s1[s1 == 0] = 1
    s2 = 1 / (s1 / s1.sum())
    prob = s2 / s2.sum()
    # NOTE(review): this recomputes the last entry from the total and is
    # numerically (almost) a no-op; presumably meant to absorb rounding
    # error so the probabilities sum to one -- confirm.
    prob.iloc[-1] = prob.sum() - prob.iloc[:-1].sum()
    # np.where yields the *position* of the drawn entry ...
    index = np.where(multinomial(1, prob) == 1)[0][0]
    # ... so look it up positionally.  `.loc` would interpret it as an
    # index label and fail (or return the wrong row) whenever the frame's
    # index is not the default 0..n-1 range.
    return str(df['username'].iloc[index])
def main():
    """Roll 20d6 thirty times, total each roll and show ``normalsort``'s output."""
    # 20d6, sampled 30 times: each row holds how often every face came up.
    dice_roll_info = multinomial(20, [1 / 6] * 6, 30)
    rolls = []
    for row in dice_roll_info:
        # Total pips = face value times the number of dice showing it.
        rolls.append(sum(face * count for face, count in enumerate(row, start=1)))
    normalsorted = normalsort(rolls, bin_count=10, verbose=True)
    print(f'normalsort output:\n\t{normalsorted}')
def resample_population_matrix(population_matrix):
    """Bootstrap a count matrix from its pooled column frequencies.

    Every row is redrawn as a multinomial sample with the row's original
    total as the number of trials and the normalized column sums of the
    whole matrix as the probabilities.

    Returns a new array with one resampled row per input row.
    """
    row_totals = population_matrix.sum(axis=1)
    pooled = population_matrix.sum(axis=0) * 1.0
    pooled /= pooled.sum()

    resampled_rows = []
    for n_trials in row_totals:
        resampled_rows.append(multinomial(n_trials, pooled))
    return numpy.array(resampled_rows)
def multinomial_sample(distribution):
    """Sample a random index according to a multinomial distribution.

    @param distribution: probability distribution
    @type distribution: array of log probabilities

    @return: index of the sampled entry, from 0 to the length of
        distribution minus one
    @rtype: integer
    """
    # Convert the log probabilities back to probabilities.
    probabilities = exp(distribution)
    one_hot = multinomial(1, probabilities)
    return one_hot.argmax()
def update_CR_dist(self):
    """Draw one crossover index per chain from a uniform distribution.

    For each of ``self.nchains`` chains a value ``m`` in ``1..self.nCR``
    is drawn with equal probability and converted to ``CR = m / nCR``.

    NOTE(review): in the code visible here the drawn values and the
    counters ``t`` / ``Lm`` are neither stored nor returned, so the only
    observable effect is advancing the random generator -- the function
    looks unfinished; confirm.
    """
    t = 1
    Lm = 0
    pm = 1. / self.nCR
    for _ in range(self.nchains):
        draw = multinomial(1, [pm] * self.nCR)
        # Position of the single success, shifted to be 1-based.
        m = draw.nonzero()[0][0] + 1
        CR = float(m) / self.nCR
        Lm += 1
def sample_random_read_pair(gene, true_psi, read_len, overhang_len,
                            insert_len, mean_frag_len):
    """
    Sample a random paired-end read (not taking into account overhang)
    from the given a gene, the true Psi value, read length, overhang
    length and the insert length (fixed).

    A paired-end read is defined as (genomic_left_read_start,
    genomic_left_read_end, genomic_right_read_start,
    genomic_right_read_end).

    Note that if we're given a gene that has only two isoforms, the
    'align' function of gene will return a read summary in the form of
    (NI, NE, NB) rather than an alignment to the two isoforms (which is a
    pair (0/1, 0/1)).

    Returns a tuple ``(alignment, frag_lens, pe_read)`` where the first
    two items come from ``gene.align_read_pair`` and ``pe_read`` is the
    paired-end read in genomic coordinates.
    """
    iso_lens = [iso.len for iso in gene.isoforms]
    # Number of fragment start positions available on each isoform.
    num_positions = array([(l - mean_frag_len + 1) for l in iso_lens])
    # (The per-isoform uniform position probabilities that used to be
    # computed here were never used -- positions come from
    # compute_read_pair_position_prob below -- and the computation raised
    # ZeroDivisionError for an isoform with zero available positions.)
    psi_frag_denom = sum(num_positions * array(true_psi))
    psi_frags = [(num_pos * curr_psi)/psi_frag_denom
                 for num_pos, curr_psi in zip(num_positions, true_psi)]

    # Choose isoform to sample read from
    chosen_iso = list(multinomial(1, psi_frags)).index(1)
    isoform = gene.isoforms[chosen_iso]
    iso_len = isoform.len
    frag_len = insert_len + 2*read_len
    isoform_position_probs = compute_read_pair_position_prob(iso_len, read_len,
                                                             frag_len)

    left_read_start = list(multinomial(1, isoform_position_probs)).index(1)
    left_read_end = left_read_start + read_len - 1

    # right read starts after the left read and the insert length
    right_read_start = left_read_start + read_len + insert_len
    right_read_end = left_read_start + (2*read_len) + insert_len - 1

    # convert read coordinates from coordinates of isoform that generated
    # it to genomic coordinates
    genomic_left_read_start, genomic_left_read_end = \
        isoform.isoform_coords_to_genomic(left_read_start, left_read_end)
    genomic_right_read_start, genomic_right_read_end = \
        isoform.isoform_coords_to_genomic(right_read_start, right_read_end)

    # paired-end read parameterized by the genomic start/end of both mates
    pe_read = (genomic_left_read_start, genomic_left_read_end,
               genomic_right_read_start, genomic_right_read_end)
    alignment, frag_lens = gene.align_read_pair(pe_read[0], pe_read[1],
                                                pe_read[2], pe_read[3],
                                                overhang=overhang_len)
    return (alignment, frag_lens, pe_read)
def asym_double_claw(nsamp):
    """Generate ``nsamp`` samples from the "asymmetric double claw" mixture.

    Two main components (weight 46/100 each), three components at
    negative half-integers (weight 1/300 each) and three at positive
    half-integers (weight 7/300 each); one multinomial draw splits the
    sample budget, which is then passed to ``_generate``.
    NOTE(review): each input pair is presumably (location, scale) --
    confirm against ``_generate``.
    """
    main = [(2*k-1, 2/3) for k in xrange(2)]
    left_spikes = [(-k/2, 1/100) for k in xrange(1, 4)]
    right_spikes = [(k/2, 7/100) for k in xrange(1, 4)]
    inputs = main + left_spikes + right_spikes
    rates = [46/100]*2+[1/300]*3+[7/300]*3
    counts = rand.multinomial(nsamp, rates, size=1)[0]
    return _generate(inputs, counts)