def BLD_3_Generator(p, q, samplesize, lmbd, C, C2):
    """Simulate draws for a fixed species tree with given introgression probabilities.

    Every draw starts from ``expon.rvs() * lmbd`` and then falls into one of
    three classes:

    * with probability ``p``: nothing is added;
    * with probability ``q``: ``(C - truncexpon.rvs(C)) * lmbd`` is added;
    * otherwise (probability ``1 - p - q``):
      ``(C2 - truncexpon.rvs(C2)) * lmbd`` is added.

    Args:
        p: Probability of the first class.
        q: Probability of the second class.
        samplesize: Number of draws to generate.
        lmbd: Multiplier applied to every random term.
        C: Shape (upper truncation point) used for the second class.
        C2: Shape (upper truncation point) used for the third class.

    Returns:
        A one-element list ``[[('testA', 'testB', 'testC'), draws, [], []]]``
        where ``draws`` is the list of ``samplesize`` simulated values.
    """
    outputList = []
    for _ in range(samplesize):
        # A single uniform draw is compared against the cumulative thresholds
        # p and p + q.  Drawing a second number for the ``elif`` would make
        # the second class fire with probability (1 - p) * (p + q), not q.
        u = numpy.random.random_sample()
        value = expon.rvs() * lmbd
        if u < p:
            pass
        elif u < p + q:
            value += (C - truncexpon.rvs(C)) * lmbd
        else:
            value += (C2 - truncexpon.rvs(C2)) * lmbd
        outputList.append(value)
    return [[('testA', 'testB', 'testC'), outputList, [], []]]
def truncexpon_draws(self, lbd_scale, rndsd=0, thresh=1, datasize=1000):
    """Simulate truncated-exponential data per hypothesis and store p-values.

    For each of the ``self.numhyp`` hypotheses a sample of ``datasize``
    truncated-exponential variates (shape ``b=thresh``) is drawn: with unit
    scale when ``self.alt_vec[i] == 0`` and with ``scale=lbd_scale``
    otherwise.  The sample sum is compared with its normal approximation
    under the unit-scale model and the upper-tail probability is written to
    ``self.pvec[i]``.  Finally ``self.pvec`` is passed to ``saveres``.

    Args:
        lbd_scale: Scale used for hypotheses whose ``alt_vec`` entry is non-zero.
        rndsd: Seed passed to ``np.random.seed``.
        thresh: Shape parameter ``b`` of the truncated exponential.
        datasize: Number of observations drawn per hypothesis.
    """
    np.random.seed(rndsd)

    # Moments of a single unit-scale observation truncated at ``thresh``:
    #   mean = 1 - b / (e^b - 1),   var = 1 - b^2 e^b / (e^b - 1)^2.
    # For thresh == 1 these equal the constants that used to be hard-coded.
    exp_b = np.exp(thresh)
    null_mean = 1 - thresh / (exp_b - 1)
    null_var = 1 - thresh ** 2 * exp_b / (exp_b - 1) ** 2

    # The sum of ``datasize`` i.i.d. draws has variance datasize * var, and
    # ``norm.cdf`` expects a standard deviation, hence the square root.
    sum_mean = datasize * null_mean
    sum_std = np.sqrt(datasize * null_var)

    for i in range(self.numhyp):
        if self.alt_vec[i] == 0:
            database = truncexpon.rvs(b=thresh, size=datasize)
        else:
            database = truncexpon.rvs(b=thresh, scale=lbd_scale, size=datasize)
        z = database.sum()
        self.pvec[i] = 1 - norm.cdf(z, loc=sum_mean, scale=sum_std)

    dirname = './expsettings'
    filename = "P_NH%d_PM%.2f_lbd%.2f_SEED%d" % (self.numhyp, self.pi, lbd_scale, rndsd)
    saveres(dirname, filename, self.pvec)
def sample_times(node, num_times):
    """Sample ``num_times`` times for ``node`` from within its time windows.

    Windows ``(start, end)`` are collected by scanning ``GC.transmissions``
    (tuples ``(u, v, t)``), starting from ``node.get_first_infection_time()``.
    Each returned time is ``start + x * (end - start)``, where ``(start, end)``
    is picked with ``choice`` and ``x`` is a ``truncexpon`` variate with
    ``b=1`` (default location and scale, i.e. supported on ``[0, 1]``).

    Returns an empty list when the node has no first infection time,
    otherwise a list of ``num_times`` sampled times.
    """
    # Seed NumPy once per run from the global context and advance the stored
    # seed; the ``NUMPY_SEEDED`` flag prevents reseeding on later calls.
    if not hasattr(GC, "NUMPY_SEEDED"):
        from numpy.random import seed as numpy_seed
        numpy_seed(seed=GC.random_number_seed)
        GC.random_number_seed += 1
        GC.NUMPY_SEEDED = True
    assert hasattr( GC, 'transmissions' ), "No transmission network found in global context! Run this after the transmission network simulation is done"
    first_time = node.get_first_infection_time()
    if first_time is None:
        return []
    windows = []
    # ``last_time`` is the start of the currently open window, or None when
    # no window is open.
    last_time = first_time
    for u, v, t in GC.transmissions:
        # An event whose source and recipient are both ``node`` closes the
        # open window (presumably a recovery/removal marker -- confirm).
        if u == node and v == node:
            if last_time is not None and t > last_time:
                windows.append((last_time, t))
                # NOTE(review): nesting of this reset was reconstructed from
                # whitespace-collapsed source; confirm it belongs inside the
                # guard above rather than one level out.
                last_time = None
        # Any later event with ``node`` as recipient opens a new window.
        elif last_time is None and v == node:
            last_time = t
    # A window still open after the scan is closed at the current global time.
    # NOTE(review): ``t`` here is the time of the last entry iterated above;
    # it is unbound if ``GC.transmissions`` is empty.
    if last_time is not None and t > last_time:
        windows.append((last_time, GC.time))
    # Fall back to a single window spanning first infection to now.
    if len(windows) == 0:
        windows.append((first_time, GC.time))
    truncexpon_variates = (truncexpon.rvs(1, size=num_times))
    out = []
    for i in range(num_times):
        start, end = choice(windows)
        # Stretch the [0, 1] variate onto the chosen window.
        out.append((truncexpon_variates[i] * (end - start)) + start)
    return out
def _generate_num_repeat(self, scale=2.0):
    """
    Helper function: randomly pick how many times an image is repeated.

    A truncated exponential with the given ``scale`` is sampled starting at
    ``self.min_repeat`` and cut off at ``self.max_repeat``; the draw is then
    converted to an integer with ``astype(int)``.

    Output: integer between [min_repeat, max_repeat)
    """
    lower = self.min_repeat
    # scipy expresses the truncation point in multiples of ``scale``.
    shape = (self.max_repeat - lower) / scale
    draw = truncexpon.rvs(b=shape, loc=lower, scale=scale)
    return draw.astype(int)
def exponential(self, N, T):
    """
    :param N: number of rows of the returned array
    :param T: number of columns of the returned array
    :return: an ``(N, T)`` array of samples from an exponential distribution
        that starts at ``self.mean``, has scale ``self.stdev`` and is
        truncated at ``self.right``

    Notes - We'll be using exponential with a support 0, 1. To achieve this
    we'll be truncating the exponential distribution after 1. The support of
    the exponential distribution is from 0 to infinity by default. We will be
    keeping the scale low so that not a lot of samples are truncated from
    the distribution and we have some consistency.

    NOTE(review): ``self.left`` is not read here; the lower edge of the
    support is the location ``self.mean``.
    """
    # truncexpon moves from 0 to b in *standardised* units, i.e. the samples
    # cover [loc, loc + b * scale].  Convert the absolute upper bound so the
    # cut-off really is ``self.right`` whatever the scale.
    b = (self.right - self.mean) / self.stdev
    return truncexpon.rvs(b=b, loc=self.mean, scale=self.stdev,
                          size=(N, T), random_state=self.random_seed)
def day_bucket_to_points(bucket_hist, buckets, kind, n_points=10000):
    """
    Transforms one day to points, see `buckets_to_points` docs

    Bucket ``i`` spans ``[buckets[i], buckets[i + 1])``; the last bucket is
    given the same width as the one before it.  Each bucket contributes
    ``round(density * n_points)`` points (never fewer than zero): truncated
    exponential draws starting at the lower edge for kinds ``'exponential'``
    and ``'count'``, uniform draws for every other kind.

    :param bucket_hist: One histogram i.e. one day of data from one metric
    :param buckets: List of buckets for the histogram
    :param kind: Kind of the histogram e.g. 'exponential'
    :param n_points: Number of points to be generated. Greater number of
                     points indicates more accurate point distribution and
                     more time that is needed.
    :return: Point representation of one day of data (list of floats)
    """
    exponential_like = kind in ('exponential', 'count')

    if kind == 'categorical':
        buckets = list(range(len(buckets)))
    # Non-exponential histograms whose length disagrees with the bucket list
    # fall back to integer bucket edges.
    if not exponential_like and len(bucket_hist) != len(buckets):
        buckets = list(range(len(bucket_hist)))

    points = []
    for i, dens in enumerate(bucket_hist):
        low = buckets[i]
        if i + 1 == len(buckets):
            high = low + (buckets[i] - buckets[i - 1])
        else:
            high = buckets[i + 1]

        count = max(int(round(dens * n_points, 0)), 0)
        if count == 0:
            continue

        if exponential_like:
            width = high - low
            if width <= 0:
                # Degenerate bucket (e.g. a single-bucket histogram): a
                # truncated exponential needs a positive width, so place the
                # points on the edge, as the uniform branch effectively does.
                points += [float(low)] * count
                continue
            # ``truncexpon``'s shape is the width of the support measured
            # from ``loc``; passing ``high`` itself would let points spill
            # up to ``low + high``.
            draws = truncexpon.rvs(width, size=count, loc=low)
        else:
            draws = np.random.uniform(high=high, low=low, size=count)
        points += draws.tolist()
    return points
# Display the probability density function (``pdf``):
x = np.linspace(truncexpon.ppf(0.01, b),
                truncexpon.ppf(0.99, b), 100)
ax.plot(x, truncexpon.pdf(x, b),
        'r-', lw=5, alpha=0.6, label='truncexpon pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = truncexpon(b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = truncexpon.ppf([0.001, 0.5, 0.999], b)
np.allclose([0.001, 0.5, 0.999], truncexpon.cdf(vals, b))
# True

# Generate random numbers:
r = truncexpon.rvs(b, size=1000)

# And compare the histogram.  ``density=True`` replaces the ``normed``
# keyword, which Matplotlib no longer accepts.
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
def soc_trunc_filter(traffic, min_soc):
    """Count how many of ``traffic`` simulated SOC draws fall below ``min_soc``.

    The draws come from a truncated exponential with shape ``1`` and a fixed
    ``random_state``, so the same arguments always give the same count.
    """
    # TODO random seed might be counter productive
    soc_draws = truncexpon.rvs(1, size=traffic, random_state=49816317)
    below_threshold = soc_draws < min_soc
    return int(below_threshold.sum())
def absorption_profile(size):
    """Exponential random absorption profile.

    Draws ``size`` truncated-exponential variates with scale
    ``attenuation_length``, cut off at ``thickness``.  Both values are read
    from the enclosing scope.
    """
    # scipy wants the cut-off expressed in multiples of the scale.
    cutoff_in_lengths = thickness / attenuation_length
    return truncexpon.rvs(cutoff_in_lengths,
                          scale=attenuation_length,
                          size=size)
def absorption_profile(size):
    """Exponential random absorption profile.

    Returns ``size`` truncated-exponential draws whose scale is
    ``attenuation_length`` and whose upper limit is ``thickness``; both names
    are taken from the enclosing scope.
    """
    # The shape parameter is the upper limit measured in units of the scale.
    shape = thickness / attenuation_length
    samples = truncexpon.rvs(shape, scale=attenuation_length, size=size)
    return samples
# 'lambda' value of every Q tripeptide; -1 marks those absent from aa_properties.
kQ = np.array([
    -1 if trp not in aa_properties else aa_properties[trp]['lambda']
    for trp in tripepsQ
])

# Reorder each tripeptide set, together with its constants, by ascending constant.
idxN, idxQ = np.argsort(kN), np.argsort(kQ)
tripepsN, kN = tripepsN[idxN], kN[idxN]
tripepsQ, kQ = tripepsQ[idxQ], kQ[idxQ]

N = 20000
# Time T at which 1 - exp(-k * T) reaches 0.99, for every constant k.
TmaxN = -np.log(1 - 0.99) / kN
TmaxQ = -np.log(1 - 0.99) / kQ

# For every tripeptide draw an equal share of the N times from a truncated
# exponential on [0, Tmax] (shape 2.5, scale Tmax / 2.5), then pool them.
tN = np.concatenate([
    truncexpon.rvs(b=2.5, loc=0, scale=tm / 2.5, size=int(N / len(tripepsN)))
    for tm in TmaxN
])
tQ = np.concatenate([
    truncexpon.rvs(b=2.5, loc=0, scale=tm / 2.5, size=int(N / len(tripepsQ)))
    for tm in TmaxQ
])

sim_lim = 1e-7
deamid_cutoff = 0

# different levels of noise
sigmas = [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.15, 0.1, 0.05, 0.005]
bootstraps = 500  # For each level of noise, repeat experiment
bootstrap_size = 100

# Result accumulators (sigmas x bootstraps); each is its own list object.
corrsN_1, corrsQ_1 = [], []
corrsN_2, corrsQ_2 = [], []
corrsN_3 = []
def truncated_exponential_1(b):
    """Return one draw from a truncated exponential with shape ``b``.

    Location and scale are left at scipy's defaults.
    """
    sample = truncexpon.rvs(b=b)
    return sample
def g_rvs(num_rvs, limit=4.5):
    """Draw ``num_rvs`` variates from ``truncexpon`` located at ``limit``.

    The shape is ``np.inf``, i.e. no finite upper truncation point is set,
    and the scale is left at scipy's default.
    """
    upper_shape = np.inf
    draws = truncexpon.rvs(b=upper_shape, loc=limit, size=num_rvs)
    return draws
def transmission_neutral_coalescent_tree(transmission_network, sample_times, rate=1):
    '''Sample a tree under the pure-neutral coalescent model constrained to a given transmission network with given patient sampling times.

    The network is processed from the latest transmission to the earliest.
    For each recipient ``v`` the lineages present in ``v`` (its own samples
    plus the subtrees of everyone ``v`` infected) are coalesced backwards in
    time until a single lineage remains; that lineage is handed on to
    whoever infected ``v``.

    Args:
        ``transmission_network`` (``list``): The transmission network as a ``list`` of ``(u,v,t)`` tuples denoting the transmission from ``u`` to ``v`` at time ``t``. The transmission network must be sorted in ascending order of time

        ``sample_times`` (``dict``): The times at which each individual was sampled (i.e., the times of the leaves) as a ``dict`` in which keys are individuals from ``transmission_network`` and the value associated with an individual ``u`` is a ``list`` of times at which ``u`` was sampled (i.e., the times of the leaves from individual ``u``). A bare number or a ``set`` is also accepted and is replaced in place by a ``list``

        ``rate`` (``float``): The rate of the Poisson process of coalescing two lineages

    Returns:
        A ``list`` of ``Tree`` objects storing the sampled pure-neutral trees, where leaves are labeled ``ID|u|t``, where ``ID`` is a unique identifier for the leaf, ``u`` is the corresponding individual from ``transmission_network``, and ``t`` is the sample time (which equals the leaf's distance from the root). If there are multiple seed individuals (infected beforehand), a tree will be output for each.

    Raises:
        ``TypeError``: If ``transmission_network`` is not a ``list``, ``sample_times`` is not a ``dict``, or a sample time has an unsupported type

        ``ValueError``: If the network is unsorted, has a duplicate recipient, has missing links, or is missing seed individuals

        ``KeyError``: If a recipient has no entry in ``sample_times``
    '''
    if not isinstance(transmission_network, list):
        raise TypeError(
            "transmission_network must be a list, but it was a %s" %
            str(type(transmission_network)))
    if not isinstance(sample_times, dict):
        raise TypeError("sample_times must be a dict, but it was a %s" %
                        str(type(sample_times)))
    time = dict()            # node -> time of that node
    ID = 0                   # running leaf identifier
    root = dict()            # individual -> root node of its finished subtree
    leaves = dict()          # individual -> lineages still to be coalesced
    infected_by = dict()     # individual -> recipients it infected
    to_visit = set()         # infectors not yet seen as a recipient
    infection_time = dict()  # individual -> time it was infected

    # Walk the network backwards so every recipient's subtree exists before
    # its infector is processed.
    for i in range(len(transmission_network) - 1, -1, -1):
        check_transmission_event(transmission_network[i])
        if i != 0 and transmission_network[i][2] < transmission_network[i - 1][2]:
            raise ValueError(
                "transmission_network must be sorted in ascending order of time"
            )
        u, v, t = transmission_network[i]
        if v in infection_time:
            raise ValueError(
                "Encountered duplicate transmission recipient: %s" % str(v))
        else:
            infection_time[v] = t
        if u not in infected_by:
            infected_by[u] = list()
        if v not in infected_by:
            infected_by[v] = list()
        if v not in sample_times:
            raise KeyError("Individual not in sample_times: %s" % str(v))

        # Normalise the sample times of ``v`` to a list (mutates the argument).
        if not isinstance(sample_times[v], list):
            if isinstance(sample_times[v], float) or isinstance(
                    sample_times[v], int):
                sample_times[v] = [sample_times[v]]
            elif isinstance(sample_times[v], set):
                sample_times[v] = list(sample_times[v])
            else:
                raise TypeError("Values in sample_times must be list")

        # Seeds are recorded with a ``None`` (or 'None') infector.
        if u is not None and u != 'None':
            infected_by[u].append(v)
            to_visit.add(u)

        # Lineages inside ``v``: subtrees of its recipients plus its own samples.
        leaves[v] = list()
        for w in infected_by[v]:
            if w not in root:
                raise ValueError("Missing links in transmission_network")
            leaves[v].append(root[w])
            del root[w]
        for st in sample_times[v]:
            if not isinstance(st, float) and not isinstance(st, int):
                raise TypeError("Values in sample_times must be list of float")
            newnode = Node(label='%d|%s|%f' % (ID, str(v), st))
            ID += 1
            time[newnode] = st
            leaves[v].append(newnode)

        # Latest lineage first: the coalescent runs backwards in time.
        leaves[v].sort(key=lambda x: time[x], reverse=True)
        lineages = Set()
        curr_time = time[leaves[v][0]]
        for w in leaves[v]:
            if time[w] < curr_time:
                # NOTE(review): a draw that overshoots time[w] is simply
                # redrawn, so this loop only ends once a single lineage is
                # left; confirm that is the intended model.
                while len(lineages) > 1:
                    L = rate * len(lineages) * (len(lineages) - 1) / 2.
                    coal_time = curr_time - exponential(1. / L)
                    if coal_time >= time[w]:
                        c1 = lineages.pop()
                        c2 = lineages.pop()
                        newnode = Node()
                        time[newnode] = coal_time
                        curr_time = coal_time
                        newnode.add_child(c1)
                        newnode.add_child(c2)
                        lineages.add(newnode)
                curr_time = time[w]
            lineages.add(w)

        # Everything left must coalesce before ``v``'s infection time ``t``.
        while len(lineages) != 1:
            L = rate * len(lineages) * (len(lineages) - 1) / 2.
            # ``truncexpon``'s shape is measured in units of ``scale``, so
            # the cap of ``curr_time - t`` on the waiting time has to be
            # multiplied by L.  Without that the cap was (curr_time - t) / L,
            # which for L < 1 allows a coalescence before the infection time
            # and hence a negative root edge length below.
            coal_time = curr_time - truncexpon.rvs(
                (curr_time - t) * L, scale=1. / L)
            c1 = lineages.pop()
            c2 = lineages.pop()
            newnode = Node()
            time[newnode] = coal_time
            curr_time = coal_time
            newnode.add_child(c1)
            newnode.add_child(c2)
            lineages.add(newnode)
        root[v] = lineages.pop()
        to_visit.discard(v)
        del leaves[v]

    if len(to_visit) != 0:
        raise ValueError(
            "Malformed transmission network. Missing the following seeds:\n%s"
            % '\n'.join(str(v) for v in to_visit))

    # Whatever is still in ``root`` belongs to a seed: wrap each in a Tree and
    # turn node times into edge lengths.
    out = list()
    for k, r in root.items():
        if r.is_root():
            tmp = Tree()
            tmp.root = r
            out.append(tmp)
            for node in tmp.traverse_preorder():
                if node.is_root():
                    node.edge_length = time[node] - infection_time[k]
                else:
                    node.edge_length = time[node] - time[node.parent]
    return out
aa_properties[trp]['lambda'] if trp in aa_properties else -1 for trp in tripepsQ ])

# Reorder each tripeptide array, together with its constants, by ascending
# constant.
idxN = np.argsort(kN)
idxQ = np.argsort(kQ)
tripepsN = tripepsN[idxN]
kN = kN[idxN]
tripepsQ = tripepsQ[idxQ]
kQ = kQ[idxQ]

N = 20000
# Time T at which 1 - exp(-k * T) reaches 0.99, for every constant k.
TmaxN = np.array([-np.log(1 - 0.99) / k for k in kN])
TmaxQ = np.array([-np.log(1 - 0.99) / k for k in kQ])
# For every tripeptide draw an equal share of the N times from a truncated
# exponential on [0, Tmax]: positional arguments are shape, loc, scale, size,
# and shape * scale == Tmax.  N uses shape 2.35, Q uses shape 3.
tN = [ truncexpon.rvs(2.35, 0, tm / 2.35, int(N / len(tripepsN))) for tm in TmaxN ]
tN = np.concatenate(tN)
tQ = [truncexpon.rvs(3, 0, tm / 3, int(N / len(tripepsQ))) for tm in TmaxQ]
tQ = np.concatenate(tQ)

sim_lim = 1e-7
deamid_cutoff = 0

# Single noise level, applied to every tripeptide.
s = 0.2
print('Simulating with sigma = {}'.format(s))
sigmasN = np.repeat(s, len(tripepsN))
sigmasQ = np.repeat(s, len(tripepsQ))
# K, sigmas, Tmean, Tstd, Tmax, N, tol
# Pick 2000 of the pooled N times at random, with replacement.
tN_s = tN[np.random.randint(tN.shape[0], size=2000)]
DN = rr.simulate_deamidation(kN, sigmasN, tN_s, sim_lim)