def __init__(self, Ninf, Ntotal, alpha=0.03, meancontacts=2):
    """Initialise the per-individual state arrays of the simulation.

    Parameters
    ----------
    Ninf : int
        Number of individuals marked infected at start (used as a slice
        bound on a random permutation of the population).
    Ntotal : int
        Population size; coerced with ``int``.
    alpha : float
        Stored unchanged on ``self.alpha``.
    meancontacts : float
        Poisson mean used to draw ``self.rho`` for every individual.
    """
    population = int(Ntotal)
    self.alpha = alpha
    self.Ninf = Ninf
    self.Ntotal = population

    # Static contact structure; homesize is its column sum plus one.
    self.Cstatic = self.genChome()
    column_totals = np.array(self.Cstatic.sum(axis=0))[0]
    self.homesize = column_totals.astype('int16') + 1

    # Pick the initially infected individuals uniformly at random.
    self.I = np.zeros(population, dtype='bool')
    shuffled = np.random.permutation(population)
    seeds = shuffled[0:Ninf]
    self.I[seeds] = True

    self.meancontacts = meancontacts
    self.rho = poisson.rvs(meancontacts, size=population)

    # Per-individual parameters, truncated so they cannot go negative.
    self.kappa0 = np.maximum(np.random.normal(1, 0.25, population), 0)
    paired_draws = np.random.multivariate_normal(
        [1, 1], [[0.25, 0], [0, 0.25]], size=population)
    self.sigma0 = np.maximum(paired_draws[:, 0], 0)
    self.mu0 = np.minimum(np.maximum(paired_draws[:, 1], 0), 4)
    self.tau0 = np.maximum(poisson.rvs(14, size=population), 8)

    # chi is redrawn for the infected using a probability of mu0 / 4.
    self.chi = binom.rvs(5, 0.05, size=population)
    self.chi[self.I] = binom.rvs(5, self.mu0[self.I] / 4)

    self.S = ~self.I
    self.R = np.zeros(population, dtype='bool')
    self.D = np.zeros(population, dtype='bool')

    self.Idays = np.zeros(population)  # number of days since infection
    self.Idays[seeds] = 1
    self.Vdays = np.zeros(population)  # number of days since vaccination
def simulate(split, p1, p2, runs):
    """Return the largest mean ``epsilon`` over the four groups in ``split``.

    ``split`` unpacks to ``(n11, n10, n21, n20)``. Each count is divided
    binomially (``runs`` independent draws) using ``p2`` for the first
    two groups and ``p1`` for the last two; the resulting counts are fed
    to the helpers ``a`` and ``epsilon``, which are defined outside this
    block.
    """
    n11, n10, n21, n20 = split

    # Binomial splits, drawn in a fixed order.
    n11_21 = binom.rvs(n11, p2, size=runs)
    n11_20 = n11 - n11_21
    n10_21 = binom.rvs(n10, p2, size=runs)
    n10_20 = n10 - n10_21
    n21_11 = binom.rvs(n21, p1, size=runs)
    n21_10 = n21 - n21_11
    n20_11 = binom.rvs(n20, p1, size=runs)
    n20_10 = n20 - n20_11

    # deal with the fact some of these n's can be 0.
    a_inputs = (
        (n11, n21_11, n20_11, p2),
        (n10, n21_10, n20_10, p2),
        (n21, n11_21, n10_21, p1),
        (n20, n11_20, n10_20, p1),
    )
    a_values = [a(*group) for group in a_inputs]
    eps_values = [epsilon(value) for value in a_values]
    return max([np.mean(eps) for eps in eps_values])
def _getCorrectedVAF(self, Options):
    '''
    Gets a corrected VAF based on the rawVAF using a binomial read count
    over a Poisson-distributed depth.

    For every entry of ``self.VAFs[t][mut]`` this sets ``'reads'``,
    ``'depth'`` and ``'correctedVAF'`` (reads / depth). Nothing is done
    when ``Options.depthFile`` is set.

    :param Options: Command arguments (``depthFile`` and ``depth`` are read)
    :return: None. Sets values in a dictionary.
    '''
    # TODO need to add GAMMA distribution fits from a file
    if Options.depthFile is not None:
        return

    for t in self.VAFs:
        for mut in self.VAFs[t]:
            entry = self.VAFs[t][mut]
            vaf = entry['rawVAF']
            try:
                entry['reads'] = binom.rvs(Options.depth, vaf)
            except ValueError:
                # TODO vaf shouldn't ever be more than 1.0 but it is.
                # scipy rejects an out-of-range probability; fall back
                # to p = 1.0.
                entry['reads'] = binom.rvs(Options.depth, 1.0)
            entry['depth'] = float(poisson.rvs(Options.depth))
            entry['correctedVAF'] = entry['reads'] / entry['depth']
def testAffinity(a, h, N, runs, f=None, **kwargs):
    """Count how often the reported interval contains the true affinity ``a``.

    Each of the ``runs`` repetitions simulates counts, obtains a
    posterior from ``pAffinityPct`` (no ``f``) or ``pAffinity`` (with
    ``f``), takes its interval from ``hdis`` and checks whether ``a``
    lies between the first and third reported values. Per-run lines are
    printed when ``runs <= 100``; the final line prints the hit count
    and ``runs``. The three helpers are defined outside this block.
    """
    verbose = runs <= 100
    nok = 0
    for _ in range(runs):
        p = a * h / (a * h + (1 - a) * (1 - h))
        if not f:
            nt = binom.rvs(N, p)
            no = N - nt
            pa = pAffinityPct(nt, no, h, False, **kwargs)
        else:
            H0 = binom.rvs(N, h)
            H1 = N - H0
            z = (1 - a) / a
            f0 = f / (h + z * (1 - h))
            f1 = z * f0
            t0 = binom.rvs(H0, f0)
            t1 = binom.rvs(H1, f1)
            counts = [t0, H0 - t0, t1, H1 - t1]
            pa = pAffinity(counts, False, **kwargs)

        hdi = hdis([pa], **kwargs)[0]
        ok = hdi[0] <= a and a <= hdi[2]
        marker = '*' if ok else ''

        if verbose:
            if not f:
                print('%2d %2d %.2f %.2f %.2f %s' %
                      (nt, no, hdi[0], hdi[1], hdi[2], marker))
            else:
                print('%2d %2d %2d %2d %.2f %.2f %.2f %s' %
                      (t0, H0 - t0, t1, H1 - t1,
                       hdi[0], hdi[1], hdi[2], marker))
        nok += ok
    print(nok, runs)
def admixfrog_sample(ids, ref, snps, coverage, contamination, libs, name):
    """Simulate read counts per library and write them to ``{name}.sample.txt``.

    For every ``(coverage, contamination, lib)`` triple, endogenous and
    contaminant depths are Poisson draws and alt-allele reads are
    binomial draws. Sites with no reads at all are dropped. The
    per-library tables are concatenated, sorted by chrom/pos/lib and
    saved as CSV. Summary means are printed along the way.
    """
    per_library = []
    for cov, cont, lib in zip(coverage, contamination, libs):
        print(f'Sample{name}\tLib:{lib}\tCov:{cov}\tcont:{cont}', end="\t")

        data = ref[['chrom', 'pos']].copy()
        data['true_alt'] = np.sum(snps[ids], 1)
        data['true_ref'] = 2 - data['true_alt']
        data['lib'] = lib

        n_sites = data.shape[0]
        cov_real = poisson.rvs(cov * (1. - cont), size=n_sites)
        cov_cont = poisson.rvs(cov * cont, size=n_sites)

        # Alt-allele probabilities for the sample and for the contaminant.
        p = data['true_alt'] / (data['true_ref'] + data['true_alt'])
        p_cont = ref.CONT_alt / (ref.CONT_ref + ref.CONT_alt)

        data['ralt'] = binom.rvs(cov_real, p)
        data['rref'] = cov_real - data['ralt']
        data['calt'] = binom.rvs(cov_cont, p_cont)
        data['cref'] = cov_cont - data['calt']
        data['talt'] = data.ralt + data.calt
        data['tref'] = data.rref + data.cref

        for allele in ('alt', 'ref'):
            real = np.mean(data['r' + allele])
            contam = np.mean(data['c' + allele])
            total = np.mean(data['t' + allele])
            print(f"{allele}:\t{lib}\t{real:.3f}\t{contam:.3f}\t{total:.3f}")

        covered = data.tref + data.talt > 0
        per_library.append(data[covered])

    combined = pd.concat(per_library).sort_values(['chrom', 'pos', 'lib'])
    combined.to_csv(f"{name}.sample.txt", float_format="%.5f", index=False)
def gm(xinit=1.0, gamma=0.05, alpha=1, beta=0.05, G=200, n=40, duration=1, nsteps=100):
    """Simulate a discrete-time stochastic process with binomial transitions.

    Three cumulative series are tracked over ``duration * nsteps`` steps:
    ``Pt`` (total ever added), ``At`` (currently active) and ``Nt``
    (total moved out of the active set). At each step new members are
    drawn from the ``G - Nt - At`` remaining individuals with
    probability ``beta * At / G``; older cohorts leave the active set
    with probability ``gamma * age ** (-alpha)``.

    ``xinit`` is accepted for interface compatibility but not used.

    Returns
    -------
    list
        ``[Pt, At, Nt]``; ``Pt`` is also printed.
    """
    totSteps = duration * nsteps

    # Per-step increments (pseq/aseq/nseq) and running totals (Pt/At/Nt).
    pseq = [0] * totSteps
    aseq = [0] * totSteps
    nseq = [0] * totSteps
    Pt = [0] * totSteps
    At = [0] * totSteps
    Nt = [0] * totSteps
    qt = [0] * totSteps

    n = int(n)
    pseq[0] = n
    Pt[0] = n
    At[0] = n

    # Exit probability as a function of cohort age (age 0 is never used).
    for i in range(1, totSteps):
        qt[i] = gamma * (i) ** (-alpha)

    for t in range(1, totSteps):
        S = G - Nt[t - 1] - At[t - 1]
        pseq[t] = binom.rvs(S, beta * At[t - 1] / G)
        Pt[t] = Pt[t - 1] + pseq[t]
        aseq[t] = pseq[t]
        # NOTE(review): cohorts j = 0 .. t-2 are thinned here; aseq[0]
        # stays 0, so the initial n members never leave. Left as-is.
        for j in range(0, t - 1):
            tmp = binom.rvs(aseq[j], qt[t - j])
            aseq[j] = aseq[j] - tmp
            nseq[t] = nseq[t] + tmp
        Nt[t] = Nt[t - 1] + nseq[t]
        At[t] = At[t - 1] + pseq[t] - nseq[t]

    # Parenthesised so the call works on both Python 2 and Python 3.
    print(Pt)
    return [Pt, At, Nt]
def simulate_pairs(distributions, d1, d2, add_runs):
    """Run ``add_runs`` extra binomial experiments for two distributions.

    On every run a success count is drawn for ``d1`` and for ``d2``
    (``settings.N_BINOM`` trials, success probability
    ``thetas[int(d)]``) and passed to that distribution's
    ``get_posterior``. ``settings`` and ``thetas`` are module-level
    names defined outside this block.

    The original ``while r < add_runs`` loop never advanced ``r`` and
    therefore never terminated; a bounded ``for`` loop replaces it.
    """
    n = settings.N_BINOM
    for _ in range(add_runs):
        z = binom.rvs(n, thetas[int(d1)])
        distributions[d1].get_posterior(n, z)
        z = binom.rvs(n, thetas[int(d2)])
        distributions[d2].get_posterior(n, z)
def generar_Ni(pi, k, n):
    """Draw ``k`` category counts by sequential conditional binomials.

    The first count is Binomial(n, pi[0]); every later count is drawn
    from the individuals not yet assigned, with probability ``pi[i]``
    divided by the probability mass not yet used.

    Parameters
    ----------
    pi : sequence of float
        Category probabilities.
    k : int
        Number of categories to draw.
    n : int
        Total number of individuals.

    Returns
    -------
    list
        The ``k`` drawn counts.
    """
    Nis = []
    remaining = 1
    assigned = binom.rvs(n, pi[0])
    Nis.append(assigned)
    for i in range(1, k):
        remaining -= pi[i - 1]
        if remaining > 0:
            # Floating-point subtraction can push the ratio slightly
            # outside [0, 1] (e.g. pi = [0.3, 0.3, 0.4]), which scipy
            # rejects with a ValueError; clamp it.
            conditional = min(max(pi[i] / remaining, 0.0), 1.0)
        else:
            # No probability mass left: avoid dividing by zero.
            conditional = 1.0 if pi[i] > 0 else 0.0
        Ni = binom.rvs(n - assigned, conditional)
        assigned += Ni
        Nis.append(Ni)
    return Nis
def run_sig():
    """Simulate one signal/noise response pair and sample the model.

    Response counts are single binomial draws out of 100 trials
    (p = 0.69 for signal, p = 0.30 for noise). The model returned by
    ``sig_detect`` is sampled for 5000 iterations with Hamiltonian MC
    started from the MAP estimate; the first 1000 samples are dropped.
    """
    hits = binom.rvs(100, 0.69, size=1)
    false_alarms = binom.rvs(100, 0.30, size=1)
    model = sig_detect(hits, false_alarms, 1, 100)
    with model:
        sampler = pm.HamiltonianMC()
        map_start = pm.find_MAP()
        trace = pm.sample(5000, sampler, map_start, tune=500, njobs=2)
    return trace[1000:]
def supermarket_log(starting_time, finish_time, warehouse, file):  # one day operation
    """
    Simulate one day of restocks and sales in a supermarket.

    The time between events is exponential with a mean of 5 minutes.
    Each event is a sale with probability 0.65 and a restock otherwise.
    The product is chosen uniformly among ``warehouse.products``. The
    quantity is ``1 + Binomial(n, p)`` with ``n = warehouse[product][0]``
    and ``p = 0.15`` for a sale or ``p = 0.65`` for a restock.

    Changes from the previous version: the restock branch used an
    undefined name ``amazon`` instead of ``warehouse``; the sale branch
    hard-coded 18 products; and the restock branch could never pick the
    last product because ``randint.rvs`` excludes its upper bound.

    Parameters
    ----------
    starting_time: supermarket opening time
    finish_time: supermarket closing time
    warehouse: class Warehouse where our products catalog is saved, we need
    this information to know products and their codes in our supermarket
    file: file path in which save our daily log
    """
    product_codes = list(warehouse.products.keys())
    log = []
    last_hour = starting_time
    # The loop finishes once the last transaction has passed finish_time.
    while last_hour < finish_time:
        if binom.rvs(1, 0.65):
            product_chosen = product_codes[randint.rvs(0, len(product_codes))]
            last_hour += timedelta(minutes=float(expon.rvs(scale=5)))
            quantity = binom.rvs(n=warehouse[product_chosen][0], p=0.15, loc=1)
            log.append(['venta', last_hour, product_chosen, quantity])
        else:
            last_hour += timedelta(minutes=float(expon.rvs(scale=5)))
            product_chosen = product_codes[randint.rvs(0, len(product_codes))]
            quantity = binom.rvs(n=warehouse[product_chosen][0], p=0.65, loc=1)
            log.append(['repo', last_hour, product_chosen, quantity])

    lines = [
        el[0] + ' ' + format_date(el[1]) + " " + el[2] + " " + str(el[3]) + "\n"
        for el in log
    ]
    with open(file, 'w') as f:
        f.write("".join(lines))
def gm(xinit=1.0, gamma=0.05, alpha=1, beta=0.05, G=200, n=40, duration=1, nsteps=100):
    """Simulate a discrete-time stochastic process with binomial transitions.

    Three cumulative series are tracked over ``duration * nsteps`` steps:
    ``Pt`` (total ever added), ``At`` (currently active) and ``Nt``
    (total moved out of the active set). At each step new members are
    drawn from the ``G - Nt - At`` remaining individuals with
    probability ``beta * At / G``; older cohorts leave the active set
    with probability ``gamma * age ** (-alpha)``.

    ``xinit`` is accepted for interface compatibility but not used.

    Returns
    -------
    list
        ``[Pt, At, Nt]``; ``Pt`` is also printed.
    """
    totSteps = duration * nsteps

    # Per-step increments (pseq/aseq/nseq) and running totals (Pt/At/Nt).
    pseq = [0] * totSteps
    aseq = [0] * totSteps
    nseq = [0] * totSteps
    Pt = [0] * totSteps
    At = [0] * totSteps
    Nt = [0] * totSteps
    qt = [0] * totSteps

    n = int(n)
    pseq[0] = n
    Pt[0] = n
    At[0] = n

    # Exit probability as a function of cohort age (age 0 is never used).
    for i in range(1, totSteps):
        qt[i] = gamma * (i) ** (-alpha)

    for t in range(1, totSteps):
        S = G - Nt[t - 1] - At[t - 1]
        pseq[t] = binom.rvs(S, beta * At[t - 1] / G)
        Pt[t] = Pt[t - 1] + pseq[t]
        aseq[t] = pseq[t]
        # NOTE(review): cohorts j = 0 .. t-2 are thinned here; aseq[0]
        # stays 0, so the initial n members never leave. Left as-is.
        for j in range(0, t - 1):
            tmp = binom.rvs(aseq[j], qt[t - j])
            aseq[j] = aseq[j] - tmp
            nseq[t] = nseq[t] + tmp
        Nt[t] = Nt[t - 1] + nseq[t]
        At[t] = At[t - 1] + pseq[t] - nseq[t]

    # Parenthesised so the call works on both Python 2 and Python 3.
    print(Pt)
    return [Pt, At, Nt]
def draw_binomial_distribution():
    """Plot 1000 draws from Binomial(n=20, p=0.8) as a histogram with KDE."""
    # This first draw is discarded; it is kept so the random stream is
    # consumed exactly as before.
    binom.rvs(size=10, n=20, p=0.8)

    samples = binom.rvs(n=20, p=0.8, loc=0, size=1000)
    histogram_style = {"linewidth": 25, 'alpha': 1}
    axes = sns.distplot(samples, kde=True, color='blue',
                        hist_kws=histogram_style)
    axes.set(xlabel='Binomial', ylabel='Frequency')
    plt.show()
def degree_of_certainty_draws(base_rate, treatment_rate, num_participants=None, num_control=None, num_treatment=None, num_draws=1000):
    """
    Draw num_draws from the distribution of the degree of certainty given an
    assumed base rate and treatment rate.

    You must provide either num_participants OR num_control and num_treatment.

    Args:
        base_rate (float): The assumed rate at which the control group
            registers a success
        treatment_rate (float): The assumed rate at which the treatment
            group registers a success
        num_participants (int|None): The number of participants in the
            experiment, split evenly between control and treatment (the
            treatment group gets the extra one when odd). If None, must
            provide BOTH num_control and num_treatment
        num_control (int|None): The number of participants in the control
            group. If provided, must also provide num_treatment
        num_treatment (int|None): The number of participants in the
            treatment group. If provided, must also provide num_control
        num_draws (int): The number of draws from the degree of certainty
            distribution

    Returns:
        np.ndarray[float]: The drawn degrees of certainty

    Raises:
        ValueError: If neither num_participants nor both group sizes are
            given.
    """
    if num_participants:
        num_control = num_participants // 2
        num_treatment = num_participants - num_control
    elif not (num_control and num_treatment):
        raise ValueError(
            "If you provide num_control or num_treatment you must provide the other"
        )

    # Failures are counted against each group's own size, not the total.
    successes_control = binom.rvs(num_control, base_rate, size=num_draws)
    failures_control = num_control - successes_control
    successes_treatment = binom.rvs(num_treatment, treatment_rate,
                                    size=num_draws)
    failures_treatment = num_treatment - successes_treatment

    return np.array([
        degree_of_certainty(s_control, f_control, s_treatment, f_treatment)
        for s_control, f_control, s_treatment, f_treatment in zip(
            successes_control, failures_control,
            successes_treatment, failures_treatment)
    ])
def sim_n_i(self):
    """Draw ``self.k`` category counts by sequential conditional binomials.

    The first count is Binomial(self.n, self.pi[0]); every later count
    is drawn from the individuals not yet assigned, with probability
    ``pi[i]`` divided by the probability mass not yet used.

    Returns
    -------
    list
        The ``k`` drawn counts.
    """
    n = self.n
    pi = self.pi
    k = self.k

    Nis = []
    remaining = 1
    assigned = binom.rvs(n, pi[0])
    Nis.append(assigned)
    for i in range(1, k):
        remaining -= pi[i - 1]
        if remaining > 0:
            # Floating-point subtraction can push the ratio slightly
            # outside [0, 1] (e.g. pi = [0.3, 0.3, 0.4]), which scipy
            # rejects with a ValueError; clamp it.
            conditional = min(max(pi[i] / remaining, 0.0), 1.0)
        else:
            # No probability mass left: avoid dividing by zero.
            conditional = 1.0 if pi[i] > 0 else 0.0
        Ni = binom.rvs(n - assigned, conditional)
        assigned += Ni
        Nis.append(Ni)
    return Nis
def rbinom(n=1, size=1, prob=0.5):
    """
    Draw random numbers from a binomial distribution (R-style arguments).

    Returns ``n`` values, each the number of successes in ``size``
    trials with success probability ``prob``.

    :param n: int. length of the returned array
    :param size: int. number of trials per draw
    :param prob: float. probability of success for each trial
    :return: array of random counts

    EXAMPLES:
    rbinom()              # one flip of a fair coin
    rbinom(10)            # ten flips of a fair coin
    rbinom(10, prob=0.7)  # ten flips of a coin with P(Head) = 0.7
    """
    # scipy names these the other way round: its ``n`` is the number of
    # trials and its ``size`` is the number of draws.
    trials = size
    draws_wanted = n
    draws = binom.rvs(trials, prob, size=draws_wanted)
    return draws
def generar_procesos_aleatorios(self, num_procesos, tiempo_total):
    '''
    Create random processes and store them on the instance.
    ----
    params
    int num_procesos -> number of processes wanted
    int tiempo_total -> sum of the durations of all processes

    Sets ``self.procesos_original`` (one ``Proceso`` per start time,
    duration and uppercase letter) and ``self.tiempo_total_original``.
    NOTE(review): when the first duration comes out non-positive every
    duration is shifted upward, so the durations then sum to more than
    ``tiempo_total`` — confirm this is intended.
    '''
    trials = int(4.05 * tiempo_total)
    success = 1 / 9
    # when each process starts
    tiempos_iniciales = np.sort(binom.rvs(trials, success, size=num_procesos))

    media = tiempo_total // num_procesos
    var = media // 2
    # time left over after every process gets ``media`` units
    sobrante = tiempo_total - tiempo_total // num_procesos * num_procesos

    # Random offsets for all but one process; the last entry absorbs the
    # remainder so that the offsets add up to the leftover time.
    varianzas = []
    for _ in range(num_procesos - 1):
        ran = randint(-var, var)
        sobrante -= ran
        varianzas.append(ran)
    varianzas.append(sobrante)

    # how long each process lasts
    base = np.repeat(media, num_procesos)
    tiempos = base + np.array(varianzas[::-1])

    if tiempos[0] <= 0:
        shift = abs(tiempos[0]) + 1
        shift = shift + media % shift
        tiempos = tiempos + shift

    procesos = []
    for inicio, duracion, letra in zip(tiempos_iniciales, tiempos,
                                       ascii_uppercase):
        procesos.append(Proceso(inicio, duracion, letra))
    self.procesos_original = procesos
    self.tiempo_total_original = tiempo_total
def mysppron(m, p):
    """Run the Praat script on ``p/m.wav`` and print a percentage score.

    The script output is split on whitespace; token 14 is used as the
    success probability of a Binomial(10, p) distribution, and the mean
    of 10000 draws is rescaled to a percentage. Returns ``None``.
    """
    folder = p + "/"
    sound = folder + m + ".wav"
    sourcerun = folder + "myspsolution.praat"
    objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, folder,
                       80, 400, 0.01, capture_output=True)
    # objects[0] is a parselmouth.Sound object; this prints its info.
    print(objects[0])
    # objects[1] is a parselmouth.Data object with a TextGrid inside.
    tokens = str(objects[1]).strip().split()
    # Token 13 is parsed but not used afterwards; the conversion is kept
    # so malformed script output still raises here.
    int(tokens[13])
    # NOTE(review): token 14 is passed to binom.rvs as ``p``, so it
    # presumably lies in [0, 1] — confirm against the Praat script.
    probability = float(tokens[14])
    draws = binom.rvs(n=10, p=probability, size=10000)
    score = np.mean(np.array(draws)) * 100 / 10
    print("Pronunciation_posteriori_probability_score_percentage= :%.2f" % (score))
    return
def __getitem__(self, i: int) -> Tuple[ImageInstance, ImageInstance]:
    """Sample from DET with probability p_det, otherwise from VID.

    A DET sample is a single image; it is returned twice, as if the two
    copies were adjacent frames, with each label given its position in
    the label list as ``track_id``.

    Args:
        i: not used.

    Returns:
        instance: pair of adjacent frames from a sequence along with
            labels.
    """
    use_det = binom.rvs(1, self.p_det)
    if not use_det:
        return self._vid_sampler.sample()

    still = self._det_sampler.sample()
    # add arbitrary track_ids to DET instance
    tracked = []
    for t_id, label in enumerate(still.labels):
        tracked.append(ObjectLabel(class_id=label.class_id,
                                   class_name=label.class_name,
                                   box=label.box,
                                   track_id=t_id))
    frame = ImageInstance(im=still.im, labels=tuple(tracked))
    return (frame, frame)
def generate_hull(n, m=None):
    """Sample random grid points and return their convex hull.

    ``m`` distinct points are sampled from the ``(n+1) x (n+1)`` integer
    grid; when ``m`` is None it is drawn from
    Binomial((n+1)**2, 1/(n+1)). With three or fewer points the points
    themselves are returned.

    Returns
    -------
    tuple
        ``(number of sampled points, array of hull vertices)``.
    """
    side = n + 1
    cells = side ** 2
    if m is None:
        # compute number of points to be included
        m = binom.rvs(cells, 1 / side)

    # Sample m cell indices uniformly; sorting the indices orders the
    # points by row and then by column.
    chosen = sorted(sample(range(cells), m))
    points = [divmod(cell, side) for cell in chosen]
    if m <= 3:
        return (m, np.array(points))

    def half_hull(ordered):
        # One pass of the monotone chain algorithm: drop the previous
        # vertex while it does not make a strict turn.
        chain = []
        for pt in ordered:
            while len(chain) >= 2 and cross(chain[-2], chain[-1], pt) <= 0:
                chain.pop()
            chain.append(pt)
        # The last vertex repeats as the first vertex of the other half.
        return chain[:-1]

    lower = half_hull(points)
    upper = half_hull(reversed(points))
    return (len(points), np.array(lower + upper))
def _get_evidence(self, state):
    """
    Computes noisy distances between pacman and ghosts.

    Arguments:
    ----------
    - `state`: The current game state s_t
               where 't' is the current time step.
               See FAQ and class `pacman.GameState`.

    Return:
    -------
    - A list of Z noised distances in real numbers
      where Z is the number of ghosts.

    XXX: DO NOT MODIFY THIS FUNCTION !!!
    Doing so will result in a 0 grade.
    """
    positions = state.getGhostPositions()
    pacman_position = state.getPacmanPosition()
    noisy_distances = []

    for pos in positions:
        true_distance = util.manhattanDistance(pos, pacman_position)
        # Binomial(n, p) draw minus its mean n*p, i.e. zero-mean noise.
        noise = binom.rvs(self.n, self.p) - self.n*self.p
        noisy_distances.append(true_distance + noise)

    return noisy_distances
def sim_nd_na(E,N=1000, size_mean=100):
    """Simulate ``N`` bursts with exponential sizes split binomially.

    Burst sizes are exponential draws (mean ``size_mean``) rounded up to
    integers; ``na`` is Binomial(size, E) and ``nd`` is the remainder.

    Returns ``(nd, na)``.
    """
    raw_sizes = expon.rvs(scale=size_mean, size=N)
    nt = np.ceil(raw_sizes).astype(int)
    na = binom.rvs(nt, E)
    return nt - na, na
def estimate(self):
    """Estimate collateral and derived quantities by Monte Carlo.

    Reads ``self.data['prob']`` and ``self.data['payout']``, simulates
    ``self.N`` scenarios of independent Bernoulli events and sets:

    - ``mu``, ``sd``: mean and standard deviation of the payout liability
    - ``L``: total liability (sum of payouts)
    - ``C``: the ``self.pi`` quantile of simulated payouts (collateral)
    - ``data['premium']``: ``C`` shared in proportion to expected payout
    - ``r``, ``c``: ``L / C`` and ``C / L``
    - ``R``: ``C - mu``
    """
    # get the event prob and payout vectors
    self.ps = np.array(self.data['prob'])
    self.Ps = np.array(self.data['payout'])

    # payout liability mean and stdev
    expected = self.ps * self.Ps
    self.mu = np.sum(expected)
    self.sd = np.sqrt(np.sum(self.Ps**2 * (1 - self.ps) * self.ps))

    # total liability
    self.L = np.sum(self.Ps)

    # do a Monte Carlo simulation to find C, collateral.
    # Rows are events, columns are scenarios; a plain ndarray dot
    # product replaces the former np.matrix multiplication.
    outcomes = np.array([binom.rvs(1, p, size=self.N) for p in self.ps])
    samples = np.dot(self.Ps, outcomes)
    self.C = mquantiles(samples, prob=[self.pi], alphap=1, betap=1)  # Type 7

    self.data['premium'] = expected / np.sum(expected) * self.C

    # return multiple
    self.r = self.L / self.C
    self.c = self.C / self.L

    # revenue
    self.R = self.C - self.mu
def update_figure(sample_data):
    """Add one binomial draw to the running histogram and return the figure.

    ``sample_data`` is a JSON string whose ``'sample'`` entry is used as
    the success probability. One Binomial(n, p) value is appended to the
    module-level ``posterior_histogram`` list (``n`` is also a
    module-level name), and a figure dict with a histogram of all values
    collected so far is returned.
    """
    # first sample from current posterior beta distribution
    sample_data = json.loads(sample_data)
    p = sample_data['sample']

    # The third positional argument of ``binom.rvs`` is ``loc``, not
    # ``size``; passing 1 there shifted every draw up by one. A single
    # unshifted draw is taken instead.
    sample_x = binom.rvs(n, p)
    posterior_histogram.append(sample_x)

    trace = go.Histogram(
        x=posterior_histogram,
        xbins=dict(
            start=0,
            end=10,
            size=1
        )
    )
    layout_posterior = go.Layout(
        xaxis={'title': 'X', 'range': [0, 10]},
        yaxis={'title': 'Prob X~'},
        margin={'l': 40, 'b': 40, 't': 10, 'r': 10},
        legend={'x': 0, 'y': 1}
    )
    fig3 = dict(data=[trace], layout=layout_posterior)
    return fig3
def sim_nd_na(E, N=1000, size_mean=100):
    """Simulate ``N`` bursts with exponential sizes split binomially.

    Burst sizes are exponential draws (mean ``size_mean``) rounded up to
    integers; ``na`` is Binomial(size, E) and ``nd`` is the remainder.

    Returns ``(nd, na)``.
    """
    burst_sizes = np.ceil(expon.rvs(scale=size_mean, size=N))
    nt = burst_sizes.astype(int)
    na = binom.rvs(nt, E)
    nd = nt - na
    return nd, na
def _generate_sample_from_state(self, state, random_state=None):
    """Draw one observation vector for the given hidden ``state``.

    For every feature, ``self.distr_magnitude`` binomial draws (trials
    ``self.n[dim]``, probability ``self.p[dim][state][comp]``) are
    weighted by ``self.c[dim][state][comp]``, summed and rounded.
    ``random_state`` is accepted but not used.

    Returns an array with one value per feature.
    """
    sample = []
    for dim in range(self.n_features):
        weighted_total = 0
        for comp in range(self.distr_magnitude):
            draw = binom.rvs(self.n[dim], self.p[dim][state][comp])
            weighted_total += draw * self.c[dim][state][comp]
        sample.append(round(weighted_total))
    return np.array(sample)
def binomial_path(spot: float, expiry: float, rate: float, div: float, vol: float, num: int) -> np.ndarray:
    """Simulate one price path on a binomial tree.

    The horizon ``expiry`` is divided into ``num`` steps. At each step
    the price is multiplied by the up factor with probability ``p*`` and
    by the down factor otherwise.

    Returns an array of ``num + 1`` prices starting at ``spot``.
    """
    # step length
    h = expiry / num

    # up and down factors
    u = np.exp((rate - div) * h + vol * np.sqrt(h))
    d = np.exp((rate - div) * h - vol * np.sqrt(h))

    # probability of an up move
    p_star = (np.exp((rate - div) * h) - d) / (u - d)

    moves = binom.rvs(1, p_star, size=num)

    prices = np.zeros(num + 1)
    prices[0] = spot
    for step, move in enumerate(moves, start=1):
        factor = u if move == 1 else d
        prices[step] = prices[step - 1] * factor
    return prices
def __init__(self, N, comm=MPI.COMM_SELF):
    """Sample a two-component normal mixture and fit a KDE at ``h_crit``.

    Rank 0 draws ``N`` points: roughly two thirds from N(0, 1) and the
    rest from N(a, 1). The data are broadcast to all ranks, the critical
    bandwidth is computed with ``fisher_marron_critical_bandwidth`` and
    a Gaussian KDE is fitted with that bandwidth. ``a`` and ``mtol`` are
    module-level names defined outside this block.
    """
    self.comm = comm
    self.rank = self.comm.Get_rank()
    self.I = (-1.5, a + 1)  # CHECK: Is appropriate bound? OK.
    self.lamtol = 0
    self.mtol = mtol
    self.N = N

    data = None
    if self.rank == 0:
        n_first = binom.rvs(N, 2.0 / 3)
        n_second = N - n_first
        first_mode = np.random.randn(n_first)
        second_mode = np.random.randn(n_second) + a
        data = np.hstack([first_mode, second_mode])
    data = self.comm.bcast(data)

    self.data = data
    self.var = np.var(data)
    self.h_crit = fisher_marron_critical_bandwidth(
        data, self.lamtol, self.mtol, self.I)
    estimator = KernelDensity(kernel='gaussian', bandwidth=self.h_crit)
    self.kde_h_crit = estimator.fit(data.reshape(-1, 1))
def rbinom(n,size,prob=0.5):
    """
    Generate ``n`` random values from a binomial distribution with
    ``size`` trials and success probability ``prob``.
    """
    from scipy.stats import binom
    # scipy's ``n`` is the number of trials; its ``size`` is the number
    # of draws.
    return binom.rvs(size, prob, size=n)
def lnlike(p):
    """Return a simulated log-likelihood for the parameter vector ``p``.

    ``p[0]`` selects the model (``int(nmodels * p[0])``). For model 0
    each simulated count is a uniform integer in ``[0, Nagents]``; for
    any other model it is Binomial(Nagents, p[i]). Each observation
    contributes a Gaussian term with width ``epsilon[model][i-1]`` and
    normalisation ``norms[model][i-1]``. If the likelihood underflows to
    zero a large negative constant is returned instead.

    ``nmodels``, ``ndim``, ``Nagents``, ``Nobs``, ``epsilon`` and
    ``norms`` are module-level names defined outside this block.
    """
    smalllnl = -1000000.
    whichmet = int(nmodels * p[0])
    nreal = 1  # number of simulated realisations averaged over
    like = 0.
    for _ in range(nreal):
        log_l = 0.
        for i in range(1, ndim):
            if whichmet == 0:
                # A scalar draw avoids int() on a one-element array,
                # which NumPy has deprecated.
                r = int((Nagents + 1) * np.random.uniform(0, 1))
            else:
                r = binom.rvs(Nagents, p[i])
            resid = Nobs[i - 1] - r
            width = epsilon[whichmet][i - 1]
            log_l += -0.5 * resid * resid / (width * width) - np.log(
                norms[whichmet][i - 1])
        like += np.exp(log_l)

    if like == 0.0:
        return smalllnl
    return np.log(like / (1.0 * nreal))
def multiway_boot(df, reps, levels, show_progress=False):
    """
    Generate a sequence of bootstrap samples from a dataframe.

    For each replicate every group of every listed index level gets an
    independent weight of 0 or 2 (probability one half each); a row's
    weight is the product of its groups' weights. Rows with positive
    weight are yielded with two extra columns, ``rep`` and ``weight``.

    Parameters
    ----------
    df : a Pandas DataFrame object
    reps : number of reps
    levels : 1-K levels in a list. The dataframe must have a
        hierarchical index for these levels.
    show_progress : wrap the replicate loop in a ``ProgressBar``

    Yields
    ------
    DataFrame
        An independent copy of the selected rows; ``df`` is not modified.
    """
    if show_progress:
        iterator = ProgressBar(maxval=reps)(range(reps))
    else:
        iterator = range(reps)

    # One entry per group of each level; only the index is used.
    indexes = [df.groupby(level=l).apply(lambda x: 1) for l in levels]

    for i in iterator:
        weight = np.prod([
            pd.Series(
                2 * binom.rvs(1, 0.5, size=ix.shape[0]),
                index=ix.index,
                name='%s_weight' % level).reindex(df.index, level=level)
            for ix, level in zip(indexes, levels)
        ], axis=0)

        selected = weight > 0
        # Copy before adding columns so the assignment does not target a
        # slice of ``df``.
        replicate = df[selected].copy()
        replicate['rep'] = i
        replicate['weight'] = weight[selected]
        yield replicate
def artificial_data1():
    """Generate an artificial grouped logistic-regression data set.

    1000 rows with three Uniform(-1, 1) covariates ``X1``–``X3``, a
    success probability ``pi_x`` from a logistic model, a Bernoulli
    outcome ``y``, a constant column, a random group label in
    ``0 .. 99`` and, for each covariate, its within-group mean
    (``covarX1``–``covarX3``).
    """
    N = 1000
    N_groups = int(N / 10)
    beta0 = -1.6
    beta1 = -0.03
    beta2 = 0.6
    beta3 = 1.6

    df = pd.DataFrame(np.random.uniform(-1, 1, size=(N, 3)),
                      columns=['X1', 'X2', 'X3'])

    # Logistic link, with the linear predictor computed once.
    odds = np.exp(beta0 + beta1 * df['X1'] + beta2 * df['X2'] +
                  beta3 * df['X3'])
    df['pi_x'] = odds / (1 + odds)
    df['y'] = binom.rvs(1, df['pi_x'])
    df['constant'] = 1
    df['group'] = np.random.choice(range(N_groups), size=N)

    # Within-group means in one pass. This also avoids writing floats
    # into integer-initialised columns one group at a time.
    group_means = df.groupby('group')[['X1', 'X2', 'X3']].transform('mean')
    df['covarX1'] = group_means['X1']
    df['covarX2'] = group_means['X2']
    df['covarX3'] = group_means['X3']
    return df
def sample_distribution_func(samples, distribution, mean=0, sd=1, mu=3, n=10, p=0.5):
    """Draw ``samples`` random numbers from a named distribution.

    ``distribution`` is ``"Normal"`` (uses ``mean``, ``sd``),
    ``"Poisson"`` (uses ``mu``) or ``"Binomial"`` (uses ``n``, ``p``).
    A summary of the draw is printed. Any other name prints an error
    and returns an empty array of shape (0, 1).
    """
    if distribution == "Normal":
        draws = norm.rvs(size=samples, loc=mean, scale=sd)
        print(samples, "samples of a", distribution, "distribution with parameter values of mean =", mean, "and sd =", sd)
        return draws

    if distribution == "Poisson":
        draws = poisson.rvs(size=samples, mu=mu)
        print(samples, "samples of a", distribution, "distribution with a parameter value of mu =", mu)
        return draws

    if distribution == "Binomial":
        draws = binom.rvs(size=samples, n=n, p=p)
        print(samples, "samples of a", distribution, "distribution with parameter values of n =", n, "and p =", p)
        return draws

    print('ERROR!:', distribution, "distribution is not defined within function")
    return np.zeros(shape=[0, 1])
def assignGenetics(world):
    """Assign a gene pair and deaf status to every agent in ``world.pop``.

    ``gDom`` fixes the share of agents with two copies ``(1, 1)``;
    ``gCarry`` fixes the share of the remaining agents with one copy,
    split at random between ``(0, 1)`` and ``(1, 0)``; all others get
    ``(0, 0)``. Genes are shuffled only when ``initialDeafLocations`` is
    ``"Random"``. An agent is deaf with two copies, or otherwise with
    probability ``NonGeneticDeafness``.
    """
    params = world.parameters
    n_twoCopies = int(math.ceil(world.nAgents * params["gDom"]))
    n_oneCopy = int((world.nAgents - n_twoCopies) * params["gCarry"])
    n_zeroCopies = len(world.pop) - n_oneCopy - n_twoCopies

    # 0 = hearing, 1 = deaf
    nZeroOne = binom.rvs(n=n_oneCopy, p=.5)  # server mod
    nOneZero = n_oneCopy - nZeroOne

    gene_distribution = ([(1, 1)] * n_twoCopies
                         + [(0, 1)] * nZeroOne
                         + [(1, 0)] * nOneZero
                         + [(0, 0)] * n_zeroCopies)

    # otherwise: if not shuffled, then genes will be clustered by compound
    if params["initialDeafLocations"] == "Random":
        random.shuffle(gene_distribution)

    for position, genes in enumerate(gene_distribution):
        agent = world.pop[position]
        agent.genes = genes
        # The random draw only happens for agents without two copies.
        agent.deafStatus = bool(
            genes == (1, 1)
            or random.random() < params["NonGeneticDeafness"])
def myspp(m, p, q):
    """Run the Praat script ``p`` on sound file ``m`` and return a score.

    ``q`` is passed to the script as its working path. The script output
    is split on whitespace; token 14 is used as the success probability
    of a Binomial(10, p) distribution, and the mean of 10000 draws is
    rescaled to a percentage.
    """
    objects = run_file(p, -20, 2, 0.3, "yes", m, q, 80, 400, 0.01,
                       capture_output=True)
    # objects[0] is a parselmouth.Sound object; this prints its info.
    print(objects[0])
    # objects[1] is a parselmouth.Data object with a TextGrid inside.
    tokens = str(objects[1]).strip().split()
    # Token 13 is parsed but not used afterwards; the conversion is kept
    # so malformed script output still raises here.
    int(tokens[13])
    # NOTE(review): token 14 is passed to binom.rvs as ``p``, so it
    # presumably lies in [0, 1] — confirm against the Praat script.
    probability = float(tokens[14])
    draws = binom.rvs(n=10, p=probability, size=10000)
    return np.mean(np.array(draws)) * 100 / 10
def sampled_lineshape(lineshape_func, delta, tau_pi=6e-3, linecenter=0., state_prep=False, samples_per_point=100):
    """Get approximate lineshape via sampling.

    Parameters
    ----------
    lineshape_func : function
        Theoretical lineshape function. It is called as
        ``lineshape_func(delta, tau_pi, state_prep, linecenter)`` and
        must return probabilities.
    delta : scalar or array
        Frequencies at which to sample.
    tau_pi : float
        Passed through to ``lineshape_func``; default ``6e-3``.
    linecenter : float
        Frequency of clock transition.
    state_prep : boolean, False by default
        State preparation flag
    samples_per_point : int
        Number of binomial trials per frequency point, modelling the
        probing cycles.

    Returns
    -------
    The sampled lineshape: the fraction of successful trials per point."""
    # quantum jump p from theory
    jump_probabilities = lineshape_func(delta, tau_pi, state_prep, linecenter)
    # draws from binomial
    jump_counts = binom.rvs(n=samples_per_point, p=jump_probabilities)
    return jump_counts / samples_per_point
def trial(self):
    """
    Run a trial: draw the number of successes in ``self.ndraws``
    attempts with success probability ``self._P``, store it on
    ``self.outcome`` and return it.
    """
    successes = binom.rvs(self.ndraws, self._P)
    self.outcome = successes
    return self.outcome
def _generate_sample_from_state(self, state, random_state=None):
    """Draw one observation vector for the given hidden ``state``.

    Entry ``i`` of ``self.dim`` gives how many binomial values to draw
    with ``self.n[i]`` trials and probability ``self.p[i][state]``.
    ``random_state`` is accepted but not used.

    Returns an array with ``sum(self.dim)`` values.
    """
    draws = [
        binom.rvs(self.n[i], self.p[i][state])
        for i, count in enumerate(self.dim)
        for _ in range(count)
    ]
    return np.asarray(draws)
def assignGenetics(world):
    """Assign a gene pair and deaf status to every agent in ``world.pop``.

    ``gDom`` fixes the share of agents with two copies ``(1, 1)``;
    ``gCarry`` fixes the share of the remaining agents with one copy,
    split at random between ``(0, 1)`` and ``(1, 0)``; all others get
    ``(0, 0)``. The genes are shuffled before being handed out, and
    exactly the two-copy agents are marked deaf.
    """
    params = world.parameters
    n_twoCopies = int(math.ceil(world.nAgents * params["gDom"]))
    n_oneCopy = int((world.nAgents - n_twoCopies) * params["gCarry"])
    n_zeroCopies = len(world.pop) - n_oneCopy - n_twoCopies

    # 0 = hearing, 1 = deaf
    nZeroOne = binom.rvs(n=n_oneCopy, p=.5)  # server mod
    nOneZero = n_oneCopy - nZeroOne

    gene_distribution = ([(0, 1)] * nZeroOne
                         + [(1, 0)] * nOneZero
                         + [(1, 1)] * n_twoCopies
                         + [(0, 0)] * n_zeroCopies)
    random.shuffle(gene_distribution)

    for position, genes in enumerate(gene_distribution):
        agent = world.pop[position]
        agent.genes = genes
        agent.deafStatus = genes == (1, 1)
def simulate_data(beta_0, num_players, num_matches, time_range, rho, sig, covariates):
    """Simulate pairwise win counts while abilities evolve over time.

    For each ``t`` in ``time_range`` and each pair ``i < j`` the number
    of wins is Binomial(``num_matches[t][i, j]``,
    ``win_prob(i, j, beta, covariates)``). After each period the
    abilities follow ``beta = rho * beta + sig * sqrt(1 - rho**2) * eps``
    with standard normal ``eps``. ``win_prob`` is defined outside this
    block.

    Returns
    -------
    tuple
        ``(y, betas)``: per-period result matrices (upper triangle
        filled) and the abilities used in each period, both indexed by
        ``t``.
    """
    # Placeholders, overwritten per period below.
    betas = [None] * len(time_range)
    y = [np.matrix([[None] * num_players] * num_players) for t in time_range]
    beta = beta_0

    for t in time_range:
        # results matrix for this point in time
        y_t = np.matrix([[0] * num_players for _ in range(num_players)])
        n_t = num_matches[t]

        # simulate wins for every pair with i < j
        for i in range(num_players):
            for j in range(i + 1, num_players):
                win_probability = win_prob(i, j, beta, covariates)
                y_t[i, j] = binom.rvs(n_t[i, j], win_probability, size=1)
        y[t] = y_t

        # store used abilities
        betas[t] = beta

        # propagate abilities through time for the next iteration
        shock = norm.rvs(size=len(beta), loc=0, scale=1)
        beta = rho * beta + sig * np.sqrt(1 - rho**2) * shock

    return y, betas
def myspp(bp, bg):
    """Run the Praat script on audio file ``bp`` under ``bg`` and return a score.

    Paths are built under ``bg/dataset``. The script output is split on
    whitespace; token 14 is used as the success probability of a
    Binomial(10, p) distribution, and the mean of 10000 draws is
    rescaled to a percentage.
    """
    dataset = bg + "/" + "dataset" + "/"
    path = dataset + "audioFiles" + "/"
    sound = path + bp + ".wav"
    sourcerun = dataset + "essen" + "/" + "myspsolution.praat"
    objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path,
                       80, 400, 0.01, capture_output=True)
    # objects[0] is a parselmouth.Sound object; this prints its info.
    print(objects[0])
    # objects[1] is a parselmouth.Data object with a TextGrid inside.
    tokens = str(objects[1]).strip().split()
    # Token 13 is parsed but not used afterwards; the conversion is kept
    # so malformed script output still raises here.
    int(tokens[13])
    # NOTE(review): token 14 is passed to binom.rvs as ``p``, so it
    # presumably lies in [0, 1] — confirm against the Praat script.
    probability = float(tokens[14])
    draws = binom.rvs(n=10, p=probability, size=10000)
    return np.mean(np.array(draws)) * 100 / 10
def _generate_sample_from_state(self, state, random_state=None):
    """Draw one observation vector for the given hidden ``state``.

    Entry ``i`` of ``self.dim`` gives how many binomial values to draw
    with ``self.n[i]`` trials and probability ``self.p[i][state]``.
    ``random_state`` is accepted but not used.

    Returns an array with ``sum(self.dim)`` values.
    """
    values = []
    for i, repeats in enumerate(self.dim):
        trials = self.n[i]
        values.extend(
            binom.rvs(trials, self.p[i][state]) for _ in range(repeats))
    return np.asarray(values)
def make_binom_data():
    """Build a binomial data set with a sinusoidal success probability.

    ``NTOT`` points are spaced evenly on [-50, 50]; the success
    probability is ``0.5 * (sin(x / 5) + 1)`` and each response is a
    Binomial(1000, p) draw. ``NTOT`` is defined outside this block.

    Returns ``(X, y, p, n)`` with ``X`` the inputs as a column.
    """
    n = 1000
    x = np.linspace(-50, 50, NTOT)
    p = 0.5 * (np.sin(x / 5.) + 1)
    y = binom.rvs(n, p)
    X = np.atleast_2d(x).T
    return X, y, p, n
def central_limit_theorem():
    """Save a histogram of 1000 standardised Binomial(100, 0.3) draws.

    Each draw is centred by ``n * 0.3`` and scaled by
    ``sqrt(n * 0.3 * 0.7)``; the figure is written to
    ``central_limit_theorem.png``.
    """
    n = 100
    centre = n * 0.3
    spread = np.sqrt(n * 0.3 * 0.7)
    y = [
        (np.sum(binom.rvs(n, 0.3)) - centre) / spread
        for _ in range(1000)
    ]
    plt.hist(y, color='grey')
    plt.savefig('central_limit_theorem.png')
def shouldersamp(N, comm):
    """Sample ``N`` points from a two-component mixture and broadcast them.

    On rank 0 about 1/17 of the points come from N(-1.25, 0.25**2) and
    the rest from N(0, 1); the sample is then broadcast through ``comm``
    and returned on every rank.
    """
    data = None
    if comm.Get_rank() == 0:
        m1 = -1.25
        s1 = 0.25
        N1 = binom.rvs(N, 1.0/17)
        N2 = N - N1
        shoulder = s1*np.random.randn(N1)+m1
        bulk = np.random.randn(N2)
        data = np.hstack([shoulder, bulk])
    return comm.bcast(data)
def central_limit_theorem():
    """Print and plot 1000 standardised Binomial(1000, 0.6) draws.

    Each draw is centred by ``n * p`` and scaled by
    ``sqrt(n * p * (1 - p))``; the values are printed and shown as a
    histogram.
    """
    y = []
    n = 1000
    p = 0.6
    # NOTE: n is both the number of trials and the number of repetitions.
    for _ in range(n):
        r = binom.rvs(n, p)
        rsum = np.sum(r)
        z = (rsum - n*p) / np.sqrt(n*p*(1 - p))
        y.append(z)
    # Parenthesised so the call works on both Python 2 and Python 3.
    print(y)
    plt.hist(y, color='grey')
    plt.show()
def adjust_args(args):
    """Rewrite an argument list in place to account for selfing rate ``-S s``.

    The ``-S s`` pair is removed. With ``p = s / (2 - s)``:

    - ``-t`` (mutation) is multiplied by ``(2 - s) / 2``;
    - ``-r`` (recombination) is multiplied by ``(1 - s)``;
    - sample sizes are reduced by a Binomial(count, p) draw, where the
      counts are the extra entries that follow the sample sizes (after
      ``-I npops n_1 .. n_k`` when present, otherwise ``args[2]``);
      those extra entries are then deleted.

    The same list object is returned. Raises ``ValueError`` when ``-S``
    is missing.
    """
    s_at = args.index('-S')
    s = float(args[s_at + 1])
    p = s / (2.0 - s)
    del args[s_at:s_at + 2]

    # Adjust demographic parameters (Nordborg & Donnelly (1997) and
    # Nordborg (2000))
    # mutation rate: \theta_eff = \theta * (2 - s) / 2
    if '-t' in args:
        value_at = args.index('-t') + 1
        args[value_at] = str(float(args[value_at]) * (2.0 - s) / 2.0)

    # recombination: \rho_eff = \rho * (1 - s)
    if '-r' in args:
        value_at = args.index('-r') + 1
        args[value_at] = str(float(args[value_at]) * (1.0 - s))

    if '-I' not in args:
        args[0] = str(int(args[0]) - binom.rvs(int(args[2]), p))
        del args[2]
        return args

    i_at = args.index('-I')
    npops = int(args[i_at + 1])
    start = i_at + 2
    stop = start + npops
    for pos in range(start, stop):
        removed = binom.rvs(int(args[pos + npops]), p)
        args[pos] = str(int(args[pos]) - removed)
    # The total sample size is the sum of the adjusted per-population sizes.
    args[0] = str(sum([int(size) for size in args[start:stop]]))
    del args[start + npops:stop + npops]
    return args
def first_estimations(distributions, thetas):
    """Feed every distribution ``settings.NB_RUNS`` initial experiments.

    For each key ``d`` a success count is drawn from
    Binomial(``settings.N_BINOM``, ``thetas[int(d)]``) and passed to
    ``distributions[str(d)].get_posterior``. ``settings`` is defined
    outside this block.
    """
    n = settings.N_BINOM
    for d in distributions:
        for _ in range(settings.NB_RUNS):
            z = binom.rvs(n, thetas[int(d)])
            distributions[str(d)].get_posterior(n, z)
def law_of_large_numbers():
    """Plot the running mean error of draws whose distributions all have mean 6.

    1000 values are drawn from Binomial(10, 0.6), Poisson(6) and
    Normal(6, 1). After each step the plot shows the mean of all values
    seen so far, minus 6.
    """
    x = np.arange(1, 1001, 1)
    r1 = binom.rvs(10, 0.6, size=1000)
    r2 = poisson.rvs(mu=6, size=1000)
    r3 = norm.rvs(loc=6, size=1000)

    y = []
    rsum = 0.0
    for steps, (b, q, g) in enumerate(zip(r1, r2, r3), start=1):
        rsum = rsum + (b + q + g)
        # three values are added per step
        y.append(rsum / (steps * 3) - 6)

    plt.plot(x, y, color='red')
    plt.show()
def solve(n, p):
    """Compare four binomial interval methods on simulated counts.

    Draws ``M`` (module-level) counts from Binomial(n, p) and evaluates
    ``clopper_pearson``, ``arcsine_transformation``, ``wilson`` and
    ``normal_approximation`` on each. For every method the first element
    of its result is recorded, along with whether ``p`` lies strictly
    inside the bounds given by ``result[1][0]`` and ``result[1][1]``.
    Finally prints, per method, the mean and variance of the recorded
    first elements and the fraction of draws whose interval contained
    ``p``.

    :param n: number of binomial trials
    :param p: true success probability
    :return: None
    """
    methods = (clopper_pearson, arcsine_transformation, wilson,
               normal_approximation)
    res = np.zeros((4, M)).tolist()
    emp = np.zeros((4, M)).tolist()
    for k, x in enumerate(binom.rvs(n, p, size=M).tolist()):
        for row, method in enumerate(methods):
            outcome = method(x, n)
            res[row][k] = outcome[0]
            emp[row][k] = outcome[1][0] < p < outcome[1][1]
    summary = (np.mean(res, axis=1), np.var(res, axis=1),
               np.mean(emp, axis=1))
    # Space-joined output matches the old print statement on Python 2 and
    # is valid syntax on Python 3.
    print(' '.join(str(part) for part in summary))
def __init__(self, N, comm=MPI.COMM_SELF):
    """Sample a two-component normal mixture and fit its critical-bandwidth KDE.

    Rank 0 draws the sample: a Binomial(N, 1/17) count of points from a
    normal with mean -1.25 and standard deviation 0.25, the rest standard
    normal. The sample is broadcast over ``comm``; the variance, the
    critical bandwidth on the interval ``self.I`` and a Gaussian
    ``KernelDensity`` fitted with that bandwidth are then stored.

    :param N: total number of sample points
    :param comm: communicator used for ``Get_rank`` and ``bcast``
    """
    self.comm = comm
    self.rank = self.comm.Get_rank()
    self.I = (-1.5, 1.5)  # CHECK: Is appropriate bound? OK.
    self.N = N

    sample = None
    if self.rank == 0:
        n_narrow = binom.rvs(N, 1.0/17)
        # Narrow component is drawn before the standard-normal remainder.
        narrow = 0.25*np.random.randn(n_narrow) - 1.25
        wide = np.random.randn(N - n_narrow)
        sample = np.hstack([narrow, wide])

    self.data = self.comm.bcast(sample)
    self.var = np.var(self.data)
    self.h_crit = critical_bandwidth(self.data, self.I)
    estimator = KernelDensity(kernel='gaussian', bandwidth=self.h_crit)
    self.kde_h_crit = estimator.fit(self.data.reshape(-1, 1))
def __init__(self, N, comm=MPI.COMM_SELF):
    """Sample a two-component normal mixture and fit its critical-bandwidth KDE.

    Rank 0 draws the sample: a Binomial(N, 2/3) count of standard-normal
    points, the rest standard normal shifted by ``a``. The sample is
    broadcast over ``comm``; the variance, the critical bandwidth from
    ``fisher_marron_critical_bandwidth`` on the interval ``self.I`` and a
    Gaussian ``KernelDensity`` fitted with that bandwidth are then stored.
    ``a`` and ``mtol`` are taken from the enclosing scope.

    :param N: total number of sample points
    :param comm: communicator used for ``Get_rank`` and ``bcast``
    """
    self.comm = comm
    self.rank = self.comm.Get_rank()
    self.I = (-1.5, a+1)  # CHECK: Is appropriate bound? OK.
    self.lamtol = 0
    self.mtol = mtol
    self.N = N

    sample = None
    if self.rank == 0:
        n_first = binom.rvs(N, 2.0/3)
        # Unshifted component is drawn before the shifted one.
        first = np.random.randn(n_first)
        second = np.random.randn(N - n_first) + a
        sample = np.hstack([first, second])

    self.data = self.comm.bcast(sample)
    self.var = np.var(self.data)
    self.h_crit = fisher_marron_critical_bandwidth(
        self.data, self.lamtol, self.mtol, self.I)
    estimator = KernelDensity(kernel='gaussian', bandwidth=self.h_crit)
    self.kde_h_crit = estimator.fit(self.data.reshape(-1, 1))
def births_and_deaths(world):
    """ Population change by replacement

    Draws the number of birth/death events for this stage from a binomial
    distribution configured by ``world.parameters``. For each event, if at
    least one married couple exists, a randomly chosen couple reproduces
    and a randomly chosen agent is removed from the world.

    :param world: simulation object exposing ``parameters``, ``pop`` and
        ``removeAgent``
    :return: None
    """
    # Number of events: Binomial(max events per stage, per-event probability).
    nHits = binom.rvs(n=world.parameters["maxNumberOfBirthDeathEachStage"],p=world.parameters["probOfBirthDeathEachStage"])  # server mod
    #nHits = binom.rvs(world.parameters["maxNumberOfBirthDeathEachStage"],world.parameters["probOfBirthDeathEachStage"])
    for i in range(nHits):
        # find a couple to have a child
        couples =findMarriedCouples(world)
        if len(couples)>0:
            # NOTE(review): the couples are looked up a second time here;
            # this looks redundant unless findMarriedCouples has side
            # effects or is randomized -- confirm before removing.
            couples =findMarriedCouples(world)
            # each couple is used as a pair of indices into world.pop
            new_parents = random.choice(couples)
            # have a child
            reproduce(world.pop[new_parents[0]], world.pop[new_parents[1]], world)
            # remove someone (chosen uniformly from the whole population)
            agent_to_remove = random.choice(world.pop)
            world.removeAgent(agent_to_remove)
def simulatePoll(N,p):
    """Simulate a poll and return the observed proportion of successes.

    Draws one Binomial(N, p) count and divides it by ``N``.

    :param N: number of respondents (binomial trials)
    :param p: probability that a respondent answers positively
    :return: fraction of positive answers, between 0 and 1
    """
    # Multiplying by 1.0 forces true division; on Python 2 a plain
    # int / int would floor the proportion to 0 or 1.
    return binom.rvs(N, p) * 1.0 / N
# Demonstration of scipy.stats.binom: moments, pmf plot, cdf/ppf round trip
# and random variates for a Binomial(n=5, p=0.4) distribution.
from scipy.stats import binom
import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)

# Compute the first four moments (mean, variance, skewness, kurtosis):
n, p = 5, 0.4
mean, var, skew, kurt = binom.stats(n, p, moments='mvsk')

# Display the probability mass function (``pmf``) of the random variable.
# np.arange excludes its stop value, so x ends one step below the 0.99
# quantile.
x = np.arange(binom.ppf(0.01, n, p), binom.ppf(0.99, n, p))
ax.plot(x, binom.pmf(x, n, p), 'bo', ms=8, label='pmf binomial')
ax.vlines(x, 0, binom.pmf(x, n, p), colors='b', lw=5, alpha=0.5)
ax.legend(loc='best', frameon=False)

# Check the accuracy of ``cdf`` and ``ppf`` (the boolean result is
# discarded here):
prob = binom.cdf(x, n, p)
np.allclose(x, binom.ppf(prob, n, p))

# Generate random numbers (not used further in this script)
r = binom.rvs(n, p, size=1000)

plt.show()
def _generate_sample_from_state(self, state, random_state=None):
    """Draw one two-component binomial observation for a given state.

    Component ``k`` (0 or 1) is a Binomial(``self.n[k]``,
    ``self.p[k][state]``) draw.

    :param state: index selecting the success probability of each component
    :param random_state: ``None``, an int seed, or a NumPy random generator
        forwarded to ``binom.rvs``; ``None`` keeps SciPy's default global
        random state
    :return: NumPy array holding the two draws
    """
    # An int seed is converted once so both components share one stream;
    # seeding each call separately would tie the two draws together.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)
    return np.array([
        binom.rvs(self.n[k], self.p[k][state], random_state=random_state)
        for k in (0, 1)
    ])
# # Code 6.17 - Negative binomial model in Python using Stan # 1 response (nby) and 2 explanatory variables (x1, x2) import numpy as np import pystan from scipy.stats import uniform, binom, nbinom import statsmodels.api as sm # Data np.random.seed(141) # set seed to replicate example nobs= 2500 # number of obs in model x1 = binom.rvs(1, 0.6, size=nobs) # categorical explanatory variable x2 = uniform.rvs(size=nobs) # real explanatory variable theta = 0.303 X = sm.add_constant(np.column_stack((x1, x2))) beta = [1.0, 2.0, -1.5] xb = np.dot(X, beta) # linear predictor exb = np.exp(xb) nby = nbinom.rvs(exb, theta) mydata = {} # build data dictionary mydata['N'] = nobs # sample size mydata['X'] = X # predictors mydata['Y'] = nby # response variable mydata['K'] = len(beta)
import pymc3 as pm
from scipy.stats import binom
import matplotlib.pyplot as plt
from plot_post import plot_post

# THE DATA.
# For each subject, specify the condition s/he was in,
# the number of trials s/he experienced, and the number correct.
# (Randomly generated fictitious data.)
# NOTE(review): relies on `np` (numpy) already being imported before this
# point -- confirm.
npg = 20  # number of subjects per group
ntrl = 20  # number of trials per subject
# Condition label (0..3) for each subject, npg subjects per condition.
cond_of_subj = np.repeat([0, 1, 2, 3], npg)
# Every subject gets the same number of trials.
n_trl_of_subj = np.repeat([ntrl], 4*npg)
np.random.seed(47401)  # fixed seed so the fictitious data are reproducible
# Number correct per subject: one binomial batch per condition, each with
# its own success probability, concatenated in condition order.
n_corr_of_subj = np.concatenate((binom.rvs(n=ntrl, p=.61, size=npg),
                                 binom.rvs(n=ntrl, p=.50, size=npg),
                                 binom.rvs(n=ntrl, p=.49, size=npg),
                                 binom.rvs(n=ntrl, p=.51, size=npg)))
n_subj = len(cond_of_subj)        # total number of subjects
n_cond = len(set(cond_of_subj))   # number of distinct conditions

# THE MODEL
with pm.Model() as model:
    # Hyperprior on model index:
    model_index = pm.DiscreteUniform('model_index', lower=0, upper=1)
    # Constants for hyperprior:
    shape_Gamma = 1.0
    rate_Gamma = 0.1
# X12: Education level # X13: Employed from scipy.stats import binom import numpy as np import csv samplesize = 1000 outfile = open("humantrafficking_data.csv","w") out = csv.writer(outfile,delimiter=",",lineterminator="\n") header = ["Trafficker","Location", "Gender", "Age", "Marital Status", "US Citizenship", "Education","Employed","Gang Member","Arrested","Personal Crime","Property Crime","Inchoate Crime","Statutory Crime","Misdemeanor","Felony"] out.writerow(header) #print header cities = ["Atlanta", "Chicago", "Dallas", "Detroit", "Las Vegas", "San Diego", "San Francisco", "St. Louis", "Tampa", "DC"] t = binom.rvs(1,.5, size=samplesize) for i in range(len(t)): arr = binom.rvs(1,.85) loc = cities[np.random.randint(0,len(cities))] if t[i] == 1: # gender 25/75 ratio gender = binom.rvs(1,.75) # age 18-52 age = np.random.randint(18,52) # marital status ms = binom.rvs(1,.75) # in a gang gang = binom.rvs(1,.7) else: # gender 40/60 ratio