def hypothesisTest(self, seq1, seq2, totalSeq1, totalSeq2):
    replicates = self.preferences['Replicates']

    # create null distribution
    pooledN = totalSeq1 + totalSeq2
    pooledP = float(seq1 + seq2) / pooledN

    diff = []
    for dummy in xrange(0, replicates):
        c1 = binomial(totalSeq1, pooledP)
        c2 = binomial(totalSeq2, pooledP)
        diff.append(float(c1) / totalSeq1 - float(c2) / totalSeq2)

    # determine number of replicates w/ an effect size more extreme than the observed data
    obsDiff = float(seq1) / totalSeq1 - float(seq2) / totalSeq2

    leftCount = 0
    rightCount = 0
    twoSidedCount = 0
    for value in diff:
        if value <= obsDiff:
            leftCount += 1
        if value >= obsDiff:
            rightCount += 1
        if abs(value) >= abs(obsDiff):
            twoSidedCount += 1

    oneSidedCount = leftCount
    if rightCount < oneSidedCount:
        oneSidedCount = rightCount

    return float(oneSidedCount) / replicates, float(twoSidedCount) / replicates, ''
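# A minimal, hypothetical sketch of the same pooled-bootstrap idea as hypothesisTest
# above, written as a standalone function (the name, defaults, and vectorised form are
# assumptions, not part of the original class): pool the two samples, resample counts
# under the null, and count replicates at least as extreme as the observed difference.
def bootstrap_two_proportion_pvalue(seq1, seq2, total1, total2, replicates=10000):
    from numpy.random import binomial
    pooled_p = float(seq1 + seq2) / (total1 + total2)
    obs_diff = float(seq1) / total1 - float(seq2) / total2
    null_diffs = binomial(total1, pooled_p, replicates) / float(total1) \
        - binomial(total2, pooled_p, replicates) / float(total2)
    # two-sided p-value: fraction of null replicates at least as extreme as observed
    return float((abs(null_diffs) >= abs(obs_diff)).sum()) / replicates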
def test_n_zero(self):
    # Tests the corner case of n == 0 for the binomial distribution.
    # binomial(0, p) should be zero for any p in [0, 1].
    # This test addresses issue #3480.
    zeros = np.zeros(2, dtype='int')
    for p in [0, .5, 1]:
        assert_(random.binomial(0, p) == 0)
        np.testing.assert_array_equal(random.binomial(zeros, p), zeros)
def test_mb5():
    xmax = 3
    n = 1000
    x = np.concatenate((binomial(xmax, 0.1, 9 * n), binomial(xmax, 0.9, n)))
    MB = tbm.TwoBinomialMixture()
    MB.EMalgo(x, xmax)
    MB.parameters()
    assert(np.absolute(MB.Lambda - 0.9) < 0.1)
def test_mb7():
    xmax = 5
    n = 100
    x = np.concatenate((binomial(xmax, 0.1, 99 * n), binomial(xmax, 0.8, n)))
    MB = tbm.TwoBinomialMixture()
    MB.EMalgo(x, xmax)
    MB.parameters()
    assert MB.r1 > 0.7
def test_mb6():
    xmax = 5
    n = 1000
    x = np.concatenate((binomial(xmax, 0.05, 9 * n), binomial(xmax, 0.5, n)))
    MB = tbm.TwoBinomialMixture()
    MB.EMalgo(x, xmax)
    MB.parameters()
    assert MB.r0 < .1
def bino_mutual(filename, periods):
    print('Binomial simulation of mutualistic interaction %s %ld' % (filename, periods))
    tinic = time()

    filename_a = filename + '_a.txt'
    minputchar_a = dlmreadlike(filename_a)
    nrows_a = len(minputchar_a)
    ncols_a = len(minputchar_a[0])
    for i in range(nrows_a):
        for j in range(ncols_a):
            minputchar_a[i][j] = float(minputchar_a[i][j])
    numspecies_a = ncols_a
    print("numspecies a %d" % numspecies_a)
    K_a = []
    Nindividuals_a = []
    rowNindividuals_a = []
    r_a = []

    filename_b = filename + '_b.txt'
    minputchar_b = dlmreadlike(filename_b)
    nrows_b = len(minputchar_b)
    ncols_b = len(minputchar_b[0])
    for i in range(nrows_b):
        for j in range(ncols_b):
            # convert matrix b in place (the original mistakenly overwrote minputchar_a here)
            minputchar_b[i][j] = float(minputchar_b[i][j])
    numspecies_b = nrows_b - 3
    print("numspecies b %d" % numspecies_b)
    K_b = []
    Nindividuals_b = []
    rowNindividuals_b = []
    r_b = []

    for n in range(numspecies_a):
        rowNindividuals_a.append(int(minputchar_a[nrows_a - 3][n]))
        K_a.append(int(minputchar_a[nrows_a - 2][n]))
        r_a.append(minputchar_a[nrows_a - 1][n])
    Nindividuals_a.append(rowNindividuals_a)

    period_year = 365
    for k in range(periods - 1):
        rowNi = []
        for n in range(numspecies_a):
            rperiod = float(r_a[n] / period_year)
            # Variation due to malthusian parameter r
            incNmalth = binomial(Nindividuals_a[k][n], 1 - exp(-1 * rperiod))
            # Second term of logistic equation
            incNlogistic = binomial((Nindividuals_a[k][n] ** 2) / K_a[n], 1 - exp(-1 * rperiod))
            # Terms due to other species
            incNOtherspecies = 0
            for j in range(numspecies_a):
                incNOtherspecies = incNOtherspecies + binomial(
                    round(Nindividuals_a[k][j] * Nindividuals_a[k][n] / K_a[n]),
                    1 - exp(-1 * rperiod * minputchar_a[n][j]))
            rowNi.append(round(Nindividuals_a[k][n] + incNmalth - incNlogistic + incNOtherspecies))
        Nindividuals_a.append(rowNi)

    tfin = time()
    print("Elapsed time %f s" % (tfin - tinic))
    dlmwritelike(filename_a, periods, Nindividuals_a, 'bino')
    plt.plot(Nindividuals_a)
    plt.show()
def test_mb4():
    xmax = 5
    n = 100
    x = np.concatenate((binomial(xmax, 0.1, n), binomial(xmax, 0.9, n)))
    MB = tbm.TwoBinomialMixture()
    MB.EMalgo(x, xmax)
    MB.parameters()
    kappa = MB.kappa()
    assert(np.absolute(MB.Lambda - 0.5) < 0.1)
def random_walk(numsteps):
    x = np.zeros(numsteps)
    y = np.zeros(numsteps)
    for i in range(numsteps):
        if random.binomial(1, 0.5) == 0.:
            y[i] = random.binomial(1, 0.99) * random.choice([-1, 1])
        else:
            x[i] = random.binomial(1, 0.99) * random.choice([-1, 1])
    lim = max(max(abs(np.cumsum(x))), max(abs(np.cumsum(y)))) + 1.
    return np.cumsum(x), np.cumsum(y), (-lim, lim), (-lim, lim)
def getRandomCoinFlip(p):
    max_flips = 10000
    coin_flips = binomial(1, p, max_flips)
    position = 0
    while True:
        if position == max_flips:
            coin_flips = binomial(1, p, max_flips)
            position = 0
        else:
            yield coin_flips[position]
            position += 1
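# Hypothetical usage of the getRandomCoinFlip generator above (assuming
# numpy.random.binomial is imported as binomial, as the generator expects):
# it yields pre-batched Bernoulli(p) flips one at a time.
flips = getRandomCoinFlip(0.3)
first_ten = [next(flips) for _ in range(10)]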
def bern_y(X, p1, base_prob=.25, beta_sd=1):
    n, p = X.shape
    X_1 = X[:, :p1]
    v = 0
    while v < 1E-5:
        beta = npran.randn(p1) * beta_sd
        if p1 > 0:
            eta = cutoff(np.dot(X_1, beta) + logit(base_prob))
            y = npran.binomial(1, invlogit(eta), n)
        else:
            y = npran.binomial(1, base_prob, n)
        v = np.min(nplin.svd(np.hstack((X, y[:, np.newaxis])))[1])
    return y
def perform(self, X):
    images = numpy.copy(X)

    for idx in range(images.shape[0]):
        img_arr = images[idx].reshape(self.img_width, self.img_height)

        # Perform a horizontal reflection, maybe...
        if binomial(n=1, p=self.p_hsymetry) == 1:
            img_arr = img_arr[:, ::-1]

        # Perform a translation, maybe...
        if binomial(n=1, p=self.p_translation) == 1:
            h_translation = numpy.random.randint(self.min_translation_pixels,
                                                 self.max_translation_pixels + 1)
            v_translation = numpy.random.randint(self.min_translation_pixels,
                                                 self.max_translation_pixels + 1)

            # Perform horizontal translation
            if h_translation < 0:
                temp = img_arr[:, -h_translation:]
                img_arr[:, :h_translation] = temp
                img_arr[:, h_translation:] = 0
            elif h_translation > 0:
                temp = img_arr[:, :-h_translation]
                img_arr[:, h_translation:] = temp
                img_arr[:, :h_translation] = 0

            # Perform vertical translation
            if v_translation < 0:
                temp = img_arr[-v_translation:, :]
                img_arr[:v_translation, :] = temp
                img_arr[v_translation:, :] = 0
            elif v_translation > 0:
                temp = img_arr[:-v_translation, :]
                img_arr[v_translation:, :] = temp
                img_arr[:v_translation, :] = 0

        # Perform a rotation, maybe...
        if binomial(n=1, p=self.p_rotation) == 1:
            deg_rotation = numpy.random.randint(self.min_rotation_degrees,
                                                self.max_rotation_degrees + 1)
            if deg_rotation != 0:
                img = Image.fromarray(img_arr)
                img = img.rotate(deg_rotation)
                img_arr = numpy.array(img)

        images[idx] = img_arr.reshape(self.img_width * self.img_height)

    return images
def genXy_bern_X_norm_beta(seed, n, p1, pnull, x_prob=.25, base_prob=.25, beta_sd=1):
    """The X are Bernoulli with success probability x_prob. p1 predictive vars, pnull
    null vars. beta on the p1 vars is ~normal(0, beta_sd) and the intercept is
    logit(base_prob)."""
    if not seed == None:
        npran.seed(seed)
    X_1 = npran.binomial(1, x_prob, (n, p1))
    X_null = npran.binomial(1, x_prob, (n, pnull))
    X = np.concatenate((X_1, X_null), axis=1)
    beta = npran.randn(p1) * beta_sd
    if p1 > 0:
        eta = cutoff(np.dot(X_1, beta) + logit(base_prob))
        y = npran.binomial(1, invlogit(eta), n)
    else:
        y = npran.binomial(1, base_prob, n)
    return X, y
def hht(nruns):
    sum_flips = 0
    for i in range(nruns):
        flips = []
        for j in range(3):
            flips.append(npr.binomial(1, 0.5, 3)[j])
        while not (flips[len(flips) - 1] == 0 and
                   flips[len(flips) - 2] == 1 and
                   flips[len(flips) - 3] == 1):
            flips.extend([npr.binomial(1, 0.5, 1)[0]])
        else:
            pass
        sum_flips += len(flips)
    print(sum_flips)
    avg = float(sum_flips) / nruns
    return avg
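# Usage sketch for hht above (assuming numpy.random is imported as npr, as the
# function expects): the Monte Carlo average should approach 8, the textbook
# expected number of fair-coin flips until the pattern H, H, T first appears.
avg_flips = hht(10000)
print(avg_flips)  # roughly 8 for large nruns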
def maisant_exp(rounds):
    # both max_repeats and rounds need to go to infinity to obtain
    # convergence
    pmf = np.array([0, 0, 0], dtype=float)
    for i in range(rounds):
        flip = [binomial(1, 0.5), binomial(1, 0.5)]
        if flip == [1, 1]:
            continue
        elif flip == [1, 0]:
            pmf[0] += 1
        elif flip == [0, 1]:
            pmf[1] += 1
        elif flip == [0, 0]:
            pmf[2] += 1
        else:
            print('ERROR')
            exit()
    return pmf / sum(pmf)
def transition(iD, ps):
    h, r, u, nr, im, Si = ps
    if len(list(iD)) == 0:
        return iD
    for k, v in iD.items():
        ri = v['q'] / v['mt']
        ap = 1 / (1 + ri) * v['age'] / (1 + v['age'])
        dp = v['rp'] * ri / (1 + ri)
        if v['st'] == 'd' and binomial(1, dp) == 1:
            iD[k]['st'] = 'a'
        elif v['st'] == 'a' and binomial(1, ap) == 1:
            iD[k]['st'] = 'd'
    return iD
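# Minimal usage sketch for transition above (the values are made up; only the dict
# fields the function reads matter, and the ps tuple entries are unused placeholders
# here). Assumes binomial is numpy.random.binomial, as in the function.
iD = {0: {'q': 2.0, 'mt': 1.0, 'rp': 0.5, 'age': 3, 'st': 'a'},
      1: {'q': 0.5, 'mt': 1.0, 'rp': 0.5, 'age': 1, 'st': 'd'}}
ps = (0, 0, 0, 0, 0, 0)  # h, r, u, nr, im, Si are not used inside transition
iD = transition(iD, ps)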
def SimulateCreditNetwork(CN, params, DP, TR, BV, SC):
    """
    CN - credit network
    DP - default probability array
    TR - transaction rate matrix
    BV - buy value matrix
    SC - sell cost matrix
    price - function to determine a price from value and cost
    events - number of transactions to simulate
    """
    price = params["price"]
    events = params["events"]
    strategies = params["strategies"]
    prevent_zeros = params["prevent_zeros"]

    payoffs = dict([(n, 0.) for n in CN.nodes])
    defaulters = filter(lambda n: R.binomial(1, DP[n]), CN.nodes)

    # If all agents with the same strategy default, we'll get bad payoff data
    while prevent_zeros:
        prevent_zeros = False
        for strat in set(strategies):
            agents = filter(lambda a: strategies[a] == strat, CN.nodes)
            if all([a in defaulters for a in agents]):
                prevent_zeros = True
                defaulters = filter(lambda n: R.binomial(1, DP[n]), CN.nodes)
                break

    for d in defaulters:
        for n in CN.nodes:
            if CN.adjacent(n, d):
                payoffs[n] -= CN.weights[(n, d)]
        CN.removeNode(d)
        del payoffs[d]

    m = R.multinomial(events, array(TR.flat))
    l = TR.shape[0]
    transactors = sum([[(i / l, i % l)] * m[i] for i in range(l ** 2)], [])
    R.shuffle(transactors)

    for b, s in transactors:
        try:
            assert b in CN.nodes and s in CN.nodes
            CN.routePayment(b, s, price(BV[b, s], SC[b, s]))
        except (AssertionError, CreditError):
            continue
        payoffs[b] += BV[b, s]
        payoffs[s] -= SC[b, s]

    return payoffs
def num_parental_removed(self):
    p = self.immune_removal_probability()
    n = len(self.viruses)
    removed = binomial(n, p)
    return removed
def get_next_batch(self):
    epoch, batchnum = self.curr_epoch, self.curr_batchnum
    self.advance_batch()
    data = rand(self.num_cases, self.get_data_dims()).astype(n.single)  # <-- changed to rand
    labels = n.require(binomial(1, 0.3, (self.num_cases, self.num_classes)),
                       requirements='C', dtype=n.single)
    return self.curr_epoch, self.curr_batchnum, {'data': data, 'labels': labels}
def genXy_binary_X_norm_beta(seed, n, p1, pnull, base_prob=.25, beta_sd=1,
                             A_base_diag=-1, A_sd=.2):
    '''X is binary from the Ising model, with the coefficients drawn from a normal.
    y is binary, with beta's coefficients also drawn from a normal.'''
    if not seed == None:
        npran.seed(seed)
    p = p1 + pnull
    A = npran.normal(0, .2, (p, p)) - np.diag(A_base_diag * np.ones(p))
    X = draw_random_binary(n, A)
    X_1 = X[:, :p1]
    X_null = X[:, p1:]
    beta = npran.randn(p1) * beta_sd
    if p1 > 0:
        eta = cutoff(np.dot(X_1, beta) + logit(base_prob))
        y = npran.binomial(1, invlogit(eta), n)
    else:
        y = npran.binomial(1, base_prob, n)
    return X, y
def allow_immune_removal_slow(self):
    """
    Removes a number of viruses from the host due to immune system pressure.

    Note: this method may be deprecated in favor of precomputing the number of
    progeny.
    """
    current_time = self.environment.current_time
    last_infection_time = max(self.infection_history.keys())
    time_difference = current_time - last_infection_time

    p = float(time_difference) / (self.immune_halftime + time_difference)
    n = len(self.viruses)
    # print("Time Difference: %s, Probability: %s" % (time_difference, p))

    num_viruses_to_remove = binomial(n, p)
    # num_viruses_to_remove = int(0.6 * len(self.viruses))
    # print('Removing %s viruses out of %s viruses from host %s.' % (
    #     num_viruses_to_remove, len(self.viruses), id(self)))

    viruses_to_remove = sample(self.viruses, num_viruses_to_remove)
    for virus in viruses_to_remove:
        self.remove_virus(virus)
    # print('Host %s is left with %s viruses.' % (id(self), len(self.viruses)))

    return self
def drop_samples(game, prob):
    """Drop samples from a sample game

    Samples are dropped independently with probability prob."""
    sample_map = {}
    for prof, pays in zip(np.split(game.profiles, game.sample_starts[1:]),
                          game.sample_payoffs):
        num_profiles, _, num_samples = pays.shape
        perm = rand.permutation(num_profiles)
        prof = prof[perm]
        pays = pays[perm]
        new_samples, counts = np.unique(
            rand.binomial(num_samples, prob, num_profiles), return_counts=True)
        splits = counts[:-1].cumsum()
        for num, prof_samp, pay_samp in zip(
                new_samples, np.split(prof, splits), np.split(pays, splits)):
            if num == 0:
                continue
            prof, pays = sample_map.setdefault(num, ([], []))
            prof.append(prof_samp)
            pays.append(pay_samp[..., :num])

    if sample_map:
        profiles = np.concatenate(list(itertools.chain.from_iterable(
            x[0] for x in sample_map.values())), 0)
        sample_payoffs = tuple(np.concatenate(x[1]) for x in sample_map.values())
    else:  # No data
        profiles = np.empty((0, game.num_role_strats), dtype=int)
        sample_payoffs = []

    return rsgame.samplegame_copy(game, profiles, sample_payoffs, False)
def SequenceDynSelf(protocell, mu, L, N):
    q = (1 - mu) ** L
    total = np.sum(protocell)
    global test
    while (total != 2 * N):
        # Pick the sequence type
        sec_freq = protocell / total
        values = np.arange(len(protocell))
        custm = sps.rv_discrete(name='custm', values=(values, sec_freq))
        R = custm.rvs(size=1)
        R = R.tolist()
        R = int(R[0])
        sample = R
        test = nprandom.binomial(1, q)
        if sample == 0:
            protocell[0] = protocell[0] + 1
        elif test == 1:
            protocell[sample] = protocell[sample] + 1
        else:
            protocell[0] = protocell[0] + 1
        total = np.sum(protocell)
    return protocell
def __init__(self, setup, data, particle, neg_samples, ent_burnin=0, pred_burnin=0):
    """
    Initialise the data interface
    :param setup: semantic function model with training setup
    :param data: observed data of the form (nodeid, pred, out_labs, out_ids, in_labs, in_ids), with increasing nodeids
    :param particle: fantasy particle of the form (nodeid, out_labs, out_ids, in_labs, in_ids), with increasing nodeids
    :param neg_samples: number of negative pred samples to draw for each node
    :param ent_burnin: (default 0) number of update steps to take for latent entities
    :param pred_burnin: (default 0) number of update steps to take for negative preds
    """
    # Training setup
    self.setup = setup
    self.model = setup.model
    # Negative pred samples
    self.NEG = neg_samples
    # Data
    self.filename = None
    self.load_data(data, ent_burnin, pred_burnin)
    # Fantasy particles
    self.neg_nodes = particle
    self.neg_link_counts = zeros(self.model.L)
    for i, n in enumerate(self.neg_nodes):
        assert i == n[0]
        for label in n[1]:
            # Count outgoing links only, and assume we have entire graphs
            # (similarly, only outgoing links are observed)
            self.neg_link_counts[label] += 1
    self.K = len(self.neg_nodes)
    self.neg_ents = random.binomial(1, self.model.C / self.model.D,
                                    (self.K, self.model.D))
def train_test_divide(mat, percent=0.9):
    M, N = mat.shape
    # mark each entry for the test set independently with probability (1 - percent)
    test_set = np.array(rnd.binomial(1.0, 1.0 - percent, size=M * N), dtype=bool)
    # convert the flat boolean mask to row/column indices (the original passed the
    # boolean array directly to unravel_index, which does not select masked positions)
    test_set_mat = np.unravel_index(np.flatnonzero(test_set), (M, N))
    test_val_mat = mat[test_set_mat].copy()
    mat[test_set_mat] = 0
    return test_set_mat, test_val_mat
def generate_docs(phi, ndocs, nwords_per_doc, alpha=0.1, p0=0.8):
    K, V = phi.shape
    theta = np.zeros((ndocs, K), dtype=float)
    switch = np.append([0], binomial(1, p0, ndocs - 1))
    switch = switch == 0
    samples = dirichlet([alpha] * K, size=int(switch.sum()))
    theta[switch] = samples

    last_theta = None
    for t in xrange(0, ndocs):
        if switch[t] == True:
            last_theta = theta[t]
            continue
        theta[t] = last_theta

    def gen_z(theta):
        z = np.repeat(np.arange(K), multinomial(nwords_per_doc, theta, size=1)[0])
        np.random.shuffle(z)
        return z

    z = np.apply_along_axis(gen_z, 1, theta)

    def gen_w(z):
        return np.random.multinomial(1, phi[z]).nonzero()[0][0]

    w = np.vectorize(gen_w)(z)

    return w, z, theta, switch
def sim(N, p0, r):
    '''simulate from Wright-Fisher with pure drift'''
    p = p0
    for _ in range(r):
        p = nprand.binomial(N, p, 1)
        p = p[0] / float(N)
    return p
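# Usage sketch for sim above (assuming numpy.random is imported as nprand): under
# pure drift the long-run fixation probability of an allele equals its starting
# frequency, so this Monte Carlo estimate should be roughly p0 = 0.2.
runs = [sim(N=100, p0=0.2, r=500) for _ in range(200)]
fixation_rate = sum(1 for p in runs if p == 1.0) / len(runs)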
def theta_t(th, n, p):
    pt = pt_t(th, n, p)
    if binomial(1, pt) == 1:
        return (th, pt, np.log(pt))
    tt = dirichlet(alpha + n, 1)[0]
    return (tt, pt, np.log(1 - pt) + dir_logpdf(tt, alpha + n))
def randStats(self):
    randOB = binomial(self.pa, self.stats[4])
    self.randStats[4] = randOB / self.pa                   # get rand OBP
    self.randStats[0] = randOB / self.ob * self.stats[0]   # get rand R
    self.randStats[1] = randOB / self.ob * self.stats[1]   # get rand HR
    self.randStats[2] = randOB / self.ob * self.stats[2]   # get rand RBI
    self.randStats[3] = randOB / self.ob * self.stats[3]   # get rand SB
def prob_thin_mask(counts, pdict, rnd_pct=0.05):
    '''Generate a boolean mask given:

    pdict: Probability dictionary from build_prob_dict.
    counts: A list of derived allele counts.
    rnd_pct: Random probability of thinning out.

    Returned outmask[i] is True if site i should be kept.
    Can be added (with and) to nlsmask to generate a final mask.
    Note: This function also filters out non-segregating sites.
    '''
    cond = lambda x: binomial(1, x)
    cond2 = lambda: binomial(1, 1.0 - rnd_pct)
    test = lambda x: True if cond(x) and cond2() else False
    outmask = []
    for i, count in enumerate(counts):
        outmask.append(test(pdict[count]))
    return np.array(outmask, dtype=bool)
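# Hypothetical usage of prob_thin_mask above (assuming numpy and binomial are imported
# as the function expects): pdict maps derived-allele counts to keep probabilities and
# is normally built by build_prob_dict; a toy dictionary is used here instead.
pdict = {0: 0.0, 1: 0.9, 2: 0.5, 3: 0.9, 4: 0.0}
counts = [0, 1, 2, 2, 3, 4]
mask = prob_thin_mask(counts, pdict, rnd_pct=0.05)  # boolean array with len(counts) entries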
def make_noisy_probs(exact, noise_type, noise):
    """Noisify probabilities

    Args:
        exact - 2D np.array, rows are options, cols are ppl
        model_params: dict
    Outputs:
        2D np.array, rows are options, cols are ppl
    """
    if noise_type == "noiseless":
        return exact
    elif noise_type == "binomial":
        num_hypothetical_trials = noise
        num_successes = nr.binomial(num_hypothetical_trials, exact[0])
        noisy0 = num_successes / num_hypothetical_trials
        return np.array([noisy0, 1 - noisy0])
    elif noise_type == "beta":
        alpha_beta = jmutils.beta_shape(exact[0], noise)
        noisy0 = nr.beta(*alpha_beta)
        return np.array([noisy0, 1 - noisy0])
    elif noise_type == "truncnorm":
        scale = noise
        noisy0 = truncnorm.rvs(-exact[0] / scale, (1 - exact[0]) / scale,
                               loc=exact[0], scale=scale)
        return np.array([noisy0, 1 - noisy0])
    elif noise_type == "log_odds":
        lo = np.log(exact[0] / exact[1])
        noisy_lo = nr.normal(lo, noise)
        given_1 = 1 / (math.exp(noisy_lo) + 1)
        return np.array([1 - given_1, given_1])
    else:
        print("Error: meta noise_type not specified correctly")
def test_single_sequence_1Q(self):

    N = 5    # Counts per timestep
    T = 100  # Number of timesteps

    # The confidence of the statistical tests. Here we set it to 0.999, which means that
    # if we detect drift we are 0.999 confident that we haven't incorrectly rejected the
    # initial hypothesis of no drift.
    confidence = 0.999

    # A drifting probability to obtain the measurement outcome with index 1 (out of [0,1])
    def pt_drift(t):
        return 0.5 + 0.2 * np.cos(0.1 * t)

    # A drift-free probability to obtain the measurement outcome with index 1 (out of [0,1])
    def pt_nodrift(t):
        return 0.5

    # If we want the sequence to have a label, we define a list for this (here, a list of length 1).
    # The labels can, but need not be, pyGSTi GateString objects.
    sequences = [pygsti.objects.GateString(None, 'Gx(Gi)^64Gx'), ]

    # If we want the outcomes to have labels, we define a list for this.
    outcomes = ['0', '1']

    # Let's create some fake data by sampling from these p(t) at integer times. Here we have
    # created a 1D array, but we could have instead created a 1 x 1 x 1 x T array.
    data_1seq_drift = np.array([binomial(N, pt_drift(t)) for t in range(0, T)])
    data_1seq_nodrift = np.array([binomial(N, pt_nodrift(t)) for t in range(0, T)])

    # If we want frequencies in Hertz, we need to specify the timestep in seconds. If this isn't
    # specified, the frequencies are given in 1/timestep with timestep defaulting to 1.
    timestep = 1e-5

    # We hand these 1D arrays to the analysis function, along with the number of counts, and other
    # optional information
    results_1seq_drift = drift.do_basic_drift_characterization(
        data_1seq_drift, counts=N, outcomes=outcomes, confidence=confidence,
        timestep=timestep, indices_to_sequences=sequences)
    results_1seq_nodrift = drift.do_basic_drift_characterization(
        data_1seq_nodrift, counts=N, outcomes=outcomes, confidence=confidence,
        timestep=timestep, indices_to_sequences=sequences)

    if bMPL:
        results_1seq_drift.plot_power_spectrum()
        results_1seq_nodrift.plot_power_spectrum()

    print(results_1seq_drift.global_pvalue)
    print(results_1seq_nodrift.global_pvalue)

    # The power spectrum obtained after averaging over everything
    print(results_1seq_drift.global_power_spectrum[:4])
    # The power spectrum obtained after averaging over everything except sequence label
    print(results_1seq_drift.ps_power_spectrum[0, :4])
    # The power spectrum obtained after averaging over everything except entity label
    print(results_1seq_drift.pe_power_spectrum[0, :4])
    # The power spectrum obtained after averaging over everything except sequence and entity label
    print(results_1seq_drift.pspe_power_spectrum[0, 0, :4])
    # The two power spectra obtained after averaging over nothing
    print(results_1seq_drift.pspepo_power_spectrum[0, 0, 0, :4])
    print(results_1seq_drift.pspepo_power_spectrum[0, 0, 1, :4])

    # Let's create an array of the true probability. This needs to be
    # of dimension S x E x M x T
    parray_1seq = np.zeros((1, 1, 2, T), float)
    parray_1seq[0, 0, 0, :] = np.array([pt_drift(t) for t in range(0, T)])
    parray_1seq[0, 0, 1, :] = 1 - parray_1seq[0, 0, 0, :]

    # The measurement outcome index we want to look at (here the estimated p(t)
    # for one index is just 1 - the p(t) for the other index, because we are
    # looking at a two-outcome measurement).
    outcome = 1

    # If we hand the parray to the plotting function, it will also plot
    # the true probability alongside our estimate from the data
    if bMPL:
        results_1seq_drift.plot_estimated_probability(
            sequence=0, outcome=outcome, parray=parray_1seq, plot_data=True)
def rvs(self, size=None):
    return random.binomial(self.n, self.p, size=size)
def test_p_zero_stream(self):
    # Regression test for gh-14522. Ensure that future versions
    # generate the same variates as version 1.16.
    np.random.seed(12345)
    assert_array_equal(random.binomial(1, [0, 0.25, 0.5, 0.75, 1]),
                       [0, 0, 0, 1, 1])
def main(split):
    kept_indices = None
    for quality in ("LR", "HR"):
        img_folder = '../../datasets/{}_{}_bicLRx4/{}/x4/*'.format(  # glob matching pattern
            dataset_name, split, quality)
        lmdb_save_path = '../../datasets/{}_{}_bicLRx4/{}/x4{}.lmdb'.format(
            dataset_name, split, quality,
            removed_fraction if removed_fraction else "")
        meta_info = {'name': 'DIV2K800_sub_GT'}
        mode = 2  # 1 for reading all the images to memory and then writing to lmdb (more memory);
        # 2 for reading several images and then writing to lmdb, loop over (less memory)
        batch = 1000  # Used in mode 2. After batch images, lmdb commits.
        ###########################################
        if not lmdb_save_path.endswith('.lmdb'):
            raise ValueError("lmdb_save_path must end with \'lmdb\'.")
        #### whether the lmdb file exist
        if osp.exists(lmdb_save_path):
            print('Folder [{:s}] already exists. Exit...'.format(lmdb_save_path))
            sys.exit(1)

        img_list = sorted(glob.glob(img_folder))
        if removed_fraction and split == 'valid':
            if kept_indices is None:
                kept_indices = binomial(1, (1 - removed_fraction), len(img_list))
            img_list = [
                item for k, item in enumerate(img_list) if kept_indices[k]
            ]

        if mode == 1:
            print('Read images...')
            dataset = [cv2.imread(v, cv2.IMREAD_UNCHANGED) for v in img_list]
            data_size = sum([img.nbytes for img in dataset])
        elif mode == 2:
            print('Calculating the total size of images...')
            data_size = sum(os.stat(v).st_size for v in img_list)
        else:
            raise ValueError('mode should be 1 or 2')

        key_l = []
        resolution_l = []
        pbar = ProgressBar(len(img_list))
        env = lmdb.open(lmdb_save_path, map_size=data_size * 10)
        txn = env.begin(write=True)  # txn is a Transaction object
        for i, v in enumerate(img_list):
            pbar.update('Write {}'.format(v))
            base_name = osp.splitext(osp.basename(v))[0]
            key = base_name.encode('ascii')
            data = dataset[i] if mode == 1 else cv2.imread(v, cv2.IMREAD_UNCHANGED)
            if data.ndim == 2:
                H, W = data.shape
                C = 1
            else:
                H, W, C = data.shape
            txn.put(key, data)
            key_l.append(base_name)
            resolution_l.append('{:d}_{:d}_{:d}'.format(C, H, W))
            # commit in mode 2
            if mode == 2 and i % batch == 1:
                txn.commit()
                txn = env.begin(write=True)

        txn.commit()
        env.close()
        print('Finish writing lmdb.')

        #### create meta information
        # check whether all the images are the same size
        same_resolution = (len(set(resolution_l)) <= 1)
        if same_resolution:
            meta_info['resolution'] = [resolution_l[0]]
            meta_info['keys'] = key_l
            print('All images have the same resolution. Simplify the meta info...')
        else:
            meta_info['resolution'] = resolution_l
            meta_info['keys'] = key_l
            print('Not all images have the same resolution. Save meta info for each image...')

        #### pickle dump
        pickle.dump(meta_info,
                    open(osp.join(lmdb_save_path, 'meta_info.pkl'), "wb"))
        print('Finish creating lmdb meta info.')
            info += rv.cov()
    elif ties == 'cox':
        raise NotImplementedError('Cox tie breaking method not implemented')
    else:
        raise NotImplementedError('tie breaking method not recognized')
    return score


if __name__ == '__main__':
    import numpy.random as R
    n = 100
    X = np.array([0] * n + [1] * n)
    b = 0.4
    lin = 1 + b * X
    Y = R.standard_exponential((2 * n, )) / lin
    delta = R.binomial(1, 0.9, size=(2 * n, ))

    subjects = [Observation(Y[i], delta[i]) for i in range(2 * n)]
    for i in range(2 * n):
        subjects[i].X = X[i]

    import scikits.statsmodels.sandbox.formula as F
    x = F.Quantitative('X')
    f = F.Formula(x)

    c = CoxPH(subjects, f)

    # c.cache()  # temp file cleanup doesn't work on windows
    c = CoxPH(subjects, f, time_dependent=True)
    c.cache()  # this creates tempfile cache,
def _rvs(self, n, p):
    return mtrand.binomial(n, p, self._size)
def get_binomial_mab_env(params: Sequence[Tuple[int, float]]) -> 'MABEnv':
    return MABEnv(
        [lambda n=n, p=p: float(binomial(n, p, 1)[0]) for n, p in params])
def next_state(self, state: State) -> State:
    up_move: int = binomial(1, self.up_prob(state), 1)[0]
    return Process3.State(num_up_moves=state.num_up_moves + up_move,
                          num_down_moves=state.num_down_moves + 1 - up_move)
def next_state(self, state: State) -> State:
    up_move: int = binomial(1, self.up_prob(state), 1)[0]
    return Process2.State(price=state.price + up_move * 2 - 1,
                          is_prev_move_up=bool(up_move))
def next_state(self, state: State) -> State:
    up_move: int = binomial(1, self.up_prob(state), 1)[0]
    return Process1.State(price=state.price + up_move * 2 - 1)
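# Standalone sketch of the pattern the Process*.next_state methods above share:
# draw a single Bernoulli up-move with binomial(1, p, 1)[0] and map it to a +1/-1
# price step. The mean-reverting up_prob below is illustrative only, not the
# up_prob of the classes above.
import math
from numpy.random import binomial

price = 100
for _ in range(10):
    up_prob = 1.0 / (1.0 + math.exp(0.1 * (price - 100)))  # pulls price back toward 100
    up_move = binomial(1, up_prob, 1)[0]
    price = price + up_move * 2 - 1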
def ar_trace(frame: int, pfire: float, g: np.ndarray):
    S = random.binomial(n=1, p=pfire, size=frame).astype(np.float)
    C = apply_arcoef(S, g)
    return C, S
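# ar_trace above depends on apply_arcoef, which is defined elsewhere; the sketch below
# is an assumed equivalent, not the original implementation. It runs the binary spike
# train S through an AR filter with coefficients g, i.e. C[t] = S[t] + sum_i g[i] * C[t-1-i].
import numpy as np
from scipy.signal import lfilter

def apply_arcoef_sketch(S: np.ndarray, g: np.ndarray) -> np.ndarray:
    return lfilter([1.0], np.concatenate(([1.0], -np.asarray(g))), S)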
def binomial_sigma(p):
    sample = binomial(n=1, p=p)
    return sample
# generate 10 random floats in [1, 6)
5 * random.random(10) + 1
random.uniform(1, 6, 10)

# generate 10 random integers in [1, 6)
random.randint(1, 6, 10)

'''generate a 5 x 2 sample from the standard normal distribution, e.g.
array([[-0.676922  ,  0.61167629],
       [ 1.03099952,  0.93128012],
       [-0.83921752, -0.30921238],
       [ 0.33126343,  0.97554513],
       [-0.47917424, -0.18565898]])'''
random.normal(size=(5, 2))

# generate 5 binomial samples with n=5, p=0.5, e.g. array([2, 2, 3, 3, 4])
random.binomial(n=5, p=0.5, size=5)

# sample 7 elements from a with replacement, e.g. array([0, 2, 4, 2, 0, 4, 9])
a = np.arange(10)
random.choice(a, 7)

# sample 7 elements from a without replacement, e.g. array([4, 7, 8, 3, 5, 1, 0])
random.choice(a, 7, replace=False)

# shuffle a and return a new array
b = random.permutation(a)

# shuffle a in place
random.shuffle(a)

# generate a random bytes sequence of length 9 and return it as a str,
# e.g. b'=\xa6\xaeK\xb8\xbf&\xa2\xf4'
random.bytes(9)
def get_bernoulli_mab_env(probs: Sequence[float]) -> 'MABEnv':
    return MABEnv([lambda p=p: float(binomial(1, p, 1)[0]) for p in probs])
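# Minimal sketch of the per-arm lambdas get_bernoulli_mab_env builds (MABEnv itself is
# defined elsewhere and not assumed here): each call to an arm returns a Bernoulli
# reward in {0.0, 1.0}.
from numpy.random import binomial

arms = [lambda p=p: float(binomial(1, p, 1)[0]) for p in [0.2, 0.5, 0.8]]
rewards = [arm() for arm in arms]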
def generate_curia_synthetic_data(
        # Binary or continuous
        binary_treatment: bool = True,
        binary_outcome: bool = False,
        # Number of records
        n_train: int = 1000,
        n_test: int = 1000,
        # Number of features to generate by type
        binary_dim: int = 5,
        uniform_dim: int = 5,
        normal_dim: int = 5,
        # Features to have effect on ITE, outcome and treatment propensity
        n_confounders: int = 2,
        n_features_outcome: int = 3,
        n_features_treatment_effect: int = 3,
        n_features_propensity: int = 3,
        # outcome_noise_sd
        outcome_noise_sd: int = 1,
        # Features to drop
        missing_data_scaler: float = 0.5,
        # Treatment share scaler
        treatment_share_scaler: float = 0.05,
        # Random seed
        seed: int = 42) -> object:

    #############################################################
    # Initiate variables and make some checks
    #############################################################

    # Sum train and test together for now
    n_total = n_train + n_test

    # Calculate actual values for the number of the missing features
    n_features_to_drop_outcome_not_counfounders = math.floor(
        (n_features_outcome - n_confounders) * missing_data_scaler)
    n_features_to_drop_treatment_effect_not_counfounders = math.floor(
        (n_features_treatment_effect - n_confounders) * missing_data_scaler)
    n_features_to_drop_confounders = math.floor(n_confounders * missing_data_scaler)
    n_features_to_drop_propensity = math.floor(n_features_propensity * missing_data_scaler)

    # create empty dataframe
    modeling_df = pd.DataFrame()

    #############################################################
    # Generate features
    #############################################################

    np.random.seed(seed)

    # Generate Age - we will add mean=70 and sd=30 later to avoid high influence of this variable
    modeling_df['age'] = normal(loc=0, scale=1, size=n_total)

    # Generate features with uniform distribution - will multiply by 10 later
    for i in range(0, uniform_dim):
        modeling_df['sdoh_' + str(i)] = np.ceil(uniform(size=n_total) * 10) / 10

    # Generate features with bernoulli distribution
    binary_coefs = uniform(size=binary_dim)
    for i in range(0, binary_dim):
        binary_coef = binary_coefs[i]
        modeling_df['binary_flag_' + str(i)] = binomial(n=1, p=binary_coef, size=n_total)

    # Generate features with normal distribution
    multivariate_df = pd.DataFrame(
        multivariate_normal(np.zeros(normal_dim), np.diag(np.ones(normal_dim)), n_total),
        columns=['vector_' + str(i) for i in range(0, normal_dim)])
    modeling_df = pd.concat([modeling_df, multivariate_df], axis=1)

    # Extract name of the features
    features = pd.Series(modeling_df.columns)

    #############################################################
    # Sample features for the treatment effect and the outcomes
    #############################################################

    # sample features for the confounders
    confounders_features = features.sample(n_confounders, random_state=1)
    outcome_features_not_confounders = features[
        ~features.isin(confounders_features)].sample(n_features_outcome - n_confounders,
                                                     random_state=1)
    outcome_features = pd.concat(
        [outcome_features_not_confounders, confounders_features])

    # sample features for the treatment effect
    treatment_effect_features_not_confounders = features[
        ~features.isin(outcome_features)].sample(n_features_treatment_effect - n_confounders,
                                                 random_state=1)
    treatment_effect_features = pd.concat(
        [treatment_effect_features_not_confounders, confounders_features])

    # sample features for the propensity score
    propensity_score_features = features.sample(n_features_propensity, random_state=1)

    #############################################################
    # Generate outcomes
    #############################################################

    # Generate coefficients
    beta_outcome = normal(0, 1, n_features_outcome)

    # Generate outcomes
    modeling_df['y0'] = np.dot(modeling_df[outcome_features],
                               beta_outcome) + normal(0, outcome_noise_sd)

    #############################################################
    # Generate treatment effect
    #############################################################

    # Generate coefficients
    beta_te = normal(0, 1, n_features_treatment_effect)

    # Generate outcomes
    modeling_df['true_ite'] = np.dot(modeling_df[treatment_effect_features], beta_te)

    #############################################################
    # Generate propensity score
    #############################################################

    # Generate coefficients for propensity score
    # Draw coefficients from beta distributions
    beta_propensity_score = normal(0, 1, n_features_propensity)

    # Generate propensity score and rescale it again from 0 to 1
    modeling_df['true_treatment_propensity'] = np.dot(
        modeling_df[propensity_score_features], beta_propensity_score)

    # Center the distribution first
    modeling_df['true_treatment_propensity'] = modeling_df['true_treatment_propensity'] - \
        modeling_df['true_treatment_propensity'].mean()

    # Rescale to -1 to +1
    modeling_df['true_treatment_propensity'] = modeling_df['true_treatment_propensity'] / \
        modeling_df['true_treatment_propensity'].abs().max()

    # Rescale to get treatment_share_scaler
    modeling_df['true_treatment_propensity'] = modeling_df['true_treatment_propensity'] * \
        min(treatment_share_scaler, 1 - treatment_share_scaler)

    # Move to the right
    modeling_df['true_treatment_propensity'] = modeling_df['true_treatment_propensity'] + \
        treatment_share_scaler

    #############################################################
    # Generate treatment
    #############################################################

    if binary_treatment:
        modeling_df['treatment'] = binomial(
            n=1, p=modeling_df['true_treatment_propensity'], size=n_total)
    else:
        modeling_df['treatment'] = modeling_df['true_treatment_propensity']

    #############################################################
    # Generate outcome with treatment effect
    #############################################################

    modeling_df['y1'] = modeling_df['y0'] + modeling_df['true_ite']
    modeling_df['y'] = modeling_df['y0'] + \
        modeling_df['true_ite'] * modeling_df['treatment']

    # Rescale from 0 to 1
    y_min = modeling_df[['y', 'y0', 'y1']].min().min()
    y_max = modeling_df[['y', 'y0', 'y1']].max().max()
    scale_factor = 1 / (y_max - y_min)
    modeling_df['y'] = (modeling_df['y'] - y_min) * scale_factor
    modeling_df['y0'] = (modeling_df['y0'] - y_min) * scale_factor
    modeling_df['y1'] = (modeling_df['y1'] - y_min) * scale_factor
    modeling_df['true_ite_rescaled'] = modeling_df['true_ite'] * scale_factor
    modeling_df['true_ite'] = modeling_df['y1'] - \
        modeling_df['y0']  # modeling_df['true_ite'] * scale_factor

    # If binary - rescale to [0,1] and use as probability to generate bernoulli outcome
    if binary_outcome:
        modeling_df['y'] = binomial(n=1, p=modeling_df['y'], size=n_total)

    #############################################################
    # Features final adjustments
    #############################################################

    # Rescale age feature
    modeling_df['age'] = np.where(modeling_df['age'] * 30 + 70 < 50, 50,
                                  modeling_df['age'] * 30 + 70)

    # Rescale SDOH features
    for i in range(0, uniform_dim):
        modeling_df['sdoh_' + str(i)] = modeling_df['sdoh_' + str(i)] * 10

    #############################################################
    # Drop features
    #############################################################

    # features_to_drop_outcome_not_counfounders
    features_to_drop_outcome_not_counfounders = outcome_features_not_confounders.sample(
        n_features_to_drop_outcome_not_counfounders, random_state=1)

    # features_to_drop_treatment_effect_not_confounders
    features_to_drop_treatment_effect_not_confounders = treatment_effect_features_not_confounders.sample(
        n_features_to_drop_treatment_effect_not_counfounders, random_state=1)

    # features_to_drop_confounders
    features_to_drop_confounders = confounders_features.sample(
        n_features_to_drop_confounders, random_state=1)

    # features_to_drop_propensity
    features_to_drop_propensity = propensity_score_features.sample(
        n_features_to_drop_propensity, random_state=1)

    # Now drop all those features
    all_features_to_drop = pd.concat([
        features_to_drop_outcome_not_counfounders,
        features_to_drop_treatment_effect_not_confounders,
        features_to_drop_confounders, features_to_drop_propensity
    ]).drop_duplicates()

    for col in all_features_to_drop:
        # print('Dropping {} from the columns'.format([col]))
        assert (col in modeling_df), 'All features to drop should be in the featureset'
        del modeling_df[col]

    #############################################################
    # Return results
    #############################################################

    # Randomly select train and test
    y = modeling_df['y']
    t = modeling_df['treatment']
    true_ite = modeling_df['true_ite']
    true_treatment_propensity = modeling_df['true_treatment_propensity']
    X = modeling_df.drop([
        'y', 'y0', 'y1', 'treatment', 'true_ite', 'true_treatment_propensity',
        'true_ite_rescaled'
    ], axis=1)

    X, Xte, T, _, Y, _, _, ITEte = train_test_split(X.to_numpy(),
                                                    t.to_numpy(),
                                                    y.to_numpy(),
                                                    true_ite.to_numpy(),
                                                    test_size=0.3,
                                                    random_state=seed)
    Xtr, Xval, Ttr, Tval, Ytr, Yval = train_test_split(X, T, Y,
                                                       test_size=0.5,
                                                       random_state=seed)

    data = (Xtr, Xval, Xte, Ttr, Tval, Ytr, Yval, ITEte)
    return data
def run_model(self):

    ## initialize data structure
    self.res = np.zeros([self.duration, 12], dtype=np.float32)

    self.res[0, 0] = self.nf1
    self.res[0, 1] = self.nf2
    self.res[0, 2] = self.nf3
    self.res[0, 3] = self.nm1
    self.res[0, 4] = self.nm2
    self.res[0, 5] = self.nm3
    self.res[0, 6] = self.vac3
    self.res[0, 7] = self.vac2
    self.res[0, 8] = self.vac1
    self.res[0, 9] = self.female_promotion_probability_1
    self.res[0, 10] = self.female_promotion_probability_2
    self.res[0, 11] = np.float32(
        sum(list([self.nf1, self.nf2, self.nf3])) / sum(
            list([self.nf1, self.nf2, self.nf3,
                  self.nm1, self.nm2, self.nm3])))

    hiring_rate_female_level_1 = self.bf1
    hiring_rate_female_level_2 = self.bf2
    hiring_rate_female_level_3 = self.bf3
    attrition_rate_female_level_1 = self.df1
    attrition_rate_female_level_2 = self.df2
    attrition_rate_female_level_3 = self.df3
    attrition_rate_male_level_1 = self.dm1
    attrition_rate_male_level_2 = self.dm2
    attrition_rate_male_level_3 = self.dm3
    probability_of_outside_hire_level_3 = self.phire3
    probability_of_outside_hire_level_2 = self.phire2
    male_promotion_probability_1_2 = self.male_promotion_probability_1
    male_promotion_probability_2_3 = self.male_promotion_probability_2

    for i in range(1, self.duration):
        # initialize variables for this iteration
        prev_number_of_females_level_1 = self.res[i - 1, 0]
        prev_number_of_females_level_2 = self.res[i - 1, 1]
        prev_number_of_females_level_3 = self.res[i - 1, 2]
        prev_number_of_males_level_1 = self.res[i - 1, 3]
        prev_number_of_males_level_2 = self.res[i - 1, 4]
        prev_number_of_males_level_3 = self.res[i - 1, 5]
        prev_number_of_vacancies_level_3 = self.res[i - 1, 6]
        prev_number_of_vacancies_level_2 = self.res[i - 1, 7]
        prev_number_of_vacancies_level_1 = self.res[i - 1, 8]
        prev_promotion_rate_female_level_1 = self.female_promotion_probability_1
        prev_promotion_rate_female_level_2 = self.female_promotion_probability_2

        if np.isnan(prev_promotion_rate_female_level_1):
            prev_promotion_rate_female_level_1 = 0
        if np.isnan(prev_promotion_rate_female_level_2):
            prev_promotion_rate_female_level_2 = 0

        prev_gender_proportion_of_department = np.float32(
            sum(list([prev_number_of_females_level_1,
                      prev_number_of_females_level_2,
                      prev_number_of_females_level_3])) /
            (sum(list([prev_number_of_females_level_1,
                       prev_number_of_females_level_2,
                       prev_number_of_females_level_3,
                       prev_number_of_males_level_1,
                       prev_number_of_males_level_2,
                       prev_number_of_males_level_3]))))

        # Process Model

        # first both female and males leave the department according to binomial probability.
        female_attrition_level_3 = binomial(prev_number_of_females_level_3,
                                            attrition_rate_female_level_3)
        male_attrition_level_3 = binomial(prev_number_of_males_level_3,
                                          attrition_rate_male_level_3)

        # the departures create a set of vacancies. These vacancies are the basis for new hiring
        total_vacancies_3 = female_attrition_level_3 + male_attrition_level_3

        # women are hired first and then men
        hiring_female_3 = binomial(
            total_vacancies_3,
            probability_of_outside_hire_level_3 * hiring_rate_female_level_3)
        hiring_male_3 = binomial(
            max(0, total_vacancies_3 - hiring_female_3),
            probability_of_outside_hire_level_3 * (1 - hiring_rate_female_level_3))

        # promotion after hiring level 3
        promotions_female_after_hiring_2_3 = binomial(
            max(prev_number_of_females_level_2,
                total_vacancies_3 - hiring_female_3 - hiring_male_3),
            prev_promotion_rate_female_level_2)

        # formula should read that either the remaining vacancies or the previous number
        # of males--whichever is smallest. But need to make sure no negative values.
        promotions_of_males_level_2_3 = binomial(
            min(prev_number_of_males_level_2,
                max(0, total_vacancies_3 - hiring_female_3 - hiring_male_3
                    - promotions_female_after_hiring_2_3)),
            male_promotion_probability_2_3)

        assert (promotions_of_males_level_2_3 >= 0), \
            "promotions_of_males_level_2_3 is negative"

        # attrition at level 2 - either people leave from attrition or promotion
        female_attrition_level_2 = binomial(
            max(0, prev_number_of_females_level_2 - promotions_female_after_hiring_2_3),
            attrition_rate_female_level_2)
        male_attrition_level_2 = binomial(
            max(0, prev_number_of_males_level_2 - promotions_of_males_level_2_3),
            attrition_rate_male_level_2)

        # the departures create a set of vacancies. These vacancies are the basis for new hiring
        total_vacancies_2 = sum(
            list([female_attrition_level_2, male_attrition_level_2,
                  promotions_female_after_hiring_2_3, promotions_of_males_level_2_3]))

        assert (total_vacancies_2 >= 0), "total vacancies level 2 is less than zero"

        # TODO set to hiring first
        hiring_female_2 = binomial(
            max(0, total_vacancies_2),
            probability_of_outside_hire_level_2 * hiring_rate_female_level_2)
        hiring_male_2 = binomial(
            max(0, total_vacancies_2 - hiring_female_2),
            1 - probability_of_outside_hire_level_2 * hiring_rate_female_level_2)

        # clamp the promotion pools to non-negative counts so binomial() never receives
        # a negative n (the original used min(0, ...) here, which could only yield zero
        # promotions or raise an error)
        promotions_of_females_level_1_2 = binomial(
            max(0, min(prev_number_of_females_level_1,
                       total_vacancies_2 - hiring_female_2 - hiring_male_2)),
            prev_promotion_rate_female_level_1)
        promotions_of_males_level_1_2 = binomial(
            max(0, min(prev_number_of_males_level_1,
                       total_vacancies_2 - hiring_female_2 - hiring_male_2
                       - promotions_of_females_level_1_2)),
            male_promotion_probability_1_2)

        assert (promotions_of_females_level_1_2 >= 0), \
            "promotions of females level 1-2 is negative"
        assert (promotions_of_males_level_1_2 >= 0), \
            "promotions of males level 1-2 is negative"

        total_hiring_2 = hiring_female_2 + hiring_male_2

        ## Level 1
        female_attrition_level_1 = binomial(
            max(0, prev_number_of_females_level_1 - promotions_of_females_level_1_2),
            attrition_rate_female_level_1)
        male_attrition_level_1 = binomial(
            max(0, prev_number_of_males_level_1),
            attrition_rate_male_level_1)

        total_vacancies_1 = sum(
            list([female_attrition_level_1, male_attrition_level_1,
                  promotions_of_females_level_1_2, promotions_of_males_level_1_2]))

        hiring_female_1 = binomial(total_vacancies_1, hiring_rate_female_level_1)
        hiring_male_1 = binomial(
            max(0, total_vacancies_1 - hiring_female_1),
            1 - hiring_rate_female_level_1)

        # Write state variables to array and move to next iteration
        self.res[i, 0] = number_of_females_level_1 = sum(
            list([prev_number_of_females_level_1,
                  neg(female_attrition_level_1),
                  neg(promotions_of_females_level_1_2),
                  hiring_female_1]))
        self.res[i, 1] = number_of_females_level_2 = max(
            0,
            sum(list([prev_number_of_females_level_2,
                      neg(female_attrition_level_2),
                      neg(promotions_female_after_hiring_2_3),
                      promotions_of_females_level_1_2,
                      hiring_female_2])))
        self.res[i, 2] = number_of_females_level_3 = sum(
            list([prev_number_of_females_level_3,
                  neg(female_attrition_level_3),
                  promotions_female_after_hiring_2_3,
                  hiring_female_3]))
        self.res[i, 3] = number_of_males_level_1 = sum(
            list([prev_number_of_males_level_1,
                  neg(male_attrition_level_1),
                  neg(promotions_of_males_level_1_2),
                  hiring_male_1]))
        self.res[i, 4] = number_of_males_level_2 = sum(
            list([prev_number_of_males_level_2,
                  neg(male_attrition_level_2),
                  neg(promotions_of_males_level_2_3),
                  promotions_of_males_level_1_2,
                  hiring_male_2]))
        self.res[i, 5] = number_of_males_level_3 = sum(
            list([prev_number_of_males_level_3,
                  neg(male_attrition_level_3),
                  promotions_of_males_level_2_3,
                  hiring_male_3]))
        self.res[i, 6] = number_of_vacancies_level_3 = sum(
            list([male_attrition_level_3, female_attrition_level_3]))
        self.res[i, 7] = number_of_vacancies_level_2 = sum(
            list([male_attrition_level_2, female_attrition_level_2,
                  promotions_female_after_hiring_2_3, promotions_of_males_level_2_3]))
        self.res[i, 8] = number_of_vacancies_level_1 = sum(
            list([male_attrition_level_1, female_attrition_level_1,
                  promotions_of_males_level_1_2, promotions_of_females_level_1_2]))
        self.res[i, 9] = promotion_rate_female_level_1 = self.female_promotion_probability_1
        self.res[i, 10] = promotion_rate_women_level_2 = self.female_promotion_probability_2
        self.res[i, 11] = gender_proportion_of_department = np.float32(
            truediv(
                sum(list([number_of_females_level_1,
                          number_of_females_level_2,
                          number_of_females_level_3])),
                sum(list([number_of_females_level_1,
                          number_of_females_level_2,
                          number_of_females_level_3,
                          number_of_males_level_1,
                          number_of_males_level_2,
                          number_of_males_level_3]))))

        # print(self.res[i,:])

    ## Print Data matrix
    df_ = pd.DataFrame(self.res)
    df_.columns = ['f1', 'f2', 'f3', 'm1', 'm2', 'm3', 't3', 't2', 't1',
                   'prom1', 'prom2', 'gendprop']

    recarray_results = df_.to_records(index=True)
    self.run = recarray_results
    return recarray_results
x = random.normal(loc=1, scale=4, size=(2, 3))
print("Normal distribution")
print(x)
plt.hist(x, 10)
plt.show()

import matplotlib.pyplot as plt
import seaborn as sns
# sns.distplot(random.normal(size=100), hist=False)
# plt.show()

# Binomial Distribution is a Discrete Distribution, e.g. binary scenarios such as the toss of a coin.
# It has three parameters:
# n - number of trials.
# p - probability of occurrence of each trial (e.g. 0.5 for each toss of a coin).
# size - The shape of the returned array.
from numpy import random
x = random.binomial(n=10, p=0.5, size=10)
# print("binomial distribution"); print(x)

from numpy import random
import matplotlib.pyplot as plt
import seaborn as sns
# sns.distplot(random.binomial(n=10, p=0.5, size=100), hist=True, kde=False)
# plt.show()

# Poisson Distribution is a Discrete Distribution.
# It estimates how many times an event can happen in a specified time,
# e.g. if someone eats twice a day, what is the probability they will eat thrice?
# It has two parameters:
# lam - rate or known number of occurrences, e.g. 2 for the above problem.
# size - The shape of the returned array.
from numpy import random
import matplotlib.pyplot as plt
def _simulate_claim_data(T, freq, sev, theta, obsFreqs=None):
    # T = integer that corresponds to the number of time periods observed
    # freq = claim frequency distribution to be chosen in ("poisson",
    #        "binomial", "negative binomial")
    # theta_freq = parameters of the claim frequency distribution
    # sev = claim sizes distribution to be chosen in ("weibull", "lognormal",
    #       "gamma")
    # theta_sev = parameters of the claim sizes distribution
    if freq == "ones":
        freqs = np.ones(T).astype(np.int64)
        thetaSev = theta
    elif freq == "obs":
        freqs = obsFreqs
        thetaSev = theta
    elif freq == "bernoulli":
        p = theta[0]
        thetaSev = theta[1:]
        freqs = np.empty(T, np.int64)
        for t in range(T):
            freqs[t] = rnd.binomial(1, p)
    elif freq == "binomial":
        n, p = theta[0:2]
        thetaSev = theta[2:]
        freqs = np.empty(T, np.int64)
        for t in range(T):
            freqs[t] = rnd.binomial(n, p)
    elif freq == "poisson":
        lam = theta[0]
        thetaSev = theta[1:]
        freqs = np.empty(T, np.int64)
        for t in range(T):
            freqs[t] = rnd.poisson(lam)
    elif freq == "geometric":
        p = theta[0]
        thetaSev = theta[1:]
        freqs = np.empty(T, np.int64)
        for t in range(T):
            freqs[t] = rnd.geometric(1 - p) - 1
    elif freq == "negative binomial":
        a, p = theta[0:2]
        thetaSev = theta[2:]
        freqs = np.empty(T, np.int64)
        for t in range(T):
            freqs[t] = negative_binomial(a, p)
    else:
        return  # raise Exception(f"Unknown frequency distribution: {freq}")

    N = np.sum(freqs)

    if sev == "frequency dependent exponential":
        sevs = np.empty(N, np.float64)
        i = 0
        for t in range(T):
            sevs[i:i + freqs[t]] = _simulate_claim_sizes(freqs[t], sev, thetaSev)
            i += freqs[t]
    else:
        sevs = _simulate_claim_sizes(N, sev, thetaSev)

    return freqs, sevs
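# A self-contained sketch of the compound (frequency + severity) structure that
# _simulate_claim_data builds. The gamma severity and its parameters are illustrative
# assumptions, since the original delegates severities to _simulate_claim_sizes, which
# is not shown here.
import numpy as np
import numpy.random as rnd

T = 12
freqs = np.array([rnd.poisson(3.0) for _ in range(T)], dtype=np.int64)  # claims per period
sevs = rnd.gamma(shape=2.0, scale=500.0, size=int(freqs.sum()))         # one severity per claim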
def run_model(self): ## initialize data structure self.res = np.zeros([ self.duration, len(MODEL_RUN_COLUMNS) + len(EXPORT_COLUMNS_FOR_CSV) ], dtype=np.float32) self.res[0, 0] = self.nf1 self.res[0, 1] = self.nf2 self.res[0, 2] = self.nf3 self.res[0, 3] = self.nm1 self.res[0, 4] = self.nm2 self.res[0, 5] = self.nm3 self.res[0, 6] = 0 self.res[0, 7] = 0 self.res[0, 8] = 0 self.res[0, 9] = self.female_promotion_probability_1 self.res[0, 10] = self.female_promotion_probability_2 self.res[0, 11] = np.float32( sum(list([self.nf1, self.nf2, self.nf3])) / sum( list([ self.nf1, self.nf2, self.nf3, self.nm1, self.nm2, self.nm3 ]))) self.res[0, 12] = 0 self.res[0, 13] = self.res[0, 0:6].sum() self.res[0, 14:] = 0 # I assign the state variables to temporary variables. That way I # don't have to worry about overwriting the original state variables. hiring_rate_female_level_1 = self.bf1 hiring_rate_female_level_2 = self.bf2 hiring_rate_female_level_3 = self.bf3 attrition_rate_female_level_1 = self.df1 attrition_rate_female_level_2 = self.df2 attrition_rate_female_level_3 = self.df3 attrition_rate_male_level_1 = self.dm1 attrition_rate_male_level_2 = self.dm2 attrition_rate_male_level_3 = self.dm3 probability_of_outside_hire_level_3 = self.phire3 probability_of_outside_hire_level_2 = self.phire2 female_promotion_probability_1_2 = self.female_promotion_probability_1 female_promotion_probability_2_3 = self.female_promotion_probability_2 department_size_upper_bound = self.upperbound department_size_lower_bound = self.lowerbound variation_range = self.variation_range unfilled_vacanies = 0 change_to_level_1 = 0 change_to_level_2 = 0 change_to_level_3 = 0 for i in range(1, self.duration): # initialize variables for this iteration prev_number_of_females_level_1 = self.res[i - 1, 0] prev_number_of_females_level_2 = self.res[i - 1, 1] prev_number_of_females_level_3 = self.res[i - 1, 2] prev_number_of_males_level_1 = self.res[i - 1, 3] prev_number_of_males_level_2 = self.res[i - 1, 4] prev_number_of_males_level_3 = self.res[i - 1, 5] prev_number_of_vacancies_level_3 = self.res[i - 1, 6] prev_number_of_vacancies_level_2 = self.res[i - 1, 7] prev_number_of_vacancies_level_1 = self.res[i - 1, 8] prev_promotion_rate_female_level_1 = self.female_promotion_probability_1 prev_promotion_rate_female_level_2 = self.female_promotion_probability_2 department_size = self.res[i - 1, 0:6].sum() # Process Model # Determine department size variation for this timestep # first both female and males leave the department according to binomial probability. female_attrition_level_3 = binomial(prev_number_of_females_level_3, attrition_rate_female_level_3) male_attrition_level_3 = binomial(prev_number_of_males_level_3, attrition_rate_male_level_3) # the departures create a set of vacancies. These vacancies are the basis for new hiring total_vacancies_3 = female_attrition_level_3 + \ male_attrition_level_3 + change_to_level_3 # women are hired first and then men hiring_female_3 = binomial( max(0, total_vacancies_3), probability_of_outside_hire_level_3 * hiring_rate_female_level_3) hiring_male_3 = binomial( max(0, total_vacancies_3 - hiring_female_3), probability_of_outside_hire_level_3 * (1 - hiring_rate_female_level_3)) total_hiring_3 = hiring_female_3 + hiring_male_3 # level 3 vacancies that are not filled by new hires create opportunities # for promotion from level 2. Again women are promoted first and men second. 
# Also note the error trap that if we try to promote more professors from # level 2 than there exist at level 2, then we will prevent this from happening. vacancies_remaining_after_hiring_3 = total_vacancies_3 - total_hiring_3 potential_promotions_after_hiring_3 = max( 0, vacancies_remaining_after_hiring_3) promotions_of_females_level_2_3 = binomial( min(potential_promotions_after_hiring_3, prev_number_of_females_level_2), female_promotion_probability_2_3) promotions_of_males_level_2_3 = binomial( max( 0, min( vacancies_remaining_after_hiring_3 - promotions_of_females_level_2_3, prev_number_of_males_level_2)), 1 - female_promotion_probability_2_3) # attrition at level 2 - either people leave from attrition or promotion female_attrition_level_2 = binomial( max( 0, prev_number_of_females_level_2 - promotions_of_females_level_2_3), attrition_rate_female_level_2) male_attrition_level_2 = binomial( max( 0, prev_number_of_males_level_2 - promotions_of_males_level_2_3), attrition_rate_male_level_2) # the departures create a set of vacancies. These vacancies are the basis for new hiring total_vacancies_2 = sum( list([ female_attrition_level_2, male_attrition_level_2, promotions_of_females_level_2_3, promotions_of_males_level_2_3, change_to_level_2 ])) hiring_female_2 = binomial( max(0, total_vacancies_2), probability_of_outside_hire_level_2 * hiring_rate_female_level_2) hiring_male_2 = binomial( max(0, total_vacancies_2 - hiring_female_2), probability_of_outside_hire_level_2 * (1 - hiring_rate_female_level_2)) total_hiring_2 = hiring_female_2 + hiring_male_2 vacancies_remaining_after_hiring_2 = total_vacancies_2 - total_hiring_2 potential_promotions_after_hiring_2 = max( 0, vacancies_remaining_after_hiring_2) promotions_of_females_level_1_2 = binomial( max( 0, min(potential_promotions_after_hiring_2, prev_number_of_females_level_1)), female_promotion_probability_1_2) promotions_of_males_level_1_2 = binomial( max( 0, min( vacancies_remaining_after_hiring_2 - promotions_of_females_level_1_2, prev_number_of_females_level_1)), probability_of_outside_hire_level_2 * (1 - female_promotion_probability_1_2)) ## Level 1 female_attrition_level_1 = binomial( max( 0, prev_number_of_females_level_1 - promotions_of_females_level_1_2), attrition_rate_female_level_1) male_attrition_level_1 = binomial( max( 0, prev_number_of_males_level_1 - promotions_of_males_level_1_2), attrition_rate_male_level_1) total_vacancies_1 = sum( list([ female_attrition_level_1, male_attrition_level_1, promotions_of_females_level_1_2, promotions_of_males_level_1_2, change_to_level_1 ])) hiring_female_1 = binomial(max(0, total_vacancies_1), hiring_rate_female_level_1) hiring_male_1 = binomial( max(0, total_vacancies_1 - hiring_female_1), 1 - hiring_rate_female_level_1) # Write state variables to array and move to next iteration self.res[i, 0] = number_of_females_level_1 = sum( list([ prev_number_of_females_level_1, neg(female_attrition_level_1), neg(promotions_of_females_level_1_2), hiring_female_1 ])) assert (number_of_females_level_1 >= 0), "negative number of females 1" self.res[i, 1] = number_of_females_level_2 = max( 0, sum( list([ prev_number_of_females_level_2, neg(female_attrition_level_2), neg(promotions_of_females_level_2_3), promotions_of_females_level_1_2, hiring_female_2 ]))) self.res[i, 2] = number_of_females_level_3 = sum( list([ prev_number_of_females_level_3, neg(female_attrition_level_3), promotions_of_females_level_2_3, hiring_female_3 ])) self.res[i, 3] = number_of_males_level_1 = sum( list([ 
prev_number_of_males_level_1, neg(male_attrition_level_1), neg(promotions_of_males_level_1_2), hiring_male_1 ])) self.res[i, 4] = number_of_males_level_2 = sum( list([ prev_number_of_males_level_2, neg(male_attrition_level_2), neg(promotions_of_males_level_2_3), promotions_of_males_level_1_2, hiring_male_2 ])) self.res[i, 5] = number_of_males_level_3 = sum( list([ prev_number_of_males_level_3, neg(male_attrition_level_3), promotions_of_males_level_2_3, hiring_male_3 ])) self.res[i, 6] = sum( list([male_attrition_level_3, female_attrition_level_3])) self.res[i, 7] = sum( list([ male_attrition_level_2, female_attrition_level_2, promotions_of_females_level_2_3, promotions_of_males_level_2_3 ])) self.res[i, 8] = sum( list([ male_attrition_level_1, female_attrition_level_1, promotions_of_males_level_1_2, promotions_of_females_level_1_2 ])) self.res[i, 9] = self.female_promotion_probability_1 self.res[i, 10] = self.female_promotion_probability_2 self.res[i, 11] = np.float32( truediv( sum( list([ number_of_females_level_1, number_of_females_level_2, number_of_females_level_3 ])), sum( list([ number_of_females_level_1, number_of_females_level_2, number_of_females_level_3, number_of_males_level_1, number_of_males_level_2, number_of_males_level_3 ])))) unfilled_vacanies = abs(department_size - self.res[i, 0:6].sum()) self.res[i, 12] = unfilled_vacanies department_size = self.res[i, 0:6].sum() self.res[i, 13] = department_size self.res[i, 14] = hiring_female_3 self.res[i, 15] = hiring_male_3 self.res[i, 16] = hiring_female_2 self.res[i, 17] = hiring_male_2 self.res[i, 18] = hiring_female_1 self.res[i, 19] = hiring_male_1 self.res[i, 20] = 0 self.res[i, 21] = 0 self.res[i, 22] = promotions_of_females_level_2_3 self.res[i, 23] = promotions_of_males_level_2_3 self.res[i, 24] = promotions_of_females_level_1_2 self.res[i, 25] = promotions_of_males_level_1_2 self.res[i, 26] = hiring_rate_female_level_1 self.res[i, 27] = hiring_rate_female_level_2 self.res[i, 28] = hiring_rate_female_level_3 self.res[i, 29] = 1 - hiring_rate_female_level_1 self.res[i, 30] = 1 - hiring_rate_female_level_2 self.res[i, 31] = 1 - hiring_rate_female_level_3 self.res[i, 32] = attrition_rate_female_level_1 self.res[i, 33] = attrition_rate_female_level_2 self.res[i, 34] = attrition_rate_female_level_3 self.res[i, 35] = attrition_rate_male_level_1 self.res[i, 36] = attrition_rate_male_level_2 self.res[i, 37] = attrition_rate_male_level_3 self.res[i, 38] = 1 self.res[i, 39] = probability_of_outside_hire_level_2 self.res[i, 40] = probability_of_outside_hire_level_3 self.res[i, 41] = female_promotion_probability_1_2 self.res[i, 42] = female_promotion_probability_2_3 self.res[i, 43] = 1 - female_promotion_probability_1_2 self.res[i, 44] = 1 - female_promotion_probability_2_3 self.res[i, 45] = department_size_upper_bound self.res[i, 46] = department_size_lower_bound self.res[i, 47] = variation_range self.res[i, 48] = self.duration # this produces an array of values. Then I need to assign the # values to levels. So if I have say a range of variation of 5. I # will get something like [-1,0,1,-1,0] or something. I need to # turn this into something like [2,-1,0]. That means randomly # assigning the values in the array to levels. flag = False while flag == False: changes = np.random.choice([-1, 0, 1], variation_range) levels = np.random.choice([1, 2, 3], variation_range) # # random level # choice # need to test whether the candidate changes keep the # department size within bounds. 
# print(["old dept size:", department_size, # "new dept size:", self.res[i, 0:6].sum(), # "candidate:", department_size + # changes.sum(), # " added postions: ", changes.sum(), # "unfilled ", unfilled_vacanies]) if (department_size + changes.sum() <= department_size_upper_bound and department_size + changes.sum() >= department_size_lower_bound): change_to_level_3 = np.int( changes[np.where(levels == 3)[0]].sum()) change_to_level_2 = np.int( changes[np.where(levels == 2)[0]].sum()) change_to_level_1 = np.int( changes[np.where(levels == 1)[0]].sum()) flag = True if (department_size > department_size_upper_bound): change_to_level_3 = 0 change_to_level_2 = 0 change_to_level_1 = 0 flag = True if department_size < department_size_lower_bound: changes = np.ones(variation_range) change_to_level_3 = np.int( changes[np.where(levels == 3)[0]].sum()) change_to_level_2 = np.int( changes[np.where(levels == 2)[0]].sum()) change_to_level_1 = np.int( changes[np.where(levels == 1)[0]].sum()) flag = True df_ = pd.DataFrame(self.res) df_.columns = MODEL_RUN_COLUMNS + EXPORT_COLUMNS_FOR_CSV recarray_results = df_.to_records(index=True) self.run = recarray_results return recarray_results
But, as with the binomial, a Poisson distribution with a large enough lam becomes
similar to a normal distribution with a corresponding mean and standard deviation.'''
sns.distplot(random.normal(loc=50, scale=7, size=1000), hist=False, label='normal')
sns.distplot(random.poisson(lam=50, size=1000), hist=False, label='poisson')
plt.show()

'''Difference Between Poisson and Binomial Distribution
The difference is subtle: a binomial distribution counts successes in a fixed
number of discrete trials, whereas a Poisson distribution counts events over a
continuous interval. For very large n and near-zero p, the binomial distribution
is nearly identical to a Poisson distribution with lam equal to n * p.'''
sns.distplot(random.binomial(n=1000, p=0.01, size=1000), hist=False, label='binomial')
sns.distplot(random.poisson(lam=10, size=1000), hist=False, label='poisson')
plt.show()

print('Uniform Distribution')
'''Used to describe probability where every event has an equal chance of occurring.'''
x = random.uniform(size=(2, 3))
print(x)
sns.distplot(random.uniform(size=1000), hist=False)
plt.show()

print('Logistic Distribution')
'''The logistic distribution is used to describe growth.
Used extensively in machine learning, e.g. in logistic regression and neural networks.'''
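To make the "large n, near-zero p" claim concrete, a quick numerical check (sample size chosen arbitrarily) compares the empirical mean and variance of binomial(n=1000, p=0.01) with poisson(lam=10):

from numpy import random

n, p = 1000, 0.01
binom = random.binomial(n=n, p=p, size=100_000)
pois = random.poisson(lam=n * p, size=100_000)

# Both means should be close to 10; the variances are n*p*(1-p) = 9.9 vs lam = 10.
print(binom.mean(), binom.var())
print(pois.mean(), pois.var())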
# The binomial distribution is a discrete distribution.
# It describes the outcome of binary scenarios,
# e.g. a coin toss: the result is either heads or tails.
# It has three parameters:
# n - number of trials
# p - probability of success on each trial (e.g. 0.5 for a coin toss)
# size - the shape of the returned array
# Discrete distribution -
# the distribution is defined over a separate set of events.
from numpy import random

x = random.binomial(n=10, p=0.5, size=10)
print(x)

# visualization of the binomial distribution
import matplotlib.pyplot as plt
import seaborn as sns

sns.distplot(random.binomial(n=10, p=0.5, size=1000), hist=True, kde=False)
plt.show()

# difference between the normal and binomial distributions
"""The main difference is that the normal distribution is continuous whereas the
binomial is discrete, but with enough data points the binomial looks very much
like a normal distribution with a corresponding loc and scale."""
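The docstring above stops short of the actual comparison; one possible way to draw it, reusing the imports from the snippet above (the choices loc = n*p and scale = sqrt(n*p*(1-p)) are standard moment matching, not taken from the original snippet), is:

import numpy as np

n, p = 100, 0.5
sns.distplot(random.normal(loc=n * p, scale=np.sqrt(n * p * (1 - p)), size=1000),
             hist=False, label='normal')
sns.distplot(random.binomial(n=n, p=p, size=1000), hist=False, label='binomial')
plt.show()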
from numpy import random

seed = int(input("Enter seed: "))
num = int(input("Enter the number of trials: "))
prob = float(input("Enter the probability: "))
size = int(input("Enter the number of samples: "))

# Use the seed to initialise the random number generator before sampling.
random.seed(seed)
x = random.binomial(n=num, p=prob, size=size)
print(x)
], action_feature_funcs=[], dnn_spec=DNNSpec( neurons=[2], hidden_activation=DNNSpec.relu, hidden_activation_deriv=DNNSpec.relu_deriv, output_activation=DNNSpec.sigmoid, output_activation_deriv=DNNSpec.sigmoid_deriv)) ] # noinspection PyPep8 this_score_func = lambda a, p: [ 1. / p[0] if a == (10, ) else 1. / (p[0] - 1.) ] # noinspection PyPep8 sa_gen_func = lambda p, n: [((10, ) if x == 1 else (-10, )) for x in binomial(1, p[0], n)] pg_obj = PolicyGradient(mdp_rep_for_rl_pg=mdp_rep_obj, reinforce=reinforce_val, num_batches=num_batches_val, batch_size=batch_size_val, num_action_samples=num_action_samples_val, max_steps=max_steps_val, actor_lambda=actor_lambda_val, critic_lambda=critic_lambda_val, score_func=this_score_func, sample_actions_gen_func=sa_gen_func, fa_spec=fa_spec_val, pol_fa_spec=pol_fa_spec_val) def policy_func(i: int) -> Mapping[Tuple[int], float]: if i == 1:
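The sample_actions_gen_func above turns Bernoulli draws into the two actions (10,) and (-10,); a small standalone sketch of just that sampling step (the probability value is assumed for illustration) is:

from numpy.random import binomial

# Draw n actions: (10,) with probability p[0], otherwise (-10,).
sa_gen_func = lambda p, n: [((10,) if x == 1 else (-10,)) for x in binomial(1, p[0], n)]
print(sa_gen_func([0.7], 10))  # roughly 70% of the entries should be (10,)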
# automate shift maker
def random_diag_sin_shifts(n, min_c=0, max_c=10, min_a=.01, max_a=.05,
                           min_f=15, max_f=20):
    """Generate systematic shift sine functions.

    Each one follows the formula: f(x) = c + x + a * sin(2*pi*x / f)
    """
    C = runif(n, min_c, max_c)
    A = runif(n, min_a, max_a)
    F = runif(n, min_f, max_f)
    # Bind c, a, f as default arguments so each lambda keeps its own values
    # instead of every closure sharing the last iteration's values.
    return tuple([
        lambda x, c=c, a=a, f=f: c + x + a * np.sin(2 * pi * x / f)
        for c, a, f in zip(C, A, F)
    ])


if __name__ == "__main__":
    # runif, draw_rt, draw_runs and act are assumed to be defined elsewhere in the project.
    rt = draw_rt()
    rts = draw_runs(rt, 10)
    shifts = random_diag_sin_shifts(10)
    rtss = act(shifts, rts)
    # add big jumps here. npr.binomial(10000, .01)
def evaluation(silent, simulation_rounds, malicious_devices_prop, devices_per_cluster, eval_time, malicious_frequency_multiplier, threshold_ratio, periodicity_error, results_file): global counter_lock, counter show_pbar = not silent and counter_lock is not None and counter is not None aggregated_data_sent_array = [] data_sent_threshold_array = [] num_malicious_array = [] for i in range(simulation_rounds): # Updating progress bar if show_pbar: with counter_lock: counter.value += 1 # Creating the cluster (devices in a flow) iot_devices = [] distribution_device_classes = [ random_int(0, len(device_classes) - 1) for _ in range(devices_per_cluster) ] distribution_malicious = binomial(1, malicious_devices_prop, devices_per_cluster) for type_id, is_malicious in zip(distribution_device_classes, distribution_malicious): iot_devices.append(device_classes[type_id]( is_malicious, eval_time, periodicity_error, malicious_frequency_multiplier)) # Preloading random arrays uniform_first_tx = list(uniform(0, 1, devices_per_cluster)) gaussian_jitter = list(normal(0, 1, devices_per_cluster)) # Computing estimated throughput (data sent within an evaluation period) data_sent_threshold = 0 distribution_devices_counter = dict( Counter(distribution_device_classes)) for device_type_id, device_type_count in distribution_devices_counter.items( ): data_sent_threshold += device_type_count * threshold_ratio * ( eval_time / get_class_from_type_id(device_type_id).legacy_period) * ( get_class_from_type_id(device_type_id).data_burst) # Simulation aggregated_data_sent = 0 for device in iot_devices: # First tranmsission in U(0, tx_period) # Assuming tx_period < eval_time accumulated_time = uniform_first_tx.pop() * device.period # [2,n-1] transmissions num_txs = 1 + int((eval_time - accumulated_time) / device.period) remainder = (eval_time - accumulated_time) % device.period # Last transmission (with jitter) standard_deviation = device.period * (periodicity_error / 2) / 2 last_transmission_time = standard_deviation * gaussian_jitter.pop( ) + device.period if (last_transmission_time < remainder): num_txs += 1 # Adding the data of this device to the aggregated data sent in this period aggregated_data_sent += num_txs * device.data_burst aggregated_data_sent_array.append(aggregated_data_sent) data_sent_threshold_array.append(data_sent_threshold) num_malicious_array.append(sum(distribution_malicious)) # Results result = { "eval_time": eval_time, "malicious_devices_prop": malicious_devices_prop, "devices_per_cluster": devices_per_cluster, "malicious_frequency_multiplier": malicious_frequency_multiplier, "threshold_ratio": threshold_ratio, "periodicity_error": periodicity_error, "simulation_rounds": simulation_rounds, "aggregated_data_sent_array": aggregated_data_sent_array, "data_sent_threshold_array": data_sent_threshold_array, "malicious_quarantined_array": [ int(num_malicious) if (aggregated_data_sent > data_sent_threshold) else 0 for aggregated_data_sent, data_sent_threshold, num_malicious in zip(aggregated_data_sent_array, data_sent_threshold_array, num_malicious_array) ], "malicious_array": [int(x) for x in num_malicious_array], "legitimate_quarantined_array": [ int(devices_per_cluster - num_malicious) if (aggregated_data_sent > data_sent_threshold) else 0 for aggregated_data_sent, data_sent_threshold, num_malicious in zip(aggregated_data_sent_array, data_sent_threshold_array, num_malicious_array) ], "legitimate_array": [devices_per_cluster - int(x) for x in num_malicious_array] } return result
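The threshold above is computed per device type as count * threshold_ratio * (eval_time / legacy_period) * data_burst; a minimal standalone sketch with made-up device parameters (not the real device classes from the snippet) illustrates the calculation:

from collections import Counter

# Hypothetical per-type parameters, for illustration only.
legacy_period = {0: 60.0, 1: 300.0}  # seconds between legacy transmissions
data_burst = {0: 48, 1: 96}          # bytes per transmission

def data_sent_threshold(type_ids, eval_time, threshold_ratio):
    """Expected data volume in one evaluation window, scaled by threshold_ratio."""
    total = 0.0
    for type_id, count in Counter(type_ids).items():
        total += count * threshold_ratio * (eval_time / legacy_period[type_id]) * data_burst[type_id]
    return total

print(data_sent_threshold([0, 0, 1], eval_time=3600, threshold_ratio=1.2))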
def pull(self) -> float: return binomial(1, self.param)
def sample_multiple(self, actions, n):
    """Draw n Bernoulli samples for each specified action and return, per
    action, the total number of successes (a Binomial(n, p) draw where p is
    that action's expected reward)."""
    return binomial(n, self.expected_rewards[actions])
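pull() returns a single Bernoulli reward, while sample_multiple() returns the number of successes over n trials for each requested action; a small self-contained sketch (hypothetical BernoulliBandit class, not taken from the original code) shows the two side by side:

import numpy as np
from numpy.random import binomial

class BernoulliBandit:
    def __init__(self, expected_rewards):
        self.expected_rewards = np.asarray(expected_rewards)

    def pull(self, action) -> int:
        # One Bernoulli reward for a single action.
        return binomial(1, self.expected_rewards[action])

    def sample_multiple(self, actions, n):
        # Number of successes out of n pulls, for each requested action.
        return binomial(n, self.expected_rewards[actions])

bandit = BernoulliBandit([0.1, 0.5, 0.9])
print(bandit.pull(2))
print(bandit.sample_multiple([0, 2], 100))  # roughly [10, 90]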
def run_vi(data_set, alpha, holdout, sigma_a=0.1, sigma_n=0.5, iter_count=50, truncation=15, init_Phi=1.0): data_count = data_set.shape[0] dim_count = data_set.shape[1] elbo_set = np.zeros([iter_count]) nu_set = list() # nu are the varitional parameters on Z phi_set = list() # phi mean param of A Phi_set = list() # Phi cov param of A, per feat -> same for all dims tau_set = list() # tau are the variational parameters on the stick betas iter_times = list() N, D = data_set.shape K = truncation feature_count = truncation # Initialize objects Z = npr.binomial(1, 0.5, [data_count, feature_count]) nu = npr.uniform(0, 1, [data_count, feature_count]) phi = np.zeros((feature_count, dim_count)) Phi = [init_Phi * np.eye(dim_count) for k in range(feature_count)] tau = [np.ones(feature_count), np.ones(feature_count)] # Optimization loop t_start = time.clock() try: for vi_iter in range(iter_count): # Update Phi and phi for k in range(feature_count): coeff = 1 / (1 / (sigma_a**2) + np.sum(nu[:, k]) / (sigma_n**2)) Phi[k] = coeff * np.eye(dim_count) phi_sums = np.dot(nu, phi) phi_sums_cur = phi_sums - np.outer(nu[:, k], phi[k]) phi[k] = coeff * (1 / (sigma_n**2) * np.dot(nu[:, k], (data_set - phi_sums_cur))) assert len(phi[k]) == dim_count # Get the intermediate variables qks = [] Elogsticks = [] for k in range(feature_count): qk, Elogstick = compute_q_Elogstick(tau, k) qks.append(qk) Elogsticks.append(Elogstick) # Update tau, nu for k in range(int(feature_count)): # update nu_k theta = np.sum([ sps.digamma(tau[0][i]) - sps.digamma(tau[0][i] + tau[1][i]) for i in range(k) ]) - Elogsticks[k] theta += -0.5 / (sigma_n**2) * (np.trace(Phi[k]) + np.dot(phi[k], phi[k])) phi_sums = np.dot(nu, phi) # recompute for each nu_k phi_sums_cur = phi_sums - np.outer(nu[:, k], phi[k]) theta += 1 / (sigma_n**2) * np.dot(phi[k], (data_set - phi_sums_cur).T) nu[:, k] = 1 / (1 + np.exp(-theta)) # update tau tau[0][k] = alpha + np.sum(nu[:, k:]) + sum( [(data_count - np.sum(nu[:, m])) * np.sum(qks[m][k + 1:m]) for m in range(k + 1, feature_count)]) tau[1][k] = 1 + sum( [(data_count - np.sum(nu[:, m])) * qks[m][k] for m in range(k, feature_count)]) # Compute the ELBO elbo = compute_elbo(data_set, alpha, sigma_a, sigma_n, phi, Phi, nu, tau) H = holdout.shape[0] num_Z_samples = 50 num_A_samples = 5 num_pi_samples = 10 total_loss = 0.0 # initialize memory sampled_z_counts = np.zeros(K) mses = np.zeros((num_pi_samples, num_Z_samples, num_A_samples, H)) for pk in range(num_pi_samples): vs = np.zeros(K) for k in range(K): vs[k] = np.random.beta(tau[0][k], tau[1][k]) pi = np.cumprod(vs) Z_new = np.zeros((H, K)) for zi in range(num_Z_samples): Z_new = np.random.binomial(1, pi, (H, K)) sampled_z_counts += Z_new.sum(0) A = np.zeros((K, D)) for ai in range(num_A_samples): for k in range(K): A[k] = phi[k].copy() + np.random.normal( 0, Phi[k][0][0], D) X_pred = Z_new.dot(A) diff = (-0.5 * (np.square(X_pred - holdout)) / sigma_n**2 - 0.5 * np.log(2 * np.pi) - np.log(sigma_n)) mses[pk, zi, ai] += diff.sum(axis=1) total_loss = mses.mean() lse = ( logsumexp(mses, axis=(0, 1, 2)) - np.log(num_pi_samples * num_A_samples * num_Z_samples)).mean() sampled_z_counts /= float(num_Z_samples * H * num_pi_samples) # print("z: {}".format(sampled_z_counts)) # Store things and report elbo_set[vi_iter] = elbo nu_set.append(nu) phi_set.append(phi) Phi_set.append(Phi) tau_set.append(tau) iter_times.append(time.clock() - t_start) print( "[Epoch: {:<3}]: ELBO: {:<10} | Test Loss: {:<10} | MSE (LSE): {:<10}" .format(vi_iter, elbo / (float(N)), -1. 
* total_loss, -1 * lse)) except KeyboardInterrupt: pass return nu_set, phi_set, Phi_set, tau_set, elbo_set, iter_times
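One portability note on the timing in run_vi: time.clock() was removed in Python 3.8, so on current interpreters the iteration timer needs a replacement. A minimal sketch of the same pattern using time.perf_counter(), with placeholder work standing in for a VI iteration:

import time

t_start = time.perf_counter()
iter_times = []
for _ in range(3):
    sum(range(100_000))  # placeholder for one iteration of work
    iter_times.append(time.perf_counter() - t_start)
print(iter_times)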
all_non_differences = []
all_non_opportunities = []
all_core_differences = []
all_core_opportunities = []

median_pNs = []
median_pSs = []

# Plot percentiles of divergence distribution
for species_idx in xrange(0, len(species_names)):

    species_name = species_names[species_idx]

    # Use the Poisson thinning theorem to cut down on
    # non-biological correlations between dS and dN/dS
    # (i.e., the fact that dS is in the denominator of dN/dS).
    thinned_syn_differences_1 = binomial(
        numpy.array(syn_differences[species_name], dtype=numpy.int32), 0.5)
    thinned_syn_differences_2 = syn_differences[
        species_name] - thinned_syn_differences_1

    pS1s = thinned_syn_differences_1 * 1.0 / (syn_opportunities[species_name] / 2.0)
    pS2s = thinned_syn_differences_2 * 1.0 / (syn_opportunities[species_name] / 2.0)
    pSs = syn_differences[species_name] * 1.0 / syn_opportunities[species_name]
    pNs = non_differences[species_name] * 1.0 / non_opportunities[species_name]
    ptots = (syn_differences[species_name] + non_differences[species_name]
             ) * 1.0 / (syn_opportunities[species_name] +
                        non_opportunities[species_name])

    pseudo_pSs = 1.0 / (syn_opportunities[species_name] / 2.0 +
                        non_opportunities[species_name])
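The binomial(counts, 0.5) call above is the thinning step: each synonymous difference lands independently in one of two halves, so for Poisson-distributed counts the two halves are independent. A small synthetic check of that idea, with made-up counts, looks like:

import numpy as np
from numpy.random import binomial, poisson

# Synthetic "synonymous difference" counts for a few species.
counts = poisson(lam=200, size=5).astype(np.int32)

half_1 = binomial(counts, 0.5)  # each difference kept in half 1 with probability 0.5
half_2 = counts - half_1        # the remainder forms half 2

print(counts)
print(half_1, half_2)           # two thinned count vectors that sum to the originals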