def sampleregex(self, trace, depth=0, conceptDist="default"):
    """
    conceptDist:
        'default' assumes base concept probabilities as defined in trace
        'uniform' assumes a uniform distribution over base concepts
    """
    if depth == 0:
        p_regex = self.p_regex_no_concepts
    elif depth == maxDepth:
        p_regex = self.p_regex_no_recursion if trace.baseConcepts else self.p_regex_no_concepts_no_recursion
    else:
        p_regex = self.p_regex if trace.baseConcepts else self.p_regex_no_concepts

    items = list(p_regex.items())
    idx = np.random.choice(range(len(items)), p=[p for k, p in items])
    R, p = items[idx]
    if R == pre.String:
        s = pre.Plus(pre.dot, p=0.3).sample()
        return R(s)
    elif R in self.character_classes:
        return R
    elif R in [pre.Concat, pre.Alt]:
        n = geom.rvs(0.8, loc=1)
        values = [self.sampleregex(trace, depth + 1) for i in range(n)]
        return R(values)
    elif R in [pre.KleeneStar, pre.Plus, pre.Maybe]:
        return R(self.sampleregex(trace, depth + 1))
    elif R == CONCEPT:
        if conceptDist == "default":
            return RegexWrapper(np.random.choice(
                trace.baseConcepts,
                p=[math.exp(trace.logpConcept(c)) for c in trace.baseConcepts]))
        elif conceptDist == "uniform":
            return RegexWrapper(np.random.choice(trace.baseConcepts))
def create_COG(mode, mot):
    # generate random sequence set1 (100 seqs of length 300 bp)
    num_seqs_in_set = 100
    len_seq = 300
    geom_rvs = geom.rvs(0.75, size=num_seqs_in_set, loc=-1)  # sym2=0.75, sym3=.7. Originally 0.5
    set1 = MGlib.random_DNA(len_seq, {"A": 0.3, "C": 0.2, "G": 0.2, "T": 0.3},
                            num_seqs_in_set)
    # sample large number of sites from motif
    pmot1 = MGlib.sample_motif(mot, num_seqs_in_set)
    if mode == "positive":
        # insert sites in sequences
        e = 0
        while e < len(set1):
            # edit sequence to include random site(s);
            # determine the number of sites per geometric distribution
            num_sites = geom_rvs[e]
            new_sites = ""
            for j in range(0, num_sites):
                new_sites += random.choice(pmot1)
            if len(new_sites) > len_seq:
                new_sites = new_sites[:len_seq]
            set1[e] = new_sites + set1[e][len(new_sites):]
            e = e + 1
    set2 = set1
    return set2
def follow_perturbed_leader(test_data, epsilon):
    # get values for each action at every round
    action1 = test_data[1].copy()
    action2 = test_data[2].copy()

    # generate hallucinations
    hallucinations = geom.rvs(epsilon, size=len(test_data))

    # add a hallucination at round 0
    action1.insert(0, hallucinations[0])
    action2.insert(0, hallucinations[1])

    # loop through and at each day choose the payoff of the best-in-hindsight
    # (BIH) action; the key here is that we take the round-0 hallucinations
    # into account
    ftpl = 0
    for idx in range(len(action1)):
        if idx == 0:
            continue
        else:
            bih1, bih2 = best_in_hindsight(action1, action2, idx)
            if bih2 > bih1:
                ftpl += action2[idx]
            else:
                ftpl += action1[idx]

    # now we have the total payoff for FTPL;
    # it can further be compared with OPT to get the regret
    # print('FTPL TOTAL PAYOFF', ftpl)
    return ftpl
def publish_block(self, extending_chain, miner):
    t0 = time.time()
    extending_chain.publish_block(miner, self.current_time)
    logging.debug(" Publishing: %s", time.time() - t0)
    mining_time = geom.rvs(p=extending_chain.get_mining_power_sum() / self.avg_block_time)
    extending_chain.next_block_time = self.current_time + mining_time
    self.block_times.append(mining_time)
def update_chain(self, chain, block, curr_time):
    try:
        chain.get_block(-4).mempool.txs = []
        chain.get_block(-4).txs = []
    except Exception:
        pass  # out of bounds
    chain.get_block(-1).next_block = block
    #chain.add_block(block)  #add the block to the chain
    chain.next_block_time = self.current_time + geom.rvs(p=chain.get_mining_power_sum() / self.avg_block_time)
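# --- Added sketch (not from the original simulator; parameter values are made
# up): in both snippets above the next block arrival is drawn as
# geom.rvs(p=mining_power_sum / avg_block_time), and since a Geometric(p) has
# mean 1/p, the expected inter-block delay is avg_block_time / mining_power_sum.
from scipy.stats import geom

avg_block_time = 600.0   # hypothetical target block time, in simulation ticks
mining_power_sum = 0.8   # hypothetical share of hash power on this chain
delays = geom.rvs(p=mining_power_sum / avg_block_time, size=100000)
print(delays.mean())     # ~ avg_block_time / mining_power_sum = 750 ticks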
def single_sim(self):
    bootret_by_year = []
    cumulative_lengths = []
    data_panel = []
    for i in range(self.holding_period):
        index_start = self.data.sample().index[0]
        index_start = self.data.index.get_loc(index_start)
        L_i = self.holding_period + 1
        while L_i > self.holding_period:
            L_i = geom.rvs(p=1 / self.mean_block_length)
        cumulative_lengths.append(L_i)
        if sum(cumulative_lengths) > self.holding_period:
            L_final = self.holding_period - sum(cumulative_lengths[:-1])
            if L_final > len(self.data) - index_start:
                diff = L_final - (len(self.data) - index_start)
                subsample_generated = self.data.iloc[index_start - diff:(index_start - diff + L_final), :]
            else:
                subsample_generated = self.data.iloc[index_start:index_start + L_final, :]
            data_panel.append(subsample_generated)
            break
        else:
            subsample_generated = self.data.iloc[index_start:index_start + L_i, :]
            if L_i > len(self.data) - index_start:
                L_i = len(self.data) - index_start
            data_panel.append(subsample_generated)
            cumulative_lengths[-1] = L_i

    bootstrapSample = pd.concat([subsample for subsample in data_panel],
                                axis=0, ignore_index=True)

    if self.stress_freq:
        historical_ret_by_year = self.data @ np.array([self.w1_stock, self.w2_bond, self.w3_gold]).T
        year_min_ret = historical_ret_by_year.idxmin()
        for i in range(self.holding_period):
            extreme_event_dummy = True if np.random.rand() < 0.05 else False
            if extreme_event_dummy:
                if self.stress_intensity == 1:
                    bootstrapSample.iloc[i, :] = self.data.loc[year_min_ret, :]
                else:
                    bootstrapSample.iloc[i, :] = self.data.loc[year_min_ret, :]
                    bootstrapSample.iloc[i, :] *= 1.5

    total_ret_by_year = bootstrapSample @ np.array([self.w1_stock, self.w2_bond, self.w3_gold]).T
    total_ret_by_year -= self.TER
    portfolio_path = self.capital * np.cumprod(total_ret_by_year + 1)
    cagr = (portfolio_path.values[-1] / self.capital) ** (1 / self.holding_period) - 1
    annual_volatility = total_ret_by_year.std()
    maxDrawdown = max_drawdown(pd.Series(total_ret_by_year))
    omega_ratio2 = omega_ratio(pd.Series(total_ret_by_year), required_return=0.02, annualization=1)
    omega_ratio4 = omega_ratio(pd.Series(total_ret_by_year), required_return=0.04, annualization=1)
    omega_ratio8 = omega_ratio(pd.Series(total_ret_by_year), required_return=0.08, annualization=1)
    return (np.insert(portfolio_path.values, 0, self.capital),
            cagr, annual_volatility, maxDrawdown,
            omega_ratio2, omega_ratio4, omega_ratio8)
def emulate_sensor(self, y_in, threshold):
    finished = False
    p = 0.2
    i = 0
    y_out = np.abs(y_in) > threshold
    while not finished:
        i += geom.rvs(p) + 7
        if i < len(y_in):
            y_out[i] = ~y_out[i]
        else:
            finished = True
    return y_out.astype(np.short)
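# --- Added sketch (illustrative, not from the original class): the emulated
# sensor flips its thresholded output at positions whose gaps are
# 7 + Geometric(p), so flips are at least 8 samples apart and arrive every
# 7 + 1/p = 12 samples on average for p = 0.2.
from scipy.stats import geom

p, n = 0.2, 1000
pos, flips = 0, []
while True:
    pos += geom.rvs(p) + 7
    if pos >= n:
        break
    flips.append(pos)
print(len(flips), flips[:5])  # roughly n / 12 flip positions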
def FFS(G, fracs):
    from scipy.stats import geom
    L = {}
    Discovered = {}
    for node in G.nodes():
        Discovered[node] = False
    s = random.choice(G.nodes())
    Discovered[s] = True
    FFSEdges = []
    reEdges = []
    i = 0  # counter
    L[i] = set([s])
    Length = 1
    f = 0
    N = 0
    while len(L[i]) > 0:
        N += 1
        if N > 6 * G.number_of_nodes():
            # restart the forest fire from a fresh seed node
            L = {}
            Discovered = {}
            for node in G.nodes():
                Discovered[node] = False
            s = random.choice(G.nodes())
            Discovered[s] = True
            FFSEdges = []
            reEdges = []
            i = 0  # counter
            L[i] = set([s])
            Length = 1
            f = 0
            N = 0
        L[i + 1] = set()
        for node in L[i]:
            numNeighbors = int(geom.rvs(0.7))
            tempArray = G.neighbors(node)
            random.shuffle(tempArray)
            for neighbor in tempArray[:numNeighbors]:
                FFSEdges.append((node, neighbor))
                L[i + 1].add(neighbor)
                if Discovered[neighbor] == False:
                    Discovered[neighbor] = True
                    Length += 1
                if Length >= fracs[f] * G.number_of_nodes():
                    reEdges.append(FFSEdges[:])
                    f += 1
                    if f >= len(fracs):
                        return reEdges
        i += 1
    return reEdges
def sample_from_geometric_distribution(e, N):
    # geometric distribution parameter
    p = 1.0 / e
    while True:
        # sample N bids from the geometric distribution
        r = geom.rvs(p, size=N)
        # calculate the average of all N sampled data points
        feedback = math.floor(r.mean())
        # ensure that feedback is positive, or sample feedback again
        if feedback > 0:
            break
    return feedback
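# --- Added usage sketch (values are hypothetical): with p = 1/e the geometric
# mean is e, so the averaged feedback concentrates near floor(e).
print(sample_from_geometric_distribution(e=5.0, N=1000))  # usually 4 or 5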
def __init__(self, space="ring", mobility="SRW", num=1): #for symmetric random walk strn = "node" + str(num) + "loc" self.loc = int( load_context(strn, int(np.ceil(np.random.randint(0, 359) / theta_m)))) #initial angle, in theta_m units #print_log("NODE","Initial Location ",self.loc); self.mobilityscale = 1000 #mobilityscale is in terms of samples. For each mobilityscale number of samples, the node moves left or right with equal probability #this is also the scale at which next transmission probabilities are decided strn = "node" + str(num) + "p" self.p = float(load_context(strn, 0.05)) #probability of transmitting in a sample duration strn = "node" + str(num) + "next_event" self.next_event = int( load_context(strn, self.mobilityscale * geom.rvs(self.p))) #gets the first value for tranmission slot. #this is not the global time. this is time-to-next-event self.state = "IDLE" #better handling with FSM is required here self.samplenum = 0 #the ongoing IQ sample number self.num = num strn = "node" + str(num) + "num_attempts" self.num_attempts = int(load_context(strn, 1)) #2 is added to the length to ensure that the begining and end #are zero so that the receiver can perform energy detection. payload = BitArray(int=self.get_loc(), length=16) payload = payload + BitArray(int=self.num, length=16) #print("payload ",payload) y = MAC_PHYSICAL_LAYER_PACKET(mac_payload_size=len(payload), SF=8, mac_payload_stream=payload) self.pktlen = len(y) + 2 #assume len(y) IQ samples per physical layer transmission. self.IQ = (0 + 0j) * np.ones(self.pktlen) #replace this by IQ samples #print("length... ",len(self.IQ)) self.IQ[1:len(y) + 1] = y strn = "node" + str(num) + "last_event_time" self.last_event_time = int(load_context(strn, 0))
def ts_array(n, n_sim, R, l, sim, endcorr):
    end_part = n if endcorr else n - l + 1
    cont = True
    if sim == "geom":
        len_tot = np.repeat(0, R)
        lens = np.array([None] * R)
        while cont:
            temp = 1 + geom.rvs(1 / l, size=R)
            temp = np.min(np.array([temp, n_sim - len_tot]), axis=0)
            lens = np.vstack((lens, temp))
            len_tot = len_tot + temp
            cont = any(len_tot < n_sim)
        lens = lens[1:]
        nn = lens.shape[0]
        st = np.random.randint(end_part, size=(nn, R))
    else:
        nn = int(np.ceil(n_sim / l))
        lens = np.hstack((np.repeat(l, nn - 1), 1 + (n_sim - 1) % l))
        st = np.random.randint(end_part, size=(nn, R))
    return {'starts': st, 'lengths': lens}
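# --- Added usage sketch (arguments are made up): draw block starts and
# geometric block lengths for R = 3 stationary-bootstrap replicates of a
# length-100 series with mean block length l = 10.
out = ts_array(n=100, n_sim=100, R=3, l=10, sim="geom", endcorr=True)
print(out['starts'].shape, out['lengths'].shape)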
def get_distribution_under_prior(leaves,
                                 admixes=None,
                                 sim_length=1000,
                                 list_of_summaries=[],
                                 thinning_criteria=None,
                                 skewed_admixture_prior=False):
    if admixes is None:
        admix_sim = True
    else:
        admix_sim = False
    res = {summ.name: [] for summ in list_of_summaries}
    for _ in xrange(sim_length):
        if admix_sim:
            admixes = geom.rvs(p=0.5) - 1
        tree = generate_phylogeny(leaves, admixes,
                                  skewed_admixture_prior=skewed_admixture_prior)
        if thinning_criteria is None or thinning_criteria(tree):
            for n, summary in enumerate(list_of_summaries):
                res[summary.name].append(summary.summary_of_phylogeny(tree))
    return res
def cluster_randomly_weigthed(document, prob_coref=0.03):
    """
    Randomly creates a coreferring link between a mention, m2, and any
    preceding mention, m1. The probability of selecting a closer antecedent
    is higher, according to a geometric distribution. An antecedent is
    selected with probability prob_coref; otherwise NO_ANTECEDENT is chosen
    for that mention.

    @prob_coref defaults to 0.03, which is the approximate percentage of
    positive coreferring links.
    """
    links = np.ndarray(shape=(len(document.mentions),), dtype=int)
    links.fill(Link.NO_ANTECEDENT)
    for idx in range(1, len(document.mentions)):
        if random.random() < prob_coref:
            random_antecedent = idx - geom.rvs(0.3)
            links[idx] = random_antecedent if random_antecedent >= 0 else Link.NO_ANTECEDENT
    return coreference_links_to_entity_clusters(links)
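# --- Added check (illustrative, not part of the original module): the distance
# back to the sampled antecedent is Geometric(0.3), so offset 1 has probability
# 0.3, offset 2 has 0.21, offset 3 has 0.147, ... and nearer mentions are
# strongly preferred.
import numpy as np
from scipy.stats import geom

offsets = geom.rvs(0.3, size=100000)
vals, counts = np.unique(offsets, return_counts=True)
print(dict(zip(vals[:4], counts[:4] / 100000.0)))  # ~{1: .30, 2: .21, 3: .15, 4: .10}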
def sampleregex(self, depth=0):
    if depth < maxDepth:
        p_regex = self.p_regex
    else:
        p_regex = self.p_regex_no_recursion
    items = list(p_regex.items())
    idx = np.random.choice(range(len(items)), p=[p for k, p in items])
    R, p = items[idx]
    if R == pre.String:
        s = pre.Plus(pre.dot, p=0.3).sample()
        return R(s)
    elif R in self.character_classes:
        return R
    elif R in [pre.Concat, pre.Alt]:
        n = geom.rvs(0.8, loc=1)
        values = [self.sampleregex(depth + 1) for i in range(n)]
        return R(values)
    elif R in [pre.KleeneStar, pre.Plus, pre.Maybe]:
        return R(self.sampleregex(depth + 1))
def do_event(self):
    self.change_loc(self.next_event)  #self.next_event is the last time interval
    self.last_event_time = self.last_event_time + self.next_event  #current time
    #print("last event time of node**********",self.last_event_time)
    if self.state == "IDLE":
        #next step is transmission
        self.state = "Tx"
        self.samplenum = 1
        print_log("NODE", "attempt no. ", self.num, self.num_attempts,
                  self.loc, self.last_event_time)
        self.next_event = 1  #next event is IQ sample transmission again
    else:
        if self.state == "Tx":
            if self.samplenum == self.pktlen:  #last packet
                #print("%%%%%%%% samplenum",self.samplenum)
                self.state = "IDLE"  #better handling with FSM is required here
                self.next_event = self.mobilityscale * geom.rvs(self.p)
                #at the scale of mobilityscale (number of samples)
                self.cur_loc = self.get_loc()
                print_log("NODE", "Going to Idle...", self.num,
                          self.last_event_time, self.cur_loc)
                self.change_loc(self.next_event)
                payload = BitArray(int=self.get_loc(), length=16)
                payload = payload + BitArray(int=self.num, length=16)
                y = MAC_PHYSICAL_LAYER_PACKET(mac_payload_size=len(payload),
                                              SF=8, mac_payload_stream=payload)
                self.IQ[1:len(y) + 1] = y
                self.samplenum = 0
                #print("before next attempt",self.num_attempts,self.num)
                self.num_attempts = self.num_attempts + 1
            else:
                #not transitioning to IDLE
                self.state = "Tx"
                self.samplenum = self.samplenum + 1
                self.next_event = 1
def simulate_tree(no_leaves, no_admixes=None):
    if no_admixes is None:
        no_admixes = geom.rvs(p=0.5) - 1
    tree = generate_phylogeny(no_leaves, no_admixes)
    return unique_identifier_and_branch_lengths(tree)
def __call__(self, options, pars, obs=None, trackobs=False):
    """Simulate process model to get predicted choice and sample size distributions"""

    ### Basic setup
    np.random.seed()
    N = pars.get('N', 10000)              # number of simulated trials
    max_T = int(pars.get('max_T', 1000))  # maximum sample size

    ### Stopping rules
    if self.stoprule == 'optional':
        threshold = pars.get('theta', 3)  # decision threshold (optional only)
        r = pars.get('r', 0)              # rate of boundary collapse (optional only)
        stop_T = None

    # fixed sample size
    elif self.stoprule == 'fixedT':
        stop_T = pars.get('stop_T', 2)
        max_T = stop_T
        threshold = 1000

    # geometric
    elif self.stoprule == 'fixedGeom':
        threshold = 1000
        p_stop_geom = pars.get('p_stop_geom')
        minss = pars.get('minsamplesize', 1)
        # sample size (not index), adjusted by minsamplesize
        stop_T = geom.rvs(p_stop_geom, size=N) + (minss - 1)
        # don't go past max_T
        stop_T[np.where(stop_T > max_T)[0]] = max_T

    ### Search
    # probability of sampling each option
    p_sample_H = pars.get('p_sample_H', .5)
    p_sample_L = 1 - p_sample_H

    # if p_switch is specified, it will be used to generate
    # sequences of observations (rather than p_sample_H and p_sample_L)
    p_switch = pars.get('p_switch', None)

    # are the first two samples drawn from different options?
    switchfirst = pars.get('switchfirst', False)

    ### Sequential weights
    # compute value and attentional weights for multinomial problems
    if self.problemtype == 'multinomial':
        if self.rdw is None:
            wopt = options
        else:
            wopt = self.rdw[pars['probid']]
        weights = np.array([cpt.pweight_prelec(option, pars) for option in wopt])
        values = np.array([cpt.value_fnc(option[:, 0], pars) for option in options])
        v = np.array([np.multiply(weights[i], values[i]) for i in range(len(options))])
        V = v.sum(axis=1)
        evar = np.array([np.dot(weights[i], values[i] ** 2) - np.sum(v[i]) ** 2
                         for i in range(len(options))])
        sigma2 = np.max([np.sum(evar), 1e-10])
        sigma2mean = np.max([np.mean(evar), 1e-10])

        # sequential weights
        omega = []
        for i, option in enumerate(options):
            omega.append(weights[i] / option[:, 1])
        omega = np.array(omega)
        omega[np.isnan(omega)] = 0
        w_outcomes = np.array([np.multiply(omega[i], values[i]) for i in range(len(options))])

    elif self.problemtype == 'normal':
        if 'pow_gain' in pars:
            w_options = np.array([[0, 0], [0, 0]])
            for i in range(2):
                ev, evar = cpt.normal_raised_to_power(options[i], pars['pow_gain'])
                w_options[i] = np.array([ev, evar])
            sigma2 = w_options[:, 1].sum()
            evar = w_options[:, 1]
        else:
            evar = options[:, 1]
            sigma2 = options[:, 1].sum()
            sigma2mean = options[:, 1].mean()

    # scale by variance
    if 'sc' in pars:
        # raised to power
        sc = pars.get('sc')
        variance_scale = 1 / float(np.sqrt(sigma2) ** sc)
    elif 'sc2' in pars:
        # multiplicative
        sc = pars.get('sc2')
        variance_scale = 1 / float(np.sqrt(sigma2) * sc)
    elif 'sc0' in pars:
        sc0 = pars.get('sc0')
    elif 'sc_mean' in pars:
        sc = pars.get('sc_mean')
        variance_scale = 1 / float(np.sqrt(sigma2mean) ** sc)
    elif 'sc2_mean' in pars:
        sc = pars.get('sc2_mean')
        variance_scale = 1 / float(np.sqrt(sigma2mean) * sc)
    elif 'sc_x' in pars:
        variance_scale = pars.get('sc_x')
    else:
        variance_scale = 1

    ### Starting distribution
    Z = np.zeros(N)
    if 'tau' in pars:
        tau = pars.get('tau')
        Z = laplace.rvs(loc=0, scale=tau, size=N)
    elif 'tau_trunc' in pars:
        tau = pars.get('tau_trunc')
        dx = .001
        x = np.arange(-(threshold - dx), threshold, dx)
        p = laplace.pdf(x, loc=0, scale=tau)
        pn = p / p.sum()
        Z = np.random.choice(x, N, p=pn)
    elif 'tau_rel' in pars:
        tau = pars.get('tau_rel')
        tau = tau / variance_scale
        Z = laplace.rvs(loc=0, scale=tau, size=N)
    elif 'tau_rel_trunc' in pars:
        tau = pars.get('tau_rel_trunc')
        dx = .001
        x = np.arange(-1 + dx, 1, dx)
        p = laplace.pdf(x, loc=0, scale=tau)
        pn = p / p.sum()
        Z = np.random.choice(x, N, p=pn)
        Z = Z * threshold
    elif 'tau_unif' in pars:
        #tau = pars.get('tau_unif', .001)
        #theta_max = pars.get('theta_max', theta)
        #theta_max = 200
        #rng = tau * theta_max
        rng = pars.get('tau_unif', .001)
        Z = np.linspace(-rng, rng, num=N)
        np.random.shuffle(Z)
        #Z = np.random.uniform(low=(-tau), high=tau, size=N)
    elif 'tau_unif_rel' in pars:
        dx = .001
        rng = pars.get('tau_unif_rel', .001)
        Z = np.linspace(-(threshold - dx) * rng, (threshold - dx) * rng, num=N)
        np.random.shuffle(Z)
    elif 'tau_normal' in pars:
        tau = pars.get('tau_normal')
        Z = norm.rvs(loc=0, scale=tau, size=N)
    elif 'tau_normal_trunc' in pars:
        tau = pars.get('tau_normal_trunc')
        dx = .001
        x = np.arange(-(threshold - dx), threshold, dx)
        p = norm.pdf(x, loc=0, scale=tau)
        pn = p / p.sum()
        Z = np.random.choice(x, N, p=pn)

    ### Simulate
    if obs is not None:
        # assume a single sequence of known observations
        sampled_option = obs['option'].values
        outcomes = obs['outcome'].values
        max_T = outcomes.shape[0]
        sgn = 2 * sampled_option - 1
        sv = np.zeros(outcomes.shape)
        if self.problemtype == 'normal':
            c = pars.get('c', 0)
            # add weighting and criterion here
            sv = cpt.value_fnc(outcomes - c, pars)
        elif self.problemtype == 'multinomial':
            for i, opt in enumerate(options):
                for j, x in enumerate(opt):
                    ind = np.where((sampled_option == i) & (outcomes == x[0]))[0]
                    sv[ind] = w_outcomes[i][j]
        sv = np.multiply(sv, sgn)
        sampled_option = np.tile(sampled_option, (N, 1))
        outcomes = np.tile(outcomes, (N, 1))
        sv = np.tile(sv, (N, 1))

    elif self.choicerule == 'random':
        sv = np.zeros((N, max_T))
        sampled_option = None
        outcomes = None

    else:
        # otherwise, simulate sampling from options
        if False and not trackobs and self.problemtype == 'multinomial' and p_switch is None:
            sampled_option = None
            outcomes = None
            valence = deepcopy(w_outcomes)
            valence[0] = -1 * valence[0]
            valence = valence.ravel()
            p = deepcopy(options[:, :, 1])
            p[0] = p_sample_L * p[0]
            p[1] = p_sample_H * p[1]
            p = p.ravel()
            sv = np.random.choice(valence, p=p, size=(N, max_T))

            # ensure that both options are sampled at least once
            if switchfirst:
                first = np.random.binomial(1, .5, size=N)
                second = 1 - first
                first2 = np.transpose((first, second))
                sampled_A = first2 == 0
                sampled_B = first2 == 1
                observed_A = np.random.choice(range(len(w_outcomes[0])), size=sampled_A.sum(), p=options[0][:, 1])
                observed_B = np.random.choice(range(len(w_outcomes[1])), size=sampled_B.sum(), p=options[1][:, 1])
                # subjective weighting
                sv2 = np.zeros((N, 2))
                sv2[sampled_A] = -1 * w_outcomes[0][observed_A]
                sv2[sampled_B] = w_outcomes[1][observed_B]
                sv[:, :2] = sv2
        else:
            # which option was sampled
            sampled_option = np.zeros((N, max_T), int)
            if p_switch is None:
                # ignore switching, just search based on [p_sample_H, p_sample_L]
                sampled_option = np.random.binomial(1, p_sample_H, size=(N, max_T))
            else:
                # generate search sequences based on p_switch
                switches = np.random.binomial(1, p_switch, size=(N, max_T - 1))
                sampled_option[:, 0] = np.random.binomial(1, .5, size=N)
                for i in range(max_T - 1):
                    switch_i = switches[:, i]
                    sampled_option[:, i + 1] = np.abs(sampled_option[:, i] - switch_i)

            # ensure both options sampled at least once
            if switchfirst:
                first = np.random.binomial(1, .5, size=N)
                sampled_option[:, 0] = first
                sampled_option[:, 1] = 1 - first

            # FOR SIMULATION
            #sampled_option = np.zeros((N, max_T), int)
            #for i in range(N):
            #    arr = sampled_option[i]
            #    arr[:(max_T/2)] = 1
            #    np.random.shuffle(arr)
            #    sampled_option[i] = arr

            # FOR SIMULATION
            #p_switch = pars.get('p_switch', .5)
            #sampled_option = np.zeros((N, max_T), int)
            #sampled_option[:,0] = np.random.choice([0, 1], p=[.5, .5], size=N)
            #for i in range(max_T - 1):
            #    switch = np.random.choice([0, 1], p=[1-p_switch, p_switch], size=N)
            #    sampled_option[:,i+1] = np.abs(sampled_option[:,i] - switch)

            sampled_A = sampled_option == 0
            sampled_B = sampled_option == 1
            N_sampled_A = sampled_A.sum()
            N_sampled_B = sampled_B.sum()

            # observation matrix - which outcome occurred (by index)
            observed = np.zeros((N, max_T), int)
            if self.problemtype == 'multinomial':
                observed_A = np.random.choice(range(len(w_outcomes[0])), size=sampled_A.sum(), p=options[0][:, 1])
                observed_B = np.random.choice(range(len(w_outcomes[1])), size=sampled_B.sum(), p=options[1][:, 1])
                observed[sampled_A] = observed_A
                observed[sampled_B] = observed_B

            # record outcomes experienced (by value)
            outcomes = np.zeros((N, max_T))
            if self.problemtype == 'multinomial':
                obj_outcomes = options[:, :, 0]
                #outcomes[sampled_A] = obj_outcomes[0][observed_A]
                #outcomes[sampled_B] = obj_outcomes[1][observed_B]
                # note weighting already done above
                outcomes[sampled_A] = w_outcomes[0][observed_A]
                outcomes[sampled_B] = w_outcomes[1][observed_B]
                outcomes_A = outcomes[sampled_A]
                outcomes_B = outcomes[sampled_B]
            else:
                A, B = options
                sigmaA = np.sqrt(A[1])
                sigmaB = np.sqrt(B[1])
                # weird conversion for np.truncnorm
                lowerA, upperA = (X_MIN - A[0]) / sigmaA, (X_MAX - A[0]) / sigmaA
                lowerB, upperB = (X_MIN - B[0]) / sigmaB, (X_MAX - B[0]) / sigmaB
                outcomes_A = np.round(truncnorm.rvs(lowerA, upperA, loc=A[0], scale=sigmaA, size=N_sampled_A))
                outcomes_B = np.round(truncnorm.rvs(lowerB, upperB, loc=B[0], scale=sigmaB, size=N_sampled_B))
                outcomes[sampled_A] = outcomes_A
                outcomes[sampled_B] = outcomes_B
                if 'pow_gain' in pars:
                    outcomes = cpt.value_fnc(outcomes, pars)
                    outcomes_A = cpt.value_fnc(outcomes_A, pars)
                    outcomes_B = cpt.value_fnc(outcomes_B, pars)

            # comparison
            sv = np.zeros((N, max_T))

            # criteria for each option
            if 'c' in pars:
                # compare to constant
                c = pars.get('c')
                c_A = c * np.ones(outcomes_A.shape)
                c_B = c * np.ones(outcomes_B.shape)
            elif 'c_0' in pars:
                # compare to sample mean
                c_0 = pars.get('c_0', 45)
                sum_A = np.cumsum(np.multiply(sampled_A, outcomes), axis=1)
                N_A = np.cumsum(sampled_A, axis=1, dtype=float)
                mn_A = np.multiply(sum_A, 1 / N_A)
                mn_A[np.isnan(mn_A)] = c_0
                sum_B = np.cumsum(np.multiply(sampled_B, outcomes), axis=1)
                N_B = np.cumsum(sampled_B, axis=1, dtype=float)
                mn_B = np.multiply(sum_B, 1 / N_B)
                mn_B[np.isnan(mn_B)] = c_0
                compA = np.multiply(outcomes - mn_B, sampled_A)
                compB = np.multiply(outcomes - mn_A, sampled_B)
                #sv = (-1 * compA) + compB
            else:
                # (default) compare to true (weighted)
                # mean of other option
                if self.problemtype == 'multinomial':
                    A, B = V
                elif self.problemtype == 'normal':
                    if 'pow_gain' in pars:
                        A, B = w_options[:, 0]
                    else:
                        A, B = options[:, 0]
                c_A = B * np.ones(outcomes_A.shape)
                c_B = A * np.ones(outcomes_B.shape)

            # combine
            if 'c_0' in pars:
                sv = (-1 * compA) + compB
            else:
                sv[sampled_A] = -1 * (outcomes_A - c_A)
                sv[sampled_B] = (outcomes_B - c_B)

            if 'sc0' in pars:
                # for any options with a variance of zero, replace with sc0
                evar[evar == 0.] = sc0
                # scaling factor for each option depends on its variance
                sc_A, sc_B = 1 / np.sqrt(evar)
                sv[sampled_A] = sv[sampled_A] * sc_A
                sv[sampled_B] = sv[sampled_B] * sc_B
            else:
                # fixed scaling factor across all options
                sv = sv * variance_scale

            # noise
            if 'c_sigma' in pars:
                c_sigma = pars.get('c_sigma')
                err = np.random.normal(loc=0, scale=c_sigma, size=outcomes.shape)
            elif 'dv_sigma' in pars:
                dv_sigma = pars.get('dv_sigma')
                err = np.random.normal(loc=0, scale=dv_sigma, size=N)
                err = np.tile(err, (max_T, 1)).transpose()
            else:
                err = np.zeros(outcomes.shape)
            sv = sv + err

    ### Accumulation
    # add starting states to first outcome
    sv[:, 0] = sv[:, 0] + Z

    # p_stay
    #p_stay = pars.get('p_stay', 0)
    #if p_stay > 0:
    #    attended = np.random.binomial(1, 1-p_stay, size=(N, max_T))
    #    sv = np.multiply(sv, attended)

    # accumulate
    P = np.cumsum(sv, axis=1)

    ### Stopping
    if self.stoprule == 'optional':
        if r > 0:
            # collapsing boundaries
            threshold_min = .1
            upper = threshold_min * np.ones((N, max_T))
            dec = np.arange(threshold, threshold_min, -r * threshold)
            dec = dec[:max_T]
            upper[:, :dec.shape[0]] = np.tile(dec, (N, 1))
            lower = -threshold_min * np.ones((N, max_T))
            inc = np.arange(-threshold, -threshold_min, r * threshold)
            inc = inc[:max_T]
            lower[:, :inc.shape[0]] = np.tile(inc, (N, 1))
            crossed = -1 * (P < lower) + 1 * (P > upper)
        else:
            # fixed boundaries
            crossed = -1 * (P < -threshold) + 1 * (P > threshold)

        # if minimum sample size, prevent stopping
        minsamplesize = pars.get('minsamplesize', 1) - 1
        crossed[:, :minsamplesize] = 0

        # any trials where hit max_T, make decision based on
        # whether greater or less than zero
        nodecision = np.where(np.sum(np.abs(crossed), axis=1) == 0)[0]
        if len(nodecision) > 0:
            n_pos = np.sum(P[nodecision, max_T - 1] > 0)
            n_eq = np.sum(P[nodecision, max_T - 1] == 0)
            n_neg = np.sum(P[nodecision, max_T - 1] < 0)
            #assert n_eq == 0, "reached max_T with preference of 0"
            crossed[nodecision, max_T - 1] += 1 * (P[nodecision, max_T - 1] >= 0)
            crossed[nodecision, max_T - 1] += -1 * (P[nodecision, max_T - 1] < 0)

    elif self.stoprule == 'fixedT':
        crossed = np.zeros((N, stop_T), dtype=int)
        crossed[:, (stop_T - 1)] = np.sign(P[:, (stop_T - 1)])
        indifferent = np.where(crossed[:, (stop_T - 1)] == 0)[0]
        n_indifferent = len(indifferent)
        crossed[indifferent] = np.random.choice([-1, 1], p=[.5, .5], size=(n_indifferent, 1))
        assert np.sum(crossed[:, (stop_T - 1)] == 0) == 0

    elif self.stoprule == 'fixedGeom':
        crossed = np.zeros((N, max_T), dtype=int)
        crossed[range(N), stop_T - 1] = np.sign(P[range(N), stop_T - 1])
        indifferent = np.where(crossed[range(N), stop_T - 1] == 0)[0]
        n_indifferent = len(indifferent)
        t_indifferent = (stop_T - 1)[indifferent]
        crossed[indifferent, t_indifferent] = np.random.choice([-1, 1], p=[.5, .5], size=n_indifferent)

    if obs is not None:
        p_stop_choose_A = np.sum(crossed == -1, axis=0) * (1 / float(N))
        p_stop_choose_B = np.sum(crossed == 1, axis=0) * (1 / float(N))
        p_sample = 1 - (p_stop_choose_A + p_stop_choose_B)
        return {'p_stop_choose_A': p_stop_choose_A,
                'p_stop_choose_B': p_stop_choose_B,
                'p_sample': p_sample,
                'traces': P}
    else:
        # samplesize is the **index** where threshold is crossed
        samplesize = np.sum(1 * (np.cumsum(np.abs(crossed), axis=1) == 0), axis=1)
        choice = (crossed[range(N), samplesize] + 1) / 2
        p_resp = choice.mean()
        ss_A = samplesize[choice == 0]
        ss_B = samplesize[choice == 1]
        p_stop_A = np.zeros(max_T)
        p_stop_B = np.zeros(max_T)
        p_stop_A_f = np.bincount(ss_A, minlength=max_T)
        p_stop_B_f = np.bincount(ss_B, minlength=max_T)
        if self.stoprule == 'optional' or self.stoprule == 'fixedGeom':
            if p_stop_A_f.sum() > 0:
                p_stop_A = p_stop_A_f / float(p_stop_A_f.sum())
            if p_stop_B_f.sum() > 0:
                p_stop_B = p_stop_B_f / float(p_stop_B_f.sum())
        elif self.stoprule == 'fixedT':
            p_stop_A[stop_T - 1] = 1
            p_stop_B[stop_T - 1] = 1
        assert (p_stop_A_f.sum() + p_stop_B_f.sum()) == N
        p_stop_cond = np.transpose([p_stop_A, p_stop_B])
        p_stop_cond[np.isnan(p_stop_cond)] = 0.
        f_stop_cond = np.transpose([p_stop_A_f, p_stop_B_f]) / float(N)

        # only include data up to choice
        outcome_ind = None
        traces = None
        if type(sampled_option) is np.ndarray and trackobs:
            sampled_option = [sampled_option[i][:(samplesize[i] + 1)] for i in range(samplesize.shape[0])]
            outcomes = [outcomes[i][:(samplesize[i] + 1)] for i in range(samplesize.shape[0])]
            traces = [P[i][:(samplesize[i] + 1)] for i in range(samplesize.shape[0])]
            if self.problemtype == 'multinomial':
                outcome_ind = [observed[i][:(samplesize[i] + 1)] for i in range(samplesize.shape[0])]

        return {'choice': choice,
                'samplesize': samplesize + 1,
                'p_resp': np.array([1 - p_resp, p_resp]),
                'p_stop_cond': p_stop_cond,
                'f_stop_cond': f_stop_cond,
                'sampled_option': sampled_option,
                'outcomes': outcomes,
                'outcome_ind': outcome_ind,
                'traces': traces,
                'Z': Z}
def sol_num(n, p1, p2, K):
    X = geom.rvs(p1, size=n)
    Y = geom.rvs(p2, size=n)
    proba = np.sum(X + Y > K) / n
    return proba
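# --- Added cross-check (my addition, not from the original): the Monte Carlo
# estimate above can be verified against the closed form
# P(X + Y > K) = 1 - sum_{x=1}^{K-1} P(X = x) * P(Y <= K - x).
import numpy as np
from scipy.stats import geom

def sol_exact(p1, p2, K):
    x = np.arange(1, K)
    return 1 - np.sum(geom.pmf(x, p1) * geom.cdf(K - x, p2))

print(sol_num(100000, 0.3, 0.5, 5), sol_exact(0.3, 0.5, 5))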
def vector2(n, p, lambda1):
    N = geom.rvs(p, size=n)
    X = np.empty(shape=n)
    for i in range(n):
        X[i] = gamma.rvs(a=N[i], scale=1 / lambda1, size=1)
    return np.column_stack((N, X))
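# --- Added sanity check (parameter values are made up): X | N is
# Gamma(N, scale=1/lambda1), so E[X] = E[N] / lambda1 = 1 / (p * lambda1).
sample = vector2(n=100000, p=0.4, lambda1=2.0)
print(sample[:, 1].mean())  # ~ 1 / (0.4 * 2.0) = 1.25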
print("para k(overflow floods in 100 years) = 0 con 10000000 simulaciones: ", poiss0) print("para k(overflow floods in 100 years) = 1 con 10000000 simulaciones: ", poiss1) print("para k(overflow floods in 100 years) = 2 con 10000000 simulaciones: ", poiss2) print("para k(overflow floods in 100 years) = 3 con 10000000 simulaciones: ", poiss3) print("para k(overflow floods in 100 years) = 4 con 10000000 simulaciones: ", poiss4) print("para k(overflow floods in 100 years) = 5 con 10000000 simulaciones: ", poiss5) print("para k(overflow floods in 100 years) = 6 con 10000000 simulaciones: ", poiss6) print("-----------------------------------------------------------------") print("Ejercicio 4)\n") p2 = 0.05 # x = 5 print("Con p = 0.05 y x = 5:") y = geom.rvs(p2, size=size) a5, b5 = np.unique(y, return_counts=True) c5 = b5 / size f5 = c5[4] print( "Probabilidad de que se necesiten 5 intentos para enlazar \ncon exito una llamada en 10000000 simulaciones: ", f5)
def __init__(self, avgContent, number_of_files):
    self.avgContent = avgContent
    self.number_of_files = number_of_files
    self.filesize = geom.rvs(1.0 / self.avgContent, size=number_of_files)
ax.set_title('Probability mass function of Geom(0.3)')
ax.vlines(x, 0, geom.pmf(x, p), colors='b', lw=4, alpha=0.5)

rv = geom(p)
ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='--', lw=1, label="Frozen PMF")
ax.legend(loc='best')
plt.show()

print("Mean %f" % mean)
print("Variance %f" % var)
print("Skewness %f" % skew)
print("Kurtosis %f" % kurt)

fig, ax = plt.subplots(1, 1)
prob = geom.cdf(x, p)
ax.plot(x, prob, 'bo', ms=8, label="Cumulative distribution function")
plt.title('Cumulative distribution function')
plt.show()

fig, ax = plt.subplots(1, 1)
r = geom.rvs(p, size=10000)
plt.hist(r)
plt.title('Histogram of the random sample')
plt.show()
from scipy.stats import geom
import numpy as np
import seaborn as sb

simlen = int(1e8)
# calculating the expectation value up to the largest possible value, keeping time in mind
p = 0.25
t = []
s = geom.rvs(p, size=simlen)
#for i in range(1,simlen):
unique, counts = np.unique(s, return_counts=True)
for i in range(len(unique)):
    # calculating the expectation contribution of each event from the generated data
    t.append(unique[i] * counts[i] / simlen)
total = 0
for q in range(len(t)):
    total = total + t[q]
sim_expected = total
theo_expected = 4

# plotting
x = np.arange(0, 25)
p = 0.25
dist = geom(p)
ax = sb.barplot(x=x, y=dist.pmf(x))
ax.set(xlabel='n', ylabel='p(n)')
print(f'The theoretical expected value is 4 and the simulated expected value is {sim_expected}')
def _sample_scipy(self, size):
    p = float(self.p)
    from scipy.stats import geom
    return geom.rvs(p=p, size=size)
def test_posterior_model_multichain(true_tree=None,
                                    start_tree=None,
                                    sim_lengths=[250] * 800,
                                    summaries=None,
                                    thinning_coef=1,
                                    admixtures_of_true_tree=None,
                                    no_leaves_true_tree=4,
                                    wishart_df=None,
                                    sim_from_wishart=False,
                                    no_chains=8,
                                    result_file='results_mc3.csv',
                                    emp_cov=None,
                                    emp_remove=-1,
                                    rescale_empirical_cov=False):
    if true_tree is None:
        if admixtures_of_true_tree is None:
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree,
                                       admixtures_of_true_tree)
    else:
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)

    true_x = (true_tree, 0)
    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))

    if start_tree is None:
        start_tree = true_tree
    start_x = (start_tree, 0)

    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        r = m.shape[0]
        print m
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print m
    if emp_cov is not None:
        m = emp_cov

    if rescale_empirical_cov:
        posterior, multiplier = initialize_posterior(
            m, wishart_df, use_skewed_distr=True,
            rescale=rescale_empirical_cov)
    else:
        posterior = initialize_posterior(m, wishart_df,
                                         use_skewed_distr=True,
                                         rescale=rescale_empirical_cov)
        multiplier = None

    print 'true_tree=', unique_identifier_and_branch_lengths(true_tree)
    if rescale_empirical_cov:
        post_ = posterior((scale_tree_copy(true_x[0], 1.0 / multiplier),
                           true_x[1] / multiplier))
    else:
        post_ = posterior(true_x)
    print 'likelihood(true_tree)', post_[0]
    print 'prior(true_tree)', post_[1]
    print 'posterior(true_tree)', sum(post_)

    if summaries is None:
        summaries = [s_variable('posterior'),
                     s_variable('mhr'),
                     s_no_admixes()]

    proposal = basic_meta_proposal()
    #proposal.props=proposal.props[2:] #a little hack under the hood
    #proposal.params=proposal.params[2:] #a little hack under the hood.

    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    sample_verbose_scheme_first = deepcopy(sample_verbose_scheme)
    if 'posterior' in sample_verbose_scheme:
        sample_verbose_scheme_first['posterior'] = (1, 1)  #(1,1)
        sample_verbose_scheme_first['no_admixes'] = (1, 1)
    #if 'likelihood' in sample_verbose_scheme:
    #    sample_verbose_scheme_first['likelihood']=(1,1)
    print sample_verbose_scheme_first

    MCMCMC(starting_trees=[deepcopy(start_x) for _ in range(no_chains)],
           posterior_function=posterior,
           summaries=summaries,
           temperature_scheme=fixed_geometrical(800.0, no_chains),
           printing_schemes=[sample_verbose_scheme_first] +
                            [sample_verbose_scheme for _ in range(no_chains - 1)],
           iteration_scheme=sim_lengths,
           overall_thinnings=int(thinning_coef),
           proposal_scheme=[adaptive_proposal() for _ in range(no_chains)],
           cores=no_chains,
           no_chains=no_chains,
           multiplier=multiplier,
           result_file=result_file,
           store_permuts=False)
    print 'finished MC3'
    #save_pandas_dataframe_to_csv(results, result_file)
    #save_permuts_to_csv(permuts, get_permut_filename(result_file))
    return true_tree
def test_posterior_model(true_tree=None,
                         start_tree=None,
                         sim_length=100000,
                         summaries=None,
                         thinning_coef=19,
                         admixtures_of_true_tree=None,
                         no_leaves_true_tree=4,
                         filename='results.csv',
                         sim_from_wishart=False,
                         wishart_df=None,
                         sap_sim=False,
                         sap_ana=False,
                         resimulate_regrafted_branch_length=False,
                         emp_cov=None,
                         big_posterior=False,
                         rescale_empirical_cov=False):
    if true_tree is None:
        if admixtures_of_true_tree is None:
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree,
                                       admixtures_of_true_tree,
                                       skewed_admixture_prior=sap_sim)
    else:
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)

    true_x = (true_tree, 0)
    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))

    if start_tree is None:
        start_tree = true_tree
    start_x = (start_tree, 0)

    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        r = m.shape[0]
        print m
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print m
    if emp_cov is not None:
        m = emp_cov

    if big_posterior:
        posterior = initialize_big_posterior(m, wishart_df,
                                             use_skewed_distr=sap_ana)
    else:
        posterior = initialize_posterior(m, wishart_df,
                                         use_skewed_distr=sap_ana,
                                         rescale=rescale_empirical_cov)

    print 'true_tree=', unique_identifier_and_branch_lengths(true_tree)
    post_ = posterior(true_x)
    print 'likelihood(true_tree)', post_[0]
    print 'prior(true_tree)', post_[1]
    print 'posterior(true_tree)', sum(post_[:2])

    if summaries is None:
        summaries = [s_posterior(), s_variable('mhr'), s_no_admixes()]

    proposal = adaptive_proposal(
        resimulate_regrafted_branch_length=resimulate_regrafted_branch_length)
    #proposal.props=proposal.props[2:] #a little hack under the hood
    #proposal.params=proposal.params[2:] #a little hack under the hood.

    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    sample_verbose_scheme['posterior'] = (1, 1)
    sample_verbose_scheme['no_admixes'] = (1, 1)

    final_tree, final_posterior, results, _ = basic_chain(
        start_x, summaries, posterior, proposal,
        post=None, N=sim_length,
        sample_verbose_scheme=sample_verbose_scheme,
        overall_thinning=int(max(thinning_coef, sim_length / 60000)),
        i_start_from=0, temperature=1.0,
        proposal_update=None, check_trees=False)

    save_to_csv(results, summaries, filename=filename)
    return true_tree
# payoff matrix for player 2 - sent into FTPL
payoff_matrix_p2 = {0: [], 1: [], 2: []}

# action array for player 1 - selected by EW
action_array_p1 = []

# action array for player 2 - selected by FTPL
action_array_p2 = []

# calculate learning rate
k = 3
n = 100
epsilon = theo_opt_epsilon(k, n)

# generate hallucinations
hallucinations = geom.rvs(epsilon, size=len(payoff_matrix_p2))

possible_actions = np.array([0, 1, 2])
p1_total_payoff = 0
p2_total_payoff = 0
regret_array_p1 = []
regret_array_p2 = []
combined_actions_over_time = []

for r in range(n):
    ew_action = exponential_weights(payoff_matrix_p1, epsilon, h1, r)
    action_array_p1.append(ew_action)
""" Generating and plotting geometric distributions In sports it is common for players to make multiple attempts to score points for themselves or their teams. Each single attempt can have two possible outcomes, scoring or not scoring. Those situations can be modeled with geometric distributions. With scipy.stats you can generate samples using the rvs() function for each distribution. Consider the previous example of a basketball player who scores free throws with a probability of 0.3. Generate a sample, and plot it. numpy has been imported for you with the standard alias np. Generate a sample with size=10000 from a geometric distribution with a probability of success of 0.3. Plot the sample generated. """ # Import geom, matplotlib.pyplot, and seaborn from scipy.stats import geom import matplotlib.pyplot as plt import seaborn as sns # Create the sample sample = geom.rvs(p=0.3, size=10000, random_state=13) # Plot the sample sns.distplot(sample, bins=np.linspace(0, 20, 21), kde=False) plt.show()
.output_png {
    display: table-cell;
    text-align: center;
    vertical-align: center;
}
</style>
""")

plt.figure(dpi=100)

##### COMPUTATION #####

# DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE
p_real = 0.3

# DRAW A SAMPLE OF N=100
np.random.seed(42)
sample = geom.rvs(p=p_real, size=100)

##### SIMULATION #####

# MODEL BUILDING
with pm.Model() as model:
    p = pm.Uniform("p")
    geometric = pm.Geometric("geometric", p=p, observed=sample)

# MODEL RUN
with model:
    step = pm.Metropolis()
    trace = pm.sample(100000, step=step)
    burned_trace = trace[50000:]

# P - 95% CONF INTERVAL
ps = burned_trace["p"]
def sample(self, state=None):
    n = geom.rvs(self.p, loc=0)
    return "".join(self.val.sample(state) for i in range(n))
help="Cytosine deamination rate for double stranded DNA [%(default)s]") return parser.parse_args() if __name__ == "__main__": args = parse_args() args.hairpin = args.hairpin.upper() ref1 = randstr(1000000) # trimodal fragment length distribution fraglens = poisson_mix_rvs((60, 80, 100), (0.7, 0.05), args.numreads) # overhang distribution overhangs = geom.rvs(1.0 / (1.0 + args.mean_sslen), loc=-1, size=args.numreads * 2) # output filenames collapsed = "{}collapsed.fastq".format(args.oprefix) uncollapsed1 = "{}uncollapsed_r1.fastq".format(args.oprefix) uncollapsed2 = "{}uncollapsed_r2.fastq".format(args.oprefix) hlen = len(args.hairpin) with open(collapsed, "w") as f_col, \ open(uncollapsed1, "w") as f_unc1, \ open(uncollapsed2, "w") as f_unc2: for rnum, fraglen in enumerate(fraglens): l_overhang, r_overhang = overhangs[2 * rnum:2 * rnum + 2]