Example #1
	def sampleregex(self, trace, depth=0, conceptDist="default"):
		"""
		conceptDist: 'default' assumes base concept probabilities as defined in trace
					 'uniform' assumes uniform distribution over base concepts
		"""
		if depth==0:
			p_regex = self.p_regex_no_concepts
		elif depth==maxDepth:
			p_regex = self.p_regex_no_recursion if trace.baseConcepts else self.p_regex_no_concepts_no_recursion
		else:
			p_regex = self.p_regex if trace.baseConcepts else self.p_regex_no_concepts
		
		items = list(p_regex.items())
		idx = np.random.choice(range(len(items)), p=[p for k,p in items])
		R, p = items[idx]
			
		if R == pre.String:
			s = pre.Plus(pre.dot, p=0.3).sample()
			return R(s)
		elif R in self.character_classes:
			return R
		elif R in [pre.Concat, pre.Alt]:
			n = geom.rvs(0.8, loc=1)
			values = [self.sampleregex(trace, depth+1) for i in range(n)]
			return R(values)
		elif R in [pre.KleeneStar, pre.Plus, pre.Maybe]:
			return R(self.sampleregex(trace, depth+1))
		elif R == CONCEPT:
			if conceptDist == "default":
				return RegexWrapper(np.random.choice(trace.baseConcepts, p=[math.exp(trace.logpConcept(c)) for c in trace.baseConcepts]))
			elif conceptDist == "uniform":
				return RegexWrapper(np.random.choice(trace.baseConcepts))
def create_COG(mode, mot):
    # generate random sequence set1 (100 seqs of length 300 bp)
    num_seqs_in_set = 100
    len_seq = 300

    geom_rvs = geom.rvs(0.75, size=num_seqs_in_set,
                        loc=-1)  #sym2=0.75, sym3=.7. Originally 0.5

    set1 = MGlib.random_DNA(len_seq, {
        'A': 0.3,
        'C': 0.2,
        'G': 0.2,
        'T': 0.3
    }, num_seqs_in_set)

    # sample large number of sites from motif
    pmot1 = MGlib.sample_motif(mot, num_seqs_in_set)

    if mode == "positive":
        #insert sites in sequences
        e = 0
        while (e < len(set1)):
            # edit sequence to include random site(s)
            # determine number of sites per geometric distribution
            num_sites = geom_rvs[e]
            new_sites = ""
            for j in range(0, num_sites):
                new_sites += random.choice(pmot1)
            if len(new_sites) > len_seq:
                new_sites = new_sites[:len_seq]
            set1[e] = new_sites + set1[e][len(new_sites):]
            e = e + 1

    set2 = set1
    return set2
Example #4
def follow_perturbed_leader(test_data, epsilon):
    # get values for each action at every round
    action1 = test_data[1].copy()
    action2 = test_data[2].copy()

    # generate hallucinations
    hallucinations = geom.rvs(epsilon, size=len(test_data))

    # add a hallucination at round 0
    action1.insert(0, hallucinations[0])
    action2.insert(0, hallucinations[1])

    # loop through and at each day choose the payoff of the BIH action
    # key here is that we take into account the round 0 hallucinations
    ftpl = 0
    for idx in range(len(action1)):
        if idx == 0:
            continue
        else:
            bih1, bih2 = best_in_hindsight(action1, action2, idx)
            if bih2 > bih1:
                ftpl += action2[idx]
            else:
                ftpl += action1[idx]

    # now we calculated payoff for FTPL
    # can further compare with OPT to get regret
    # print('FTPL TOTAL PAYOFF',ftpl)
    return ftpl
 def publish_block(self, extending_chain, miner):
     t0 = time.time()
     extending_chain.publish_block(miner, self.current_time)
     logging.debug("                Publishing: %s", time.time() - t0)
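     # time to the next block is geometric, with mean avg_block_time / total mining power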
     mining_time = geom.rvs(p=extending_chain.get_mining_power_sum() / self.avg_block_time)
     extending_chain.next_block_time = self.current_time + mining_time
     self.block_times.append(mining_time)
 def update_chain(self, chain, block, curr_time):
     try:
         chain.get_block(-4).mempool.txs = []
         chain.get_block(-4).txs = []
     except:
         pass  # block index out of bounds
     chain.get_block(-1).next_block = block
     #chain.add_block(block) #add the block to the chain
     chain.next_block_time = self.current_time + geom.rvs(p=chain.get_mining_power_sum()/self.avg_block_time)
    def single_sim(self):
        bootret_by_year = []
        cumulative_lengths = []
        data_panel = []
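        # block bootstrap: repeatedly pick a random start index and a
        # geometrically distributed block length until the holding period is covered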
        for i in range(self.holding_period):
            index_start = self.data.sample().index[0]
            index_start = self.data.index.get_loc(index_start)
            L_i = self.holding_period + 1
            while L_i > self.holding_period:
                L_i = geom.rvs(p = 1/self.mean_block_length)
            cumulative_lengths.append(L_i)
            if sum(cumulative_lengths) > self.holding_period:
                L_final = self.holding_period - sum(cumulative_lengths[:-1])
                if L_final > len(self.data) - index_start:
                    diff = L_final - (len(self.data) - index_start)
                    subsample_generated = self.data.iloc[index_start-diff: (index_start-diff + L_final), :]
                else:
                    subsample_generated = self.data.iloc[index_start: index_start + L_final, :]
                data_panel.append(subsample_generated)
                break
            else:
                subsample_generated = self.data.iloc[index_start: index_start + L_i, :]
                if L_i > len(self.data) - index_start :
                    L_i = len(self.data) - index_start
                data_panel.append(subsample_generated)
                cumulative_lengths[-1] = L_i

        bootstrapSample = pd.concat([subsample for subsample in data_panel], axis = 0, ignore_index = True)

        if self.stress_freq:
            historical_ret_by_year = self.data @ np.array([self.w1_stock, self.w2_bond, self.w3_gold]).T
            year_min_ret = historical_ret_by_year.idxmin()
            for i in range(self.holding_period):
                extreme_event_dummy = True if np.random.rand() < 0.05 else False
                if extreme_event_dummy:
                    if self.stress_intensity == 1:
                        bootstrapSample.iloc[i,:] = self.data.loc[year_min_ret,:]
                    else:
                        bootstrapSample.iloc[i,:] = self.data.loc[year_min_ret,:]
                        bootstrapSample.iloc[i,:] *= 1.5

        total_ret_by_year = bootstrapSample @ np.array([self.w1_stock, self.w2_bond, self.w3_gold]).T
        total_ret_by_year -= self.TER

        portfolio_path = self.capital * np.cumprod(total_ret_by_year + 1)

        cagr = (portfolio_path.values[-1] / self.capital) ** (1/self.holding_period) - 1
        annual_volatility = total_ret_by_year.std()
        maxDrawdown = max_drawdown(pd.Series(total_ret_by_year))
        omega_ratio2 = omega_ratio(pd.Series(total_ret_by_year), required_return = 0.02, annualization = 1)
        omega_ratio4 = omega_ratio(pd.Series(total_ret_by_year), required_return = 0.04, annualization = 1)
        omega_ratio8 = omega_ratio(pd.Series(total_ret_by_year), required_return = 0.08, annualization = 1)
        return (np.insert(portfolio_path.values, 0, self.capital), cagr, annual_volatility, maxDrawdown,
                omega_ratio2, omega_ratio4, omega_ratio8)
Example #8
    def emulate_sensor(self, y_in, threshold):
        finished = False
        p = 0.2
        i = 0
        y_out = np.abs(y_in) > threshold
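        # flip the thresholded output at geometrically spaced indices,
        # emulating occasional spurious sensor readings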

        while not finished:
            i += geom.rvs(p) + 7
            if i < len(y_in):
                y_out[i] = ~y_out[i]
            else:
                finished = True

        return y_out.astype(np.short)
Example #9
def FFS(G, fracs):
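    # forest-fire style traversal: starting from a random seed node, visit a
    # geometrically distributed number of neighbors of each frontier node and
    # record the traversed edges; a snapshot of the edge list is stored each
    # time the number of discovered nodes reaches the next fraction in fracs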
    L = {}
    Discovered = {}
    for node in G.nodes():
        Discovered[node] = False
    s = random.choice(G.nodes())
    Discovered[s] = True
    FFSEdges = []
    reEdges = []
    i = 0  # counter
    L[i] = set([s])
    Length = 1
    f = 0
    N = 0
    while len(L[i]) > 0:
        N += 1
        if N > 6 * G.number_of_nodes():
            L = {}
            Discovered = {}
            for node in G.nodes():
                Discovered[node] = False
            s = random.choice(G.nodes())
            Discovered[s] = True
            FFSEdges = []
            reEdges = []
            i = 0  # counter
            L[i] = set([s])
            Length = 1
            f = 0
            N = 0
        L[i + 1] = set()
        for node in L[i]:
            from scipy.stats import geom
            numNeighbors = int(geom.rvs(0.7))
            tempArray = G.neighbors(node)
            random.shuffle(tempArray)
            for neighbor in tempArray[:numNeighbors]:
                FFSEdges.append((node, neighbor))
                L[i + 1].add(neighbor)
                if Discovered[neighbor] == False:
                    Discovered[neighbor] = True
                    Length += 1
                    if Length >= fracs[f] * G.number_of_nodes():
                        reEdges.append(FFSEdges[:])
                        f += 1
                        if f >= len(fracs):
                            return reEdges
        i += 1
    return reEdges
Example #10
def sample_from_geometric_distribution(e, N):

    # geometry distribution parameter
    p = 1.0 / e

    while True:

        # sample N bids from geometric distribution
        r = geom.rvs(p, size=N)

        # calculate the average of all N sampled data point
        feedback = math.floor(r.mean())

        # ensure that feedback is positive, or sample feedback again
        if feedback > 0:
            break

    return feedback
 def __init__(self,
              space="ring",
              mobility="SRW",
              num=1):  #for symmetric random walk
     strn = "node" + str(num) + "loc"
     self.loc = int(
         load_context(strn,
                      int(np.ceil(np.random.randint(0, 359) / theta_m))))
     #initial angle, in theta_m units
     #print_log("NODE","Initial Location ",self.loc);
     self.mobilityscale = 1000
     #mobilityscale is in terms of samples. For each mobilityscale number of samples, the node moves left or right with equal probability
     #this is also the scale at which next transmission probabilities are decided
     strn = "node" + str(num) + "p"
     self.p = float(load_context(strn, 0.05))
     #probability of transmitting in a sample duration
     strn = "node" + str(num) + "next_event"
     self.next_event = int(
         load_context(strn, self.mobilityscale * geom.rvs(self.p)))
      #gets the first value for the transmission slot.
     #this is not the global time. this is time-to-next-event
     self.state = "IDLE"
     #better handling with FSM is required here
     self.samplenum = 0
     #the ongoing IQ sample number
     self.num = num
     strn = "node" + str(num) + "num_attempts"
     self.num_attempts = int(load_context(strn, 1))
      #2 is added to the length to ensure that the beginning and end
     #are zero so that the receiver can perform energy detection.
     payload = BitArray(int=self.get_loc(), length=16)
     payload = payload + BitArray(int=self.num, length=16)
     #print("payload ",payload)
     y = MAC_PHYSICAL_LAYER_PACKET(mac_payload_size=len(payload),
                                   SF=8,
                                   mac_payload_stream=payload)
     self.pktlen = len(y) + 2
     #assume len(y) IQ samples per physical layer transmission.
     self.IQ = (0 + 0j) * np.ones(self.pktlen)
     #replace this by IQ samples
     #print("length... ",len(self.IQ))
     self.IQ[1:len(y) + 1] = y
     strn = "node" + str(num) + "last_event_time"
     self.last_event_time = int(load_context(strn, 0))
Example #12
def ts_array(n, n_sim, R, l, sim, endcorr):
    end_part = n if endcorr else n - l + 1
    cont = True
    if (sim == "geom"):
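        # sim == "geom": draw geometrically distributed block lengths and stack
        # them until each of the R replicate series covers n_sim points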
        len_tot = np.repeat(0, R)
        lens = np.array(([None] * R))
        while (cont):
            temp = 1 + geom.rvs(1 / l, size=R)
            temp = np.min(np.array([temp, n_sim - len_tot]), axis=0)
            lens = np.vstack((lens, temp))
            len_tot = len_tot + temp
            cont = (any(len_tot < n_sim))
        lens = lens[1:]
        nn = lens.shape[0]
        st = np.random.randint(end_part, size=(nn, R))
    else:
        nn = int(np.ceil(n_sim / l))
        lens = np.hstack((np.repeat(l, nn - 1), 1 + (n_sim - 1) % l))
        st = np.random.randint(end_part, size=(nn, R))
    return ({'starts': st, 'lengths': lens})
def get_distribution_under_prior(leaves,
                                 admixes=None,
                                 sim_length=1000,
                                 list_of_summaries=[],
                                 thinning_criteria=None,
                                 skewed_admixture_prior=False):
    if admixes is None:
        admix_sim = True
    else:
        admix_sim = False
    res = {summ.name: [] for summ in list_of_summaries}
    for _ in xrange(sim_length):
        if admix_sim:
            admixes = geom.rvs(p=0.5) - 1
        tree = generate_phylogeny(
            leaves, admixes, skewed_admixture_prior=skewed_admixture_prior)
        if thinning_criteria is None or thinning_criteria(tree):
            for n, summary in enumerate(list_of_summaries):
                res[summary.name].append(summary.summary_of_phylogeny(tree))
    return res
Example #14
def cluster_randomly_weigthed(document, prob_coref=0.03):
    """
    Randomly creates a coreferring link between a mention, m2, and any preceding
     mention, m1.
    Probability of selecting a closer antecedent is higher, according to a
     geometric distribution
    Probability of selecting an antecedent is prob_coref, else selects
     NO_ANTECEDENT for that given mention.
    @prob_coref defaults to 0.03 which is the approximate percentage of positive
     coreferring links.
    """
    links = np.ndarray(shape=(len(document.mentions), ), dtype=int)
    links.fill(Link.NO_ANTECEDENT)
    for idx in range(1, len(document.mentions)):
        if random.random() < prob_coref:
            random_antecedent = idx - geom.rvs(0.3)
            links[
                idx] = random_antecedent if random_antecedent >= 0 else Link.NO_ANTECEDENT

    return coreference_links_to_entity_clusters(links)
Example #15
    def sampleregex(self, depth=0):
        if depth < maxDepth:
            p_regex = self.p_regex
        else:
            p_regex = self.p_regex_no_recursion

        items = list(p_regex.items())
        idx = np.random.choice(range(len(items)), p=[p for k, p in items])
        R, p = items[idx]

        if R == pre.String:
            s = pre.Plus(pre.dot, p=0.3).sample()
            return R(s)
        elif R in self.character_classes:
            return R
        elif R in [pre.Concat, pre.Alt]:
            n = geom.rvs(0.8, loc=1)
            values = [self.sampleregex(depth + 1) for i in range(n)]
            return R(values)
        elif R in [pre.KleeneStar, pre.Plus, pre.Maybe]:
            return R(self.sampleregex(depth + 1))
    def do_event(self):
        self.change_loc(self.next_event)
        #self.next_event is the last time interval
        self.last_event_time = self.last_event_time + self.next_event
        #current time
        #print("last event time of node**********",self.last_event_time)
        if self.state == "IDLE":  #next step is transmission
            self.state = "Tx"
            self.samplenum = 1
            print_log("NODE", "attempt no. ", self.num, self.num_attempts,
                      self.loc, self.last_event_time)
            self.next_event = 1
            #next event is IQ sample transmission again
        else:
            if self.state == "Tx":
                if self.samplenum == self.pktlen:  #last packet
                    #print("%%%%%%%% samplenum",self.samplenum)
                    self.state = "IDLE"
                    #better handling with FSM is required here
                    self.next_event = self.mobilityscale * geom.rvs(self.p)
                    #at the scale of mobilityscale (number of samples)
                    self.cur_loc = self.get_loc()
                    print_log("NODE", "Going to Idle...", self.num,
                              self.last_event_time, self.cur_loc)
                    self.change_loc(self.next_event)
                    payload = BitArray(int=self.get_loc(), length=16)
                    payload = payload + BitArray(int=self.num, length=16)
                    y = MAC_PHYSICAL_LAYER_PACKET(
                        mac_payload_size=len(payload),
                        SF=8,
                        mac_payload_stream=payload)
                    self.IQ[1:len(y) + 1] = y
                    self.samplenum = 0
                    #print("before next attempt",self.num_attempts,self.num)
                    self.num_attempts = self.num_attempts + 1

                else:  # not transitioning to IDLE
                    self.state = "Tx"
                    self.samplenum = self.samplenum + 1
                    self.next_event = 1
def simulate_tree(no_leaves, no_admixes=None):
    if no_admixes is None:
        no_admixes = geom.rvs(p=0.5) - 1
    tree = generate_phylogeny(no_leaves, no_admixes)
    return unique_identifier_and_branch_lengths(tree)
Example #18
    def __call__(self, options, pars, obs=None, trackobs=False):
        """Simulate process model to get predicted
        choice and sample size distributions"""


        ### Basic setup
        np.random.seed()
        N     = pars.get('N', 10000)   # number of simulated trials
        max_T = int(pars.get('max_T', 1000)) # maximum sample size


        ### Stopping rules

        if self.stoprule == 'optional':
            threshold = pars.get('theta', 3)   # decision threshold (optional only)
            r      = pars.get('r', 0)           # rate of boundary collapse (optional only)
            stop_T = None

        # fixed sample size
        elif self.stoprule == 'fixedT':
            stop_T = pars.get('stop_T', 2)
            max_T  = stop_T
            threshold = 1000

        # geometric
        elif self.stoprule == 'fixedGeom':

            threshold = 1000
            p_stop_geom = pars.get('p_stop_geom')
            minss = pars.get('minsamplesize', 1)

            # sample size (not index), adjusted by minsamplesize
            stop_T = geom.rvs(p_stop_geom, size=N) + (minss - 1)

            # don't go past max_T
            stop_T[np.where(stop_T > max_T)[0]] = max_T


        ### Search

        # probability of sampling each option
        p_sample_H = pars.get('p_sample_H', .5)
        p_sample_L = 1 - p_sample_H

        # if p_switch is specified, it will be used to generate
        # sequences of observations (rather than p_sample_H and p_sample_L)
        p_switch   = pars.get('p_switch', None)

        # are the first two samples drawn from different options?
        switchfirst = pars.get('switchfirst', False)


        ### Sequential weights

        # compute value and attentional weights for multinomial problems
        if self.problemtype == 'multinomial':
            if self.rdw is None: wopt = options
            else:                wopt = self.rdw[pars['probid']]
            weights = np.array([cpt.pweight_prelec(option, pars) for option in wopt])
            values = np.array([cpt.value_fnc(option[:,0], pars) for option in options])
            v = np.array([np.multiply(weights[i], values[i]) for i in range(len(options))])
            V = v.sum(axis=1)
            evar = np.array([np.dot(weights[i], values[i] ** 2) - np.sum(v[i]) ** 2 for i in range(len(options))])
            sigma2 = np.max([np.sum(evar), 1e-10])
            sigma2mean = np.max([np.mean(evar), 1e-10])

            # sequential weights
            omega = []
            for i, option in enumerate(options):
                omega.append(weights[i]/option[:,1])
            omega = np.array(omega)
            omega[np.isnan(omega)] = 0
            w_outcomes = np.array([np.multiply(omega[i], values[i]) for i in range(len(options))])

        elif self.problemtype == 'normal':

            if 'pow_gain' in pars:
                w_options = np.array([[0,0],[0,0]])
                for i in range(2):
                    ev, evar = cpt.normal_raised_to_power(options[i], pars['pow_gain'])
                    w_options[i] = np.array([ev, evar])
                sigma2 = w_options[:,1].sum()
                evar = w_options[:,1]
            else:
                evar = options[:,1]
                sigma2 = options[:,1].sum()
                sigma2mean = options[:,1].mean()

        # scale by variance
        if 'sc' in pars:
            # raised to power
            sc = pars.get('sc')
            variance_scale = 1 / float(np.sqrt(sigma2) ** sc)
        elif 'sc2' in pars:
            # multiplicative
            sc = pars.get('sc2')
            variance_scale = 1 / float(np.sqrt(sigma2) * sc)
        elif 'sc0' in pars:
            sc0 = pars.get('sc0')
        elif 'sc_mean' in pars:
            sc = pars.get('sc_mean')
            variance_scale = 1 / float(np.sqrt(sigma2mean) ** sc)
        elif 'sc2_mean' in pars:
            sc = pars.get('sc2_mean')
            variance_scale = 1 / float(np.sqrt(sigma2mean) * sc)
        elif 'sc_x' in pars:
            variance_scale = pars.get('sc_x')
        else:
            variance_scale = 1


        ### Starting distribution

        Z = np.zeros(N)
        if 'tau' in pars:
            tau = pars.get('tau')
            Z = laplace.rvs(loc=0, scale=tau, size=N)

        elif 'tau_trunc' in pars:
            tau = pars.get('tau_trunc')
            dx = .001
            x = np.arange(-(threshold-dx), threshold, dx)
            p = laplace.pdf(x, loc=0, scale=tau)
            pn = p/p.sum()
            Z = np.random.choice(x, N, p=pn)

        elif 'tau_rel' in pars:
            tau = pars.get('tau_rel')
            tau = tau / variance_scale
            Z = laplace.rvs(loc=0, scale=tau, size=N)

        elif 'tau_rel_trunc' in pars:
            tau = pars.get('tau_rel_trunc')
            dx = .001
            x = np.arange(-1+dx, 1, dx)
            p = laplace.pdf(x, loc=0, scale=tau)
            pn = p/p.sum()
            Z = np.random.choice(x, N, p=pn)
            Z = Z * threshold

        elif 'tau_unif' in pars:
            #tau = pars.get('tau_unif', .001)
            #theta_max = pars.get('theta_max', theta)
            #theta_max = 200
            #rng = tau * theta_max
            rng = pars.get('tau_unif', .001)
            Z = np.linspace(-rng, rng, num=N)
            np.random.shuffle(Z)
            #Z = np.random.uniform(low=(-tau), high=tau, size=N)

        elif 'tau_unif_rel' in pars:
            dx = .001
            rng = pars.get('tau_unif_rel', .001)
            Z = np.linspace(-(threshold-dx) * rng, (threshold-dx) * rng, num=N)
            np.random.shuffle(Z)

        elif 'tau_normal' in pars:
            tau = pars.get('tau_normal')
            Z = norm.rvs(loc=0, scale=tau, size=N)

        elif 'tau_normal_trunc' in pars:
            tau = pars.get('tau_normal_trunc')
            dx = .001
            x = np.arange(-(threshold-dx), threshold, dx)
            p = norm.pdf(x, loc=0, scale=tau)
            pn = p/p.sum()
            Z = np.random.choice(x, N, p=pn)


        ### Simulate

        if obs is not None:

            # assume a single sequence of known observations
            sampled_option = obs['option'].values
            outcomes       = obs['outcome'].values
            max_T          = outcomes.shape[0]
            sgn            = 2*sampled_option - 1
            sv             = np.zeros(outcomes.shape)

            if self.problemtype == 'normal':

                c = pars.get('c', 0)

                # add weighting and criterion here
                sv = cpt.value_fnc(outcomes - c, pars)

            elif self.problemtype == 'multinomial':
                for i, opt in enumerate(options):
                    for j, x in enumerate(opt):
                        ind = np.where((sampled_option==i) & (outcomes==x[0]))[0]
                        sv[ind] = w_outcomes[i][j]

            sv = np.multiply(sv, sgn)
            sampled_option = np.tile(sampled_option, (N, 1))
            outcomes = np.tile(outcomes, (N, 1))
            sv = np.tile(sv, (N, 1))


        elif self.choicerule == 'random':
            sv = np.zeros((N, max_T))
            sampled_option = None
            outcomes = None

        else:
            # otherwise, simulate sampling from options

            if False and not trackobs and self.problemtype == 'multinomial' and p_switch is None:
                sampled_option = None
                outcomes = None

                valence = deepcopy(w_outcomes)
                valence[0] = -1 * valence[0]
                valence = valence.ravel()

                p = deepcopy(options[:,:,1])
                p[0] = p_sample_L * p[0]
                p[1] = p_sample_H * p[1]
                p = p.ravel()

                sv = np.random.choice(valence, p=p, size=(N, max_T))

                # ensure that both options are sampled
                # at least once
                if switchfirst:
                    first = np.random.binomial(1, .5, size=N)
                    second = 1 - first
                    first2 = np.transpose((first, second))
                    sampled_A = first2==0
                    sampled_B = first2==1

                    observed_A = np.random.choice(range(len(w_outcomes[0])),
                                                size=sampled_A.sum(),
                                                p=options[0][:,1])
                    observed_B = np.random.choice(range(len(w_outcomes[1])),
                                                size=sampled_B.sum(),
                                                p=options[1][:,1])

                    # subjective weighting
                    sv2 = np.zeros((N, 2))
                    sv2[sampled_A] = -1 * w_outcomes[0][observed_A]
                    sv2[sampled_B] =      w_outcomes[1][observed_B]
                    sv[:,:2] = sv2

            else:

                # which option was sampled
                sampled_option = np.zeros((N, max_T), int)

                if p_switch is None:
                    # ignore switching, just search based on [p_sample_H, p_sample_L]
                    sampled_option = np.random.binomial(1, p_sample_H, size=(N, max_T))
                else:
                    # generate search sequences based on p_switch
                    switches = np.random.binomial(1, p_switch, size=(N, max_T - 1))
                    sampled_option[:,0] = np.random.binomial(1, .5, size=N)
                    for i in range(max_T - 1):
                        switch_i = switches[:,i]
                        sampled_option[:,i+1] = np.abs(sampled_option[:,i] - switch_i)

                # ensure both options sampled at least once
                if switchfirst:
                    first = np.random.binomial(1, .5, size=N)
                    sampled_option[:,0] = first
                    sampled_option[:,1] = 1 - first

                # FOR SIMULATION
                #sampled_option = np.zeros((N, max_T), int)
                #for i in range(N):
                #    arr = sampled_option[i]
                #    arr[:(max_T/2)] = 1
                #    np.random.shuffle(arr)
                #    sampled_option[i] = arr


                # FOR SIMULATION
                #p_switch = pars.get('p_switch', .5)

                #sampled_option = np.zeros((N, max_T), int)
                #sampled_option[:,0] = np.random.choice([0, 1], p=[.5, .5], size=N)

                #for i in range(max_T - 1):
                #    switch = np.random.choice([0, 1], p=[1-p_switch, p_switch], size=N)
                #    sampled_option[:,i+1] = np.abs(sampled_option[:,i] - switch)


                sampled_A = sampled_option==0
                sampled_B = sampled_option==1
                N_sampled_A = sampled_A.sum()
                N_sampled_B = sampled_B.sum()

                # observation matrix - which outcome occurred (by index)
                observed = np.zeros((N, max_T), int)
                if self.problemtype == 'multinomial':
                    observed_A = np.random.choice(range(len(w_outcomes[0])),
                                                size=sampled_A.sum(),
                                                p=options[0][:,1])
                    observed_B = np.random.choice(range(len(w_outcomes[1])),
                                                size=sampled_B.sum(),
                                                p=options[1][:,1])
                    observed[sampled_A] = observed_A
                    observed[sampled_B] = observed_B


                # record outcomes experienced (by value)
                outcomes = np.zeros((N, max_T))
                if self.problemtype == 'multinomial':
                    obj_outcomes = options[:,:,0]
                    #outcomes[sampled_A] = obj_outcomes[0][observed_A]
                    #outcomes[sampled_B] = obj_outcomes[1][observed_B]

                    # note weighting already done above
                    outcomes[sampled_A] = w_outcomes[0][observed_A]
                    outcomes[sampled_B] = w_outcomes[1][observed_B]
                    outcomes_A = outcomes[sampled_A]
                    outcomes_B = outcomes[sampled_B]
                else:
                    A, B = options
                    sigmaA = np.sqrt(A[1])
                    sigmaB = np.sqrt(B[1])

                    # weird conversion for np.truncnorm
                    lowerA, upperA = (X_MIN - A[0]) / sigmaA, (X_MAX - A[0]) / sigmaA
                    lowerB, upperB = (X_MIN - B[0]) / sigmaB, (X_MAX - B[0]) / sigmaB
                    outcomes_A = np.round(truncnorm.rvs(lowerA, upperA, loc=A[0], scale=sigmaA, size=N_sampled_A))
                    outcomes_B = np.round(truncnorm.rvs(lowerB, upperB, loc=B[0], scale=sigmaB, size=N_sampled_B))
                    outcomes[sampled_A] = outcomes_A
                    outcomes[sampled_B] = outcomes_B

                    if 'pow_gain' in pars:
                        outcomes   = cpt.value_fnc(outcomes, pars)
                        outcomes_A = cpt.value_fnc(outcomes_A, pars)
                        outcomes_B = cpt.value_fnc(outcomes_B, pars)


                # comparison
                sv = np.zeros((N, max_T))


                # criteria for each option
                if 'c' in pars:
                    # compare to constant
                    c = pars.get('c')
                    c_A = c * np.ones(outcomes_A.shape)
                    c_B = c * np.ones(outcomes_B.shape)

                elif 'c_0' in pars:
                    # compare to sample mean
                    c_0 = pars.get('c_0', 45)

                    sum_A = np.cumsum(np.multiply(sampled_A, outcomes), axis=1)
                    N_A = np.cumsum(sampled_A, axis=1, dtype=float)
                    mn_A = np.multiply(sum_A, 1/N_A)
                    mn_A[np.isnan(mn_A)] = c_0

                    sum_B = np.cumsum(np.multiply(sampled_B, outcomes), axis=1)
                    N_B = np.cumsum(sampled_B, axis=1, dtype=float)
                    mn_B = np.multiply(sum_B, 1/N_B)
                    mn_B[np.isnan(mn_B)] = c_0

                    compA = np.multiply(outcomes - mn_B, sampled_A)
                    compB = np.multiply(outcomes - mn_A, sampled_B)
                    #sv = (-1 * compA) + compB

                else:
                    # (default) compare to true (weighted)
                    # mean of other option
                    if self.problemtype == 'multinomial':
                        A, B = V
                    elif self.problemtype == 'normal':
                        if 'pow_gain' in pars:
                            A, B = w_options[:,0]
                        else:
                            A, B = options[:,0]
                    c_A = B * np.ones(outcomes_A.shape)
                    c_B = A * np.ones(outcomes_B.shape)

                # combine
                if 'c_0' in pars:
                    sv = (-1 * compA) + compB
                else:
                    sv[sampled_A] = -1 * (outcomes_A - c_A)
                    sv[sampled_B] =      (outcomes_B - c_B)


                if 'sc0' in pars:

                    # for any options with a variance of zero,
                    # replace with sc0
                    evar[evar==0.] = sc0

                    # scaling factor for each option depends on
                    # its variance
                    sc_A, sc_B = 1/np.sqrt(evar)
                    sv[sampled_A] = sv[sampled_A] * sc_A
                    sv[sampled_B] = sv[sampled_B] * sc_B
                else:
                    # fixed scaling factor across all options
                    sv = sv * variance_scale


                # noise
                if 'c_sigma' in pars:
                    c_sigma = pars.get('c_sigma')
                    err = np.random.normal(loc=0, scale=c_sigma, size=outcomes.shape)

                elif 'dv_sigma' in pars:
                    dv_sigma = pars.get('dv_sigma')
                    err = np.random.normal(loc=0, scale=dv_sigma, size=N)
                    err = np.tile(err, (max_T, 1)).transpose()
                else:
                    err = np.zeros(outcomes.shape)

                sv = sv + err

        ### Accumulation

        # add starting states to first outcome
        sv[:,0] = sv[:,0] + Z

        # p_stay
        #p_stay = pars.get('p_stay', 0)
        #if p_stay > 0:
        #    attended = np.random.binomial(1, 1-p_stay, size=(N, max_T))
        #    sv = np.multiply(sv, attended)


        # accumulate
        P = np.cumsum(sv, axis=1)


        ### Stopping

        if self.stoprule == 'optional':
            if r > 0:
                # collapsing boundaries
                threshold_min = .1
                upper = threshold_min * np.ones((N, max_T))
                dec = np.arange(threshold, threshold_min, -r*threshold)
                dec = dec[:max_T]
                upper[:,:dec.shape[0]] = np.tile(dec, (N, 1))

                lower = -threshold_min * np.ones((N, max_T))
                inc = np.arange(-threshold, -threshold_min, r*threshold)
                inc = inc[:max_T]
                lower[:,:inc.shape[0]] = np.tile(inc, (N, 1))

                crossed = -1 * (P < lower) + 1 * (P > upper)
            else:
                # fixed boundaries
                crossed = -1 * (P < -threshold) + 1 * (P > threshold)

            # if minimum sample size, prevent stopping
            minsamplesize = pars.get('minsamplesize', 1) - 1
            crossed[:,:minsamplesize] = 0

            # any trials where hit max_T, make decision based on
            # whether greater or less than zero
            nodecision = np.where(np.sum(np.abs(crossed), axis=1)==0)[0]
            if len(nodecision) > 0:
                n_pos = np.sum(P[nodecision,max_T-1] > 0)
                n_eq = np.sum(P[nodecision,max_T-1] == 0)
                n_neg = np.sum(P[nodecision,max_T-1] < 0)
                #assert n_eq == 0, "reached max_T with preference of 0"

                crossed[nodecision,max_T-1] +=  1*(P[nodecision,max_T-1] >= 0)
                crossed[nodecision,max_T-1] += -1*(P[nodecision,max_T-1] < 0)

        elif self.stoprule == 'fixedT':
            crossed = np.zeros((N, stop_T), dtype=int)
            crossed[:,(stop_T-1)] = np.sign(P[:,(stop_T-1)])

            indifferent = np.where(crossed[:,(stop_T-1)]==0)[0]
            n_indifferent = len(indifferent)
            crossed[indifferent] = np.random.choice([-1,1], p=[.5, .5], size=(n_indifferent,1))
            assert np.sum(crossed[:,(stop_T-1)]==0)==0

        elif self.stoprule == 'fixedGeom':

            crossed = np.zeros((N, max_T), dtype=int)
            crossed[range(N),stop_T-1] = np.sign(P[range(N),stop_T-1])

            indifferent = np.where(crossed[range(N),stop_T-1]==0)[0]
            n_indifferent = len(indifferent)
            t_indifferent = (stop_T-1)[indifferent]
            crossed[indifferent,t_indifferent] = np.random.choice([-1,1], p=[.5,.5], size=n_indifferent)


        if obs is not None:

            p_stop_choose_A = np.sum(crossed==-1, axis=0)*(1/float(N))
            p_stop_choose_B = np.sum(crossed==1, axis=0)*(1/float(N))
            p_sample = 1 - (p_stop_choose_A + p_stop_choose_B)

            return {'p_stop_choose_A': p_stop_choose_A,
                    'p_stop_choose_B': p_stop_choose_B,
                    'p_sample': p_sample,
                    'traces': P}

        else:

            # samplesize is the **index** where threshold is crossed
            samplesize = np.sum(1*(np.cumsum(np.abs(crossed), axis=1)==0), axis=1)
            choice = (crossed[range(N),samplesize] + 1)/2
            p_resp = choice.mean()
            ss_A = samplesize[choice==0]
            ss_B = samplesize[choice==1]

            p_stop_A = np.zeros(max_T)
            p_stop_B = np.zeros(max_T)
            p_stop_A_f = np.bincount(ss_A, minlength=max_T)
            p_stop_B_f = np.bincount(ss_B, minlength=max_T)
            if self.stoprule == 'optional' or self.stoprule == 'fixedGeom':
                if p_stop_A_f.sum() > 0:
                    p_stop_A = p_stop_A_f/float(p_stop_A_f.sum())
                if p_stop_B_f.sum() > 0:
                    p_stop_B = p_stop_B_f/float(p_stop_B_f.sum())

            elif self.stoprule == 'fixedT':
                p_stop_A[stop_T-1] = 1
                p_stop_B[stop_T-1] = 1

            assert (p_stop_A_f.sum() + p_stop_B_f.sum()) == N

            p_stop_cond = np.transpose([p_stop_A, p_stop_B])
            p_stop_cond[np.isnan(p_stop_cond)] = 0.
            f_stop_cond = np.transpose([p_stop_A_f, p_stop_B_f])/float(N)


            # only include data up to choice
            outcome_ind = None
            traces = None
            if type(sampled_option) is np.ndarray and trackobs:
                sampled_option = [sampled_option[i][:(samplesize[i]+1)] for i in range(samplesize.shape[0])]
                outcomes       = [outcomes[i][:(samplesize[i]+1)] for i in range(samplesize.shape[0])]
                traces         = [P[i][:(samplesize[i]+1)] for i in range(samplesize.shape[0])]
                if self.problemtype == 'multinomial':
                    outcome_ind    = [observed[i][:(samplesize[i]+1)] for i in range(samplesize.shape[0])]


            return {'choice': choice,
                    'samplesize': samplesize + 1,
                    'p_resp': np.array([1-p_resp, p_resp]),
                    'p_stop_cond': p_stop_cond,
                    'f_stop_cond': f_stop_cond,
                    'sampled_option': sampled_option,
                    'outcomes': outcomes,
                    'outcome_ind': outcome_ind,
                    'traces': traces,
                    'Z': Z
                    }
Example #19
def sol_num(n, p1, p2, K):
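    # Monte Carlo estimate of P(X + Y > K) for independent X ~ Geom(p1), Y ~ Geom(p2);
    # e.g. sol_num(100000, 0.3, 0.5, 6) (illustrative values)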
    X = geom.rvs(p1, size=n)
    Y = geom.rvs(p2, size=n)
    proba = np.sum(X + Y > K) / n
    return proba
Example #20
def vector2(n, p, lambda1):
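    # draw pairs (N, X) with N ~ Geom(p) and X | N ~ Gamma(shape=N, rate=lambda1)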
    N = geom.rvs(p, size=n)
    X = np.empty(shape=n)
    for i in range(n):
        X[i] = gamma.rvs(a=N[i], scale=1 / lambda1, size=1)
    return np.column_stack((N, X))
print("para k(overflow floods in 100 years) = 0 con 10000000 simulaciones: ",
      poiss0)
print("para k(overflow floods in 100 years) = 1 con 10000000 simulaciones: ",
      poiss1)
print("para k(overflow floods in 100 years) = 2 con 10000000 simulaciones: ",
      poiss2)
print("para k(overflow floods in 100 years) = 3 con 10000000 simulaciones: ",
      poiss3)
print("para k(overflow floods in 100 years) = 4 con 10000000 simulaciones: ",
      poiss4)
print("para k(overflow floods in 100 years) = 5 con 10000000 simulaciones: ",
      poiss5)
print("para k(overflow floods in 100 years) = 6 con 10000000 simulaciones: ",
      poiss6)

print("-----------------------------------------------------------------")
print("Ejercicio 4)\n")

p2 = 0.05
# x = 5
print("Con p = 0.05 y x = 5:")
y = geom.rvs(p2, size=size)

a5, b5 = np.unique(y, return_counts=True)
c5 = b5 / size

f5 = c5[4]
print(
    "Probability that 5 attempts are needed to successfully \nconnect a call in 10000000 simulations: ",
    f5)
Example #22
 def __init__(self, avgContent, number_of_files):
     self.avgContent = avgContent
     self.number_of_files = number_of_files
     
     
     self.filesize = geom.rvs(1.0/self.avgContent, size=number_of_files)
Example #23
ax.set_title('Probability mass function of Geom(0.3)')
ax.vlines(x, 0, geom.pmf(x, p), colors='b', lw=4, alpha=0.5)

rv = geom(p)
ax.vlines(x,
          0,
          rv.pmf(x),
          colors='k',
          linestyles='--',
          lw=1,
          label="Frozen PMF")
ax.legend(loc='best')
plt.show()

print("Media %f" % mean)
print("Varianza %f" % var)
print("Sesgo %f" % skew)
print("Curtosis %f" % kurt)

fig, ax = plt.subplots(1, 1)
prob = geom.cdf(x, p)
ax.plot(x, prob, 'bo', ms=8, label="Cumulative distribution function")
plt.title('Cumulative distribution function')
plt.show()

fig, ax = plt.subplots(1, 1)
r = geom.rvs(p, size=10000)
plt.hist(r)
plt.title('Histogram of the random sample')
plt.show()
from scipy.stats import geom
import numpy as np
import seaborn as sb


simlen=int(1e8) # calculate the expectation value up to the largest practical value, keeping run time in mind
p=0.25
t=[]
s=geom.rvs(p,size=simlen)
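# the empirical mean of s approximates E[X] = 1/p = 4 for p = 0.25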
#for i in range(1,simlen):

unique, counts = np.unique(s, return_counts=True)
for i in range(len(unique)):
  t.append(unique[i]*counts[i]/simlen)#calculating expectation values of each event from the generated data
sum=0
for q in range(len(t)):
  sum=sum+t[q]
sim_expected=sum
theo_expected=4
#plotting
x = np.arange(0,25)

p=0.25
dist=geom(p)
ax = sb.barplot(x=x, y=dist.pmf(x))
ax.set(xlabel='n', ylabel='p(n)')
print(f'The theoretical expected value is 4 and the simulated expected value is {sim_expected}')
Example #25
 def _sample_scipy(self, size):
     p = float(self.p)
     from scipy.stats import geom
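     # scipy's geom is supported on {1, 2, ...}: the number of Bernoulli trials
     # up to and including the first success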
     return geom.rvs(p=p, size=size)
def test_posterior_model_multichain(true_tree=None,
                                    start_tree=None,
                                    sim_lengths=[250] * 800,
                                    summaries=None,
                                    thinning_coef=1,
                                    admixtures_of_true_tree=None,
                                    no_leaves_true_tree=4,
                                    wishart_df=None,
                                    sim_from_wishart=False,
                                    no_chains=8,
                                    result_file='results_mc3.csv',
                                    emp_cov=None,
                                    emp_remove=-1,
                                    rescale_empirical_cov=False):
    if true_tree is None:
        if admixtures_of_true_tree is None:
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree,
                                       admixtures_of_true_tree)
    else:
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)
    true_x = (true_tree, 0)

    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))
    if start_tree is None:
        start_tree = true_tree

    start_x = (start_tree, 0)
    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        r = m.shape[0]
        print m
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print m
    if emp_cov is not None:
        m = emp_cov
    if rescale_empirical_cov:
        posterior, multiplier = initialize_posterior(
            m,
            wishart_df,
            use_skewed_distr=True,
            rescale=rescale_empirical_cov)
    else:
        posterior = initialize_posterior(m,
                                         wishart_df,
                                         use_skewed_distr=True,
                                         rescale=rescale_empirical_cov)
        multiplier = None
    print 'true_tree=', unique_identifier_and_branch_lengths(true_tree)
    if rescale_empirical_cov:
        post_ = posterior(
            (scale_tree_copy(true_x[0],
                             1.0 / multiplier), true_x[1] / multiplier))
    else:
        post_ = posterior(true_x)
    print 'likelihood(true_tree)', post_[0]
    print 'prior(true_tree)', post_[1]
    print 'posterior(true_tree)', sum(post_)
    if summaries is None:
        summaries = [
            s_variable('posterior'),
            s_variable('mhr'),
            s_no_admixes()
        ]
    proposal = basic_meta_proposal()
    #proposal.props=proposal.props[2:] #a little hack under the hood
    #proposal.params=proposal.params[2:] #a little hack under the hood.
    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    sample_verbose_scheme_first = deepcopy(sample_verbose_scheme)
    if 'posterior' in sample_verbose_scheme:
        sample_verbose_scheme_first['posterior'] = (1, 1)  #(1,1)
        sample_verbose_scheme_first['no_admixes'] = (1, 1)
    #if 'likelihood' in sample_verbose_scheme:
    #sample_verbose_scheme_first['likelihood']=(1,1)
    print sample_verbose_scheme_first
    MCMCMC(starting_trees=[deepcopy(start_x) for _ in range(no_chains)],
           posterior_function=posterior,
           summaries=summaries,
           temperature_scheme=fixed_geometrical(800.0, no_chains),
           printing_schemes=[sample_verbose_scheme_first] +
           [sample_verbose_scheme for _ in range(no_chains - 1)],
           iteration_scheme=sim_lengths,
           overall_thinnings=int(thinning_coef),
           proposal_scheme=[adaptive_proposal() for _ in range(no_chains)],
           cores=no_chains,
           no_chains=no_chains,
           multiplier=multiplier,
           result_file=result_file,
           store_permuts=False)
    print 'finished MC3'
    #save_pandas_dataframe_to_csv(results, result_file)
    #save_permuts_to_csv(permuts, get_permut_filename(result_file))
    return true_tree
def test_posterior_model(true_tree=None,
                         start_tree=None,
                         sim_length=100000,
                         summaries=None,
                         thinning_coef=19,
                         admixtures_of_true_tree=None,
                         no_leaves_true_tree=4,
                         filename='results.csv',
                         sim_from_wishart=False,
                         wishart_df=None,
                         sap_sim=False,
                         sap_ana=False,
                         resimulate_regrafted_branch_length=False,
                         emp_cov=None,
                         big_posterior=False,
                         rescale_empirical_cov=False):
    if true_tree is None:
        if admixtures_of_true_tree is None:
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree,
                                       admixtures_of_true_tree,
                                       skewed_admixture_prior=sap_sim)
    else:
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)

    true_x = (true_tree, 0)

    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))
    if start_tree is None:
        start_tree = true_tree

    start_x = (start_tree, 0)
    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        r = m.shape[0]
        print m
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print m
    if emp_cov is not None:
        m = emp_cov
    if big_posterior:
        posterior = initialize_big_posterior(m,
                                             wishart_df,
                                             use_skewed_distr=sap_ana)
    else:
        posterior = initialize_posterior(m,
                                         wishart_df,
                                         use_skewed_distr=sap_ana,
                                         rescale=rescale_empirical_cov)
    print 'true_tree=', unique_identifier_and_branch_lengths(true_tree)
    post_ = posterior(true_x)
    print 'likelihood(true_tree)', post_[0]
    print 'prior(true_tree)', post_[1]
    print 'posterior(true_tree)', sum(post_[:2])
    if summaries is None:
        summaries = [s_posterior(), s_variable('mhr'), s_no_admixes()]
    proposal = adaptive_proposal(
        resimulate_regrafted_branch_length=resimulate_regrafted_branch_length)
    #proposal.props=proposal.props[2:] #a little hack under the hood
    #proposal.params=proposal.params[2:] #a little hack under the hood.
    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    sample_verbose_scheme['posterior'] = (1, 1)
    sample_verbose_scheme['no_admixes'] = (1, 1)
    final_tree, final_posterior, results, _ = basic_chain(
        start_x,
        summaries,
        posterior,
        proposal,
        post=None,
        N=sim_length,
        sample_verbose_scheme=sample_verbose_scheme,
        overall_thinning=int(max(thinning_coef, sim_length / 60000)),
        i_start_from=0,
        temperature=1.0,
        proposal_update=None,
        check_trees=False)
    save_to_csv(results, summaries, filename=filename)
    return true_tree
Example #28
    # payoff matrix for player 2 - sent into FTPL
    payoff_matrix_p2 = {0: [], 1: [], 2: []}

    # action array for player 1 - selected by EW
    action_array_p1 = []

    # payoff matrix for player 2 - selected by FTPL
    action_array_p2 = []

    # calculate learning rate
    k = 3
    n = 100
    epsilon = theo_opt_epsilon(k, n)

    # generate hallucinations
    hallucinations = geom.rvs(epsilon, size=len(payoff_matrix_p2))

    possible_actions = np.array([0, 1, 2])

    p1_total_payoff = 0
    p2_total_payoff = 0

    regret_array_p1 = []
    regret_array_p2 = []

    combined_actions_over_time = []

    for r in range(n):
        ew_action = exponential_weights(payoff_matrix_p1, epsilon, h1, r)
        action_array_p1.append(ew_action)
"""
Generating and plotting geometric distributions

In sports it is common for players to make multiple attempts to score points for themselves or their teams. Each single attempt can have two possible outcomes, scoring or not scoring. Those situations can be modeled with geometric distributions. With scipy.stats you can generate samples using the rvs() function for each distribution.

Consider the previous example of a basketball player who scores free throws with a probability of 0.3. Generate a sample, and plot it.

numpy has been imported for you with the standard alias np.
Generate a sample with size=10000 from a geometric distribution with a probability of success of 0.3.
Plot the sample generated.
"""
# Import geom, matplotlib.pyplot, and seaborn
from scipy.stats import geom
import matplotlib.pyplot as plt
import seaborn as sns

# Create the sample
sample = geom.rvs(p=0.3, size=10000, random_state=13)

# Plot the sample
sns.distplot(sample, bins=np.linspace(0, 20, 21), kde=False)
plt.show()
plt.figure(dpi=100)

##### COMPUTATION #####
# DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE
p_real = 0.3

# DRAW A SAMPLE OF N=1000
np.random.seed(42)
sample = geom.rvs(p=p_real, size=100)

##### SIMULATION #####
# MODEL BUILDING
with pm.Model() as model:
    p = pm.Uniform("p")
    geometric = pm.Geometric("geometric", p=p, observed=sample)
    
# MODEL RUN
with model:
    step = pm.Metropolis()
    trace = pm.sample(100000, step=step)
    burned_trace = trace[50000:]

# P - 95% CONF INTERVAL
ps = burned_trace["p"]
Example #31
 def sample(self, state=None):
     n = geom.rvs(self.p, loc=0)
     return "".join(self.val.sample(state) for i in range(n))
Example #32
    def __init__(self, avgContent, number_of_files):
        self.avgContent = avgContent
        self.number_of_files = number_of_files

        self.filesize = geom.rvs(1.0 / self.avgContent, size=number_of_files)
Example #33
        help="Cytosine deamination rate for double stranded DNA [%(default)s]")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    args.hairpin = args.hairpin.upper()

    ref1 = randstr(1000000)

    # trimodal fragment length distribution
    fraglens = poisson_mix_rvs((60, 80, 100), (0.7, 0.05), args.numreads)

    # overhang distribution
    overhangs = geom.rvs(1.0 / (1.0 + args.mean_sslen),
                         loc=-1,
                         size=args.numreads * 2)

    # output filenames
    collapsed = "{}collapsed.fastq".format(args.oprefix)
    uncollapsed1 = "{}uncollapsed_r1.fastq".format(args.oprefix)
    uncollapsed2 = "{}uncollapsed_r2.fastq".format(args.oprefix)

    hlen = len(args.hairpin)

    with open(collapsed, "w") as f_col, \
            open(uncollapsed1, "w") as f_unc1, \
            open(uncollapsed2, "w") as f_unc2:
        for rnum, fraglen in enumerate(fraglens):

            l_overhang, r_overhang = overhangs[2 * rnum:2 * rnum + 2]