Example no. 1
  def hypothesisTest(self, seq1, seq2, totalSeq1, totalSeq2):
    replicates = self.preferences['Replicates']
    
    # create null distribution
    pooledN = totalSeq1 + totalSeq2
    pooledP = float(seq1 + seq2) / pooledN
    
    diff = []
    for dummy in xrange(0, replicates):
      c1 = binomial(totalSeq1, pooledP)         
      c2 = binomial(totalSeq2, pooledP)  
            
      diff.append(float(c1) / totalSeq1 - float(c2) / totalSeq2) 
      
    # determine number of replicates w/ an effect size more extreme than the observed data
    obsDiff = float(seq1) / totalSeq1 - float(seq2) / totalSeq2

    leftCount = 0
    rightCount = 0
    twoSidedCount = 0
    for value in diff:
      if value <= obsDiff:
        leftCount += 1
      if value >= obsDiff:
        rightCount += 1
      if abs(value) >= abs(obsDiff):
        twoSidedCount += 1
        
    oneSidedCount = leftCount
    if rightCount < oneSidedCount:
      oneSidedCount = rightCount
    
    return float(oneSidedCount) / replicates, float(twoSidedCount) / replicates, ''
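A vectorized sketch of the same null-resampling idea, for reference only (illustrative names; assumes Python 3 with numpy available):

import numpy as np
from numpy.random import binomial

def resampling_pvalues(seq1, seq2, totalSeq1, totalSeq2, replicates=10000):
    # Pool the two samples to build the null distribution of the difference
    # in proportions, exactly as in the method above.
    pooledP = (seq1 + seq2) / (totalSeq1 + totalSeq2)
    obsDiff = seq1 / totalSeq1 - seq2 / totalSeq2
    diff = (binomial(totalSeq1, pooledP, replicates) / totalSeq1
            - binomial(totalSeq2, pooledP, replicates) / totalSeq2)
    oneSided = min(np.sum(diff <= obsDiff), np.sum(diff >= obsDiff)) / replicates
    twoSided = np.sum(np.abs(diff) >= abs(obsDiff)) / replicates
    return oneSided, twoSided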
Example no. 2
 def test_n_zero(self):
     # Tests the corner case of n == 0 for the binomial distribution.
     # binomial(0, p) should be zero for any p in [0, 1].
     # This test addresses issue #3480.
     zeros = np.zeros(2, dtype='int')
     for p in [0, .5, 1]:
         assert_(random.binomial(0, p) == 0)
         np.testing.assert_array_equal(random.binomial(zeros, p), zeros)
Example no. 3
def test_mb5():
    xmax = 3
    n = 1000
    x = np.concatenate((binomial(xmax,0.1,9*n),binomial(xmax,0.9,n)))
    MB = tbm.TwoBinomialMixture()
    MB.EMalgo(x,xmax)
    MB.parameters()
    assert(np.absolute(MB.Lambda-0.9)<0.1)
Example no. 4
def test_mb7():
    xmax = 5
    n = 100
    x = np.concatenate((binomial(xmax,0.1,99*n),binomial(xmax,0.8,n)))
    MB = tbm.TwoBinomialMixture()
    MB.EMalgo(x,xmax)
    MB.parameters()
    assert MB.r1>0.7
Example no. 5
def test_mb6():
    xmax = 5
    n = 1000
    x = np.concatenate((binomial(xmax,0.05,9*n),binomial(xmax,0.5,n)))
    MB = tbm.TwoBinomialMixture()
    MB.EMalgo(x,xmax)
    MB.parameters()
    assert MB.r0<.1 
Example no. 6
def bino_mutual(filename,periods):
    print ('Binomial simulation of mutualistic interaction %s %ld' %(filename,periods))
    tinic=time()
    
    filename_a=filename+'_a.txt'
    minputchar_a=dlmreadlike(filename_a)
    nrows_a=len(minputchar_a)
    ncols_a=len(minputchar_a[0])
    for i in range (nrows_a):
        for j in range (ncols_a):
            minputchar_a[i][j]=float(minputchar_a[i][j])
    numspecies_a=ncols_a
    print ("numspecies a %d" %numspecies_a)
    K_a=[]
    Nindividuals_a=[]
    rowNindividuals_a=[]
    r_a=[]
    
    filename_b=filename+'_b.txt'
    minputchar_b=dlmreadlike(filename_b)
    nrows_b=len(minputchar_b)
    ncols_b=len(minputchar_b[0])
    for i in range (nrows_b):
        for j in range (ncols_b):
            minputchar_b[i][j]=float(minputchar_b[i][j])
    numspecies_b=nrows_b-3;
    print ("numspecies b %d" %numspecies_b)
    K_b=[]
    Nindividuals_b=[]
    rowNindividuals_b=[]
    r_b=[]
    
    for n in range(numspecies_a):
        rowNindividuals_a.append(int(minputchar_a[nrows_a-3][n]))
        K_a.append(int(minputchar_a[nrows_a-2][n]))
        r_a.append(minputchar_a[nrows_a-1][n])
    Nindividuals_a.append(rowNindividuals_a)
    period_year=365
    for k in range (periods-1):
        rowNi=[]
        for n in range (numspecies_a):
            rperiod=float(r_a[n]/period_year)
            # Variation due to malthusian parameter r
            incNmalth=binomial(Nindividuals_a[k][n],1-exp(-1*rperiod));
            # Second term of logistic equation
            incNlogistic= binomial((Nindividuals_a[k][n]**2)/K_a[n],1-exp(-1*rperiod));
            # Terms due to other species
            incNOtherspecies=0;
            for j in range(numspecies_a):
                incNOtherspecies=incNOtherspecies+binomial(round(Nindividuals_a[k][j]*Nindividuals_a[k][n]/K_a[n]),1-exp(-1*rperiod*minputchar_a[n][j]));
            rowNi.append(round(Nindividuals_a[k][n]+incNmalth-incNlogistic+incNOtherspecies))  
        Nindividuals_a.append(rowNi)
    tfin=time()
    print ("Elapsed time %f s" % (tfin-tinic))
    dlmwritelike(filename_a,periods,Nindividuals_a,'bino')
    plt.plot(Nindividuals_a)
    plt.show()
Example no. 7
def test_mb4():
    xmax = 5
    n = 100
    x = np.concatenate((binomial(xmax,0.1,n),binomial(xmax,0.9,n)))
    MB = tbm.TwoBinomialMixture()
    MB.EMalgo(x,xmax)
    MB.parameters()
    kappa  = MB.kappa()
    assert(np.absolute(MB.Lambda-0.5)<0.1)
Example no. 8
def random_walk(numsteps):
    x = np.zeros(numsteps)
    y = np.zeros(numsteps)
    for i in range(numsteps):
        if random.binomial(1, 0.5) == 0.:
            y[i] = random.binomial(1, 0.99)*random.choice([-1, 1])
        else:
            x[i] = random.binomial(1, 0.99)*random.choice([-1, 1])
    lim = max(max(abs(np.cumsum(x))), max(abs(np.cumsum(y)))) + 1.
    return np.cumsum(x), np.cumsum(y), (-lim, lim), (-lim, lim)
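A short usage sketch (assumes the same imports as the snippet above, i.e. numpy as np, numpy.random as random, and matplotlib.pyplot as plt):

xs, ys, xlim, ylim = random_walk(500)
plt.plot(xs, ys)
plt.xlim(xlim)
plt.ylim(ylim)
plt.show()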
Example no. 9
def getRandomCoinFlip(p):
  max_flips = 10000
  coin_flips = binomial(1, p, max_flips)
  position = 0
  while True:
    if position == max_flips:
      coin_flips = binomial(1, p, max_flips)
      position = 0
    else:
      yield coin_flips[position]
      position += 1
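The function is a generator that pre-draws flips in blocks of 10000 and refills the block once it is exhausted. A usage sketch (assuming numpy.random.binomial is imported as binomial, as in the snippet):

flips = getRandomCoinFlip(0.3)
first_ten = [next(flips) for _ in range(10)]   # ten Bernoulli(0.3) draws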
Example no. 10
def bern_y(X,p1,base_prob=.25,beta_sd=1):
    n,p = X.shape
    X_1    = X[:,:p1]
    v = 0 
    while v<1E-5:
        beta   = npran.randn(p1)*beta_sd
        if p1>0:
            eta    = cutoff(np.dot(X_1,beta)+logit(base_prob))
            y      = npran.binomial(1,invlogit(eta),n)
        else:
            y      = npran.binomial(1,base_prob,n)
        v = np.min(nplin.svd(np.hstack((X,y[:,np.newaxis])))[1])
    return y
Example no. 11
    def perform(self, X):
                
        images = numpy.copy(X)
        for idx in range(images.shape[0]):
        
            img_arr = images[idx].reshape(self.img_width, self.img_height)
            
            # Perform a horizontal reflexion, maybe...
            if binomial(n=1,p=self.p_hsymetry) == 1:
                img_arr = img_arr[:,::-1]
                            
            # Perform a translation, maybe...
            if binomial(n=1,p=self.p_translation) == 1:
                h_translation = numpy.random.randint(self.min_translation_pixels,
                                                     self.max_translation_pixels + 1)
                v_translation = numpy.random.randint(self.min_translation_pixels,
                                                     self.max_translation_pixels + 1)                 
                
                # Perform horizontal translation 
                if h_translation < 0:
                    temp = img_arr[:,-h_translation:]
                    img_arr[:,:h_translation] = temp
                    img_arr[:,h_translation:] = 0
                elif h_translation > 0:
                    temp = img_arr[:,:-h_translation]
                    img_arr[:,h_translation:] = temp
                    img_arr[:,:h_translation] = 0
                             
                # Perform vertical translation 
                if v_translation < 0:
                    temp = img_arr[-v_translation:,:]
                    img_arr[:v_translation,:] = temp
                    img_arr[v_translation:,:] = 0
                elif v_translation > 0:
                    temp = img_arr[:-v_translation,:]
                    img_arr[v_translation:,:] = temp
                    img_arr[:v_translation,:] = 0        
             
            # Perform a rotation, maybe...
            if binomial(n=1,p=self.p_rotation) == 1:
                deg_rotation = numpy.random.randint(self.min_rotation_degrees,
                                                    self.max_rotation_degrees + 1)

                if deg_rotation != 0:
                    img = Image.fromarray(img_arr)
                    img = img.rotate(deg_rotation)
                    img_arr = numpy.array(img)
                  
            images[idx] = img_arr.reshape(self.img_width * self.img_height)
         
        return images
Example no. 12
def genXy_bern_X_norm_beta(seed,n,p1,pnull,x_prob=.25,base_prob=.25,beta_sd=1):
    """ The X are normal. p1 predictive vars, pnull null vars. beta on the p1 vars is ~normal(0,beta_sd) and the intercept is logit(base_prob)"""
    if not seed == None:
        npran.seed(seed)
    X_1    = npran.binomial(1,x_prob,(n,p1))
    X_null = npran.binomial(1,x_prob,(n,pnull))
    X      = np.concatenate((X_1,X_null),axis=1)
    beta   = npran.randn(p1)*beta_sd
    if p1>0:
        eta    = cutoff(np.dot(X_1,beta)+logit(base_prob))
        y      = npran.binomial(1,invlogit(eta),n)
    else:
        y      = npran.binomial(1,base_prob,n)
    return X,y
Example no. 13
def hht(nruns):
    sum_flips = 0
    for i in range(nruns):
        flips = []
        for j in range(3):
            flips.append(npr.binomial(1, 0.5, 3)[j])
        while not(flips[len(flips)-1] == 0 and flips[len(flips)-2] == 1 and flips[len(flips)-3] == 1):
            flips.extend([npr.binomial(1, 0.5, 1)[0]])
        else:
            pass
        sum_flips += len(flips)
    print(sum_flips)
    avg = float(sum_flips)/nruns
    return avg
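As a sanity check: for a fair coin, the expected number of flips until the pattern heads, heads, tails first appears is 8, so the returned average should approach 8 for large nruns (assuming numpy.random is imported as npr, as in the snippet):

print(hht(100000))   # approximately 8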
Example no. 14
def maisant_exp(rounds):
  # both max_repeats and rounds need to go to infinity to obtain
  # convergence
  pmf = np.array([0,0,0], dtype=float)
  for i in range(rounds):
    flip = [binomial(1,0.5), binomial(1,0.5)]
    if flip == [1,1]: continue
    elif flip == [1,0]: pmf[0] += 1
    elif flip == [0,1]: pmf[1] += 1
    elif flip == [0,0]: pmf[2] += 1
    else:
      print('ERROR')
      exit()

  return pmf/sum(pmf)
Example no. 15
def transition(iD, ps):
    h, r, u, nr, im, Si = ps
    if len(list(iD)) == 0: return iD

    for k, v in iD.items():
        ri =  v['q']/v['mt']
        ap =  1/(1+ri) * v['age']/(1+v['age'])
        dp = v['rp'] * ri/(1+ri)

        if v['st'] == 'd' and binomial(1, dp) == 1:
            iD[k]['st'] = 'a'
        elif v['st'] == 'a' and binomial(1, ap) == 1:
            iD[k]['st'] = 'd'

    return iD
Example no. 16
def SimulateCreditNetwork(CN, params, DP, TR, BV, SC):
	"""
	CN - credit network
	DP - default probability array
	TR - transaction rate matrix
	BV - buy value matrix
	SC - sell cost matrix
	price - function to determine a price from value and cost
	events - number of transactions to simulate
	"""
	price = params["price"]
	events = params["events"]
	strategies = params["strategies"]
	prevent_zeros = params["prevent_zeros"]

	payoffs = dict([(n,0.) for n in CN.nodes])
	defaulters = filter(lambda n: R.binomial(1, DP[n]), CN.nodes)

	# If all agents with the same strategy default, we'll get bad payoff data
	while prevent_zeros:
		prevent_zeros = False
		for strat in set(strategies):
			agents = filter(lambda a: strategies[a]==strat, CN.nodes)
			if all([a in defaulters for a in agents]):
				prevent_zeros = True
				defaulters = filter(lambda n: R.binomial(1, DP[n]), CN.nodes)
				break

	for d in defaulters:
		for n in CN.nodes:
			if CN.adjacent(n, d):
				payoffs[n] -= CN.weights[(n, d)]
		CN.removeNode(d)
		del payoffs[d]

	m = R.multinomial(events, array(TR.flat))
	l = TR.shape[0]
	transactors = sum([[(i/l,i%l)]*m[i] for i in range(l**2)], [])
	R.shuffle(transactors)
	for b,s in transactors:
		try:
			assert b in CN.nodes and s in CN.nodes
			CN.routePayment(b, s, price(BV[b,s], SC[b,s]))
		except (AssertionError, CreditError):
			continue
		payoffs[b] += BV[b,s]
		payoffs[s] -= SC[b,s]
	return payoffs
	def num_parental_removed(self):
		p = self.immune_removal_probability()
		n = len(self.viruses)

		removed = binomial(n, p)

		return removed
Example no. 18
    def get_next_batch(self):
        epoch,  batchnum = self.curr_epoch, self.curr_batchnum
        self.advance_batch()
        data = rand(self.num_cases, self.get_data_dims()).astype(n.single) # <--changed to rand
        labels = n.require(binomial(1, 0.3, (self.num_cases, self.num_classes)), requirements='C', dtype=n.single)

        return self.curr_epoch, self.curr_batchnum, {'data':data, 'labels':labels}
Example no. 19
def genXy_binary_X_norm_beta(seed,n,p1,pnull,base_prob=.25,beta_sd=1,A_base_diag=-1,A_sd=.2):
    ''' X is binary from the Ising model, with the coefficients drawn from a normal. Y is binary, with beta's coefficients also from a normal '''
    if not seed == None:
        npran.seed(seed)
    p = p1 + pnull
    A = npran.normal(0,A_sd,(p,p))-np.diag(A_base_diag*np.ones(p))
    X = draw_random_binary(n,A)
    X_1    = X[:,:p1]
    X_null = X[:,p1:]
    beta   = npran.randn(p1)*beta_sd
    if p1>0:
        eta    = cutoff(np.dot(X_1,beta)+logit(base_prob))
        y      = npran.binomial(1,invlogit(eta),n)
    else:
        y      = npran.binomial(1,base_prob,n)
    return X,y
	def allow_immune_removal_slow(self):
		"""
		This method allows the removal of a certain number of viruses to be 
		removed from the host due to immune system pressure.

		Note: this method may be deprecated in favor of precomputing the 
		number of progeny.
		"""

		current_time = self.environment.current_time
		last_infection_time = max(self.infection_history.keys())
		time_difference = current_time - last_infection_time

		p = float(time_difference) / (self.immune_halftime + time_difference)
		n = len(self.viruses)

		# # print("Time Difference: %s, Probability: %s" % (time_difference, 
			# p))
		num_viruses_to_remove = binomial(n, p)
		# num_viruses_to_remove = int(0.6 * len(self.viruses))
		# # print('Removing %s viruses out of %s viruses from host %s.' % (
			# num_viruses_to_remove, len(self.viruses), id(self)))

		viruses_to_remove = sample(self.viruses, num_viruses_to_remove)
		for virus in viruses_to_remove:
			self.remove_virus(virus)

		# # print('Host %s is left with %s viruses.' % (id(self), len(self.
			# viruses)))

		return self
Example no. 21
def drop_samples(game, prob):
    """Drop samples from a sample game

    Samples are dropped independently with probability prob."""
    sample_map = {}
    for prof, pays in zip(np.split(game.profiles, game.sample_starts[1:]),
                          game.sample_payoffs):
        num_profiles, _, num_samples = pays.shape
        perm = rand.permutation(num_profiles)
        prof = prof[perm]
        pays = pays[perm]
        new_samples, counts = np.unique(
            rand.binomial(num_samples, prob, num_profiles), return_counts=True)
        splits = counts[:-1].cumsum()
        for num, prof_samp, pay_samp in zip(
                new_samples, np.split(prof, splits), np.split(pays, splits)):
            if num == 0:
                continue
            prof, pays = sample_map.setdefault(num, ([], []))
            prof.append(prof_samp)
            pays.append(pay_samp[..., :num])

    if sample_map:
        profiles = np.concatenate(list(itertools.chain.from_iterable(
            x[0] for x in sample_map.values())), 0)
        sample_payoffs = tuple(np.concatenate(x[1]) for x
                               in sample_map.values())
    else:  # No data
        profiles = np.empty((0, game.num_role_strats), dtype=int)
        sample_payoffs = []

    return rsgame.samplegame_copy(game, profiles, sample_payoffs, False)
Example no. 22
def SequenceDynSelf(protocell,mu,L,N):

    q = (1-mu)**L
    total=np.sum(protocell)
    global test

    while (total != 2*N):

    	"Pick the  sequence type"

        sec_freq=protocell/total
        values=np.arange(len(protocell))
        custm = sps.rv_discrete(name='custm', values=(values, sec_freq))
        R = custm.rvs(size=1)
        R=R.tolist()
        R=int(R[0])
        sample=R

        test = nprandom.binomial(1,q)

        if sample == 0:
            protocell[0]=protocell[0]+1
        elif test == 1:
            protocell[sample]=protocell[sample]+1
        else:
            protocell[0]=protocell[0]+1
        total=np.sum(protocell)

    return protocell
Example no. 23
 def __init__(self, setup, data, particle, neg_samples, ent_burnin=0, pred_burnin=0):
     """
     Initialise the data interface
     :param setup: semantic function model with training setup
     :param data: observed data of the form (nodeid, pred, out_labs, out_ids, in_labs, in_ids), with increasing nodeids
     :param particle: fantasy particle of the form (nodeid, out_labs, out_ids, in_labs, in_ids), with increasing nodeids 
     :param neg_samples: number of negative pred samples to draw for each node
     :param ent_burnin: (default 0) number of update steps to take for latent entities
     :param pred_burnin: (default 0) number of update steps to take for negative preds
     """
     # Training setup
     self.setup = setup
     self.model = setup.model
     # Negative pred samples
     self.NEG = neg_samples
     # Data
     self.filename = None
     self.load_data(data, ent_burnin, pred_burnin)
     # Fantasy particles
     self.neg_nodes = particle
     self.neg_link_counts = zeros(self.model.L)
     for i, n in enumerate(self.neg_nodes):
         assert i == n[0]
         for label in n[1]:  # Count outgoing links only, and assume we have entire graphs (similarly, only outgoing links are observed)
             self.neg_link_counts[label] += 1
     self.K = len(self.neg_nodes)
     self.neg_ents = random.binomial(1, self.model.C/self.model.D, (self.K, self.model.D))
Example no. 24
def train_test_divide(mat, percent=0.9):
    M, N = mat.shape
    test_set = np.array(rnd.binomial(1.0, 1.0 - percent, size=M * N), dtype=bool)
    test_set_mat = np.unravel_index(np.flatnonzero(test_set), (M, N))
    test_val_mat = mat[test_set_mat].copy()
    mat[test_set_mat] = 0
    return test_set_mat, test_val_mat
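A usage sketch on a toy matrix (assumes numpy imported as np and numpy.random as rnd, as in the snippet):

ratings = rnd.rand(20, 10)
test_idx, test_vals = train_test_divide(ratings, percent=0.9)
# ratings now has the held-out ~10% of entries zeroed out;
# test_idx/test_vals allow scoring predictions on those entries later.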
Example no. 25
File: tm.py Project: sbos/twtm
def generate_docs(phi, ndocs, nwords_per_doc, alpha=0.1, p0=0.8):
    K, V = phi.shape

    theta = np.zeros((ndocs, K), dtype=float)

    switch = np.append([0], binomial(1, p0, ndocs - 1))
    switch = switch == 0

    samples = dirichlet([alpha] * K, size=int(switch.sum()))
    theta[switch] = samples

    last_theta = None
    for t in xrange(0, ndocs):
        if switch[t] == True:
            last_theta = theta[t]
            continue

        theta[t] = last_theta

    def gen_z(theta):
        z = np.repeat(np.arange(K),
            multinomial(nwords_per_doc, theta, size=1)[0])
        np.random.shuffle(z)
        return z 

    z = np.apply_along_axis(gen_z, 1, theta)

    def gen_w(z):
        return np.random.multinomial(1, phi[z]).nonzero()[0][0]

    w = np.vectorize(gen_w)(z)

    return w, z, theta, switch
Example no. 26
def sim(N, p0, r):
    '''simulate from Wright Fisher with pure drift'''
    p = p0
    for _ in range(r):
        p = nprand.binomial(N, p, 1)
        p = p[0] / float(N)
    return p
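A usage sketch estimating the average final frequency under pure drift (assumes numpy as np and numpy.random imported as nprand, as in the snippet):

final_freqs = np.array([sim(N=100, p0=0.3, r=500) for _ in range(200)])
print(final_freqs.mean())   # should stay near p0 = 0.3 on average under pure drift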
Example no. 27
File: tm.py Project: sbos/twtm
    def theta_t(th, n, p):
        pt = pt_t(th, n, p)
        if binomial(1, pt) == 1:
            return (th, pt, np.log(pt))

        tt = dirichlet(alpha + n, 1)[0]
        return (tt, pt, np.log(1-pt) + dir_logpdf(tt, alpha + n))
Example no. 28
 def randStats(self):
     randOB = binomial(self.pa, self.stats[4])
     self.randStats[4] = randOB/self.pa #get rand OBP
     self.randStats[0] = randOB/self.ob * self.stats[0] # get rand R
     self.randStats[1] = randOB/self.ob * self.stats[1] # get rand HR
     self.randStats[2] = randOB/self.ob * self.stats[2] # get rand RBI
     self.randStats[3] = randOB/self.ob * self.stats[3] # get rand SB
Example no. 29
def prob_thin_mask(counts,pdict,rnd_pct=0.05):
    '''Generate a boolean mask given:
        pdict: Probability dictionary from build_prob_dict.
        counts: A list of derived allele counts.
        rnd_pct: Random probability of thinning out.
    Returned outmask[i] is True if site i should be kept.
    Can be added (with and) to nlsmask to generate a final mask.
    Note: This function also filters out non-segregating sites.
    '''
    cond = lambda x: binomial(1,x)
    cond2 = lambda: binomial(1,1.0-rnd_pct)
    test = lambda x: True if cond(x) and cond2() else False
    outmask = []
    for i,count in enumerate(counts):
        outmask.append(test(pdict[count]))
    return np.array(outmask,dtype=bool)
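A usage sketch with a toy probability dictionary (values are illustrative only; assumes numpy as np and numpy.random.binomial as binomial, as in the snippet):

pdict = {1: 0.9, 2: 0.5, 3: 0.2}       # keep-probability per derived allele count
counts = [1, 2, 3, 2, 1, 3]
mask = prob_thin_mask(counts, pdict, rnd_pct=0.05)
print(mask)                            # boolean array, True = keep site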
Example no. 30
def make_noisy_probs(exact, noise_type, noise):
    """Noisify probabilities
    Args: exact - 2D np.array, rows are options, cols are ppl
          model_params: dict
    Outputs: 2d np.aray, rows are options, cols are ppl
    """
    if noise_type == "noiseless":
        return exact
    elif noise_type == "binomial":
        num_hypothetical_trials = noise
        num_successes = nr.binomial(num_hypothetical_trials, exact[0])
        noisy0 = num_successes / num_hypothetical_trials
        return np.array([noisy0, 1 - noisy0])
    elif noise_type == "beta":
        alpha_beta = jmutils.beta_shape(exact[0], noise)
        noisy0 = nr.beta(*alpha_beta)
        return np.array([noisy0, 1 - noisy0])
    elif noise_type == "truncnorm":
        scale = noise
        noisy0 = truncnorm.rvs(-exact[0] / scale, (1 - exact[0]) / scale, 
                               loc=exact[0], scale=scale)
        return np.array([noisy0, 1 - noisy0])
    elif noise_type == "log_odds":
        lo = np.log(exact[0] / exact[1])
        noisy_lo = nr.normal(lo, noise)
        given_1 = 1 / (math.exp(noisy_lo) + 1)
        return np.array([1 - given_1, given_1])
    else:
        print("Error: meta noise_type not specified correctly")
Example no. 31
    def test_single_sequence_1Q(self):

        N = 5  # Counts per timestep
        T = 100  # Number of timesteps

        # The confidence of the statistical tests. Here we set it to 0.999, which means that
        # if we detect drift we are 0.999 confident that we haven't incorrectly rejected the
        # initial hypothesis of no drift.
        confidence = 0.999

        # A drifting probability to obtain the measurement outcome with index 1 (out of [0,1])
        def pt_drift(t):
            return 0.5 + 0.2 * np.cos(0.1 * t)

        # A drift-free probability to obtain the measurement outcome with index 1 (out of [0,1])
        def pt_nodrift(t):
            return 0.5

        # If we want the sequence to have a label, we define a list for this (here, a list of length 1).
        # The labels can, but need not be, pyGSTi GateString objects.
        sequences = [
            pygsti.objects.GateString(None, 'Gx(Gi)^64Gx'),
        ]

        # If we want the outcomes to have labels, we define a list for this.
        outcomes = ['0', '1']

        # Let's create some fake data by sampling from these p(t) at integer times. Here we have
        # created a 1D array, but we could have instead created a 1 x 1 x 1 x T array.
        data_1seq_drift = np.array(
            [binomial(N, pt_drift(t)) for t in range(0, T)])
        data_1seq_nodrift = np.array(
            [binomial(N, pt_nodrift(t)) for t in range(0, T)])

        # If we want frequencies in Hertz, we need to specify the timestep in seconds. If this isn't
        # specified, the frequencies are given in 1/timestep with timestep defaulting to 1.
        timestep = 1e-5

        # We hand these 1D arrays to the analysis function, along with the number of counts, and other
        # optional information
        results_1seq_drift = drift.do_basic_drift_characterization(
            data_1seq_drift,
            counts=N,
            outcomes=outcomes,
            confidence=confidence,
            timestep=timestep,
            indices_to_sequences=sequences)
        results_1seq_nodrift = drift.do_basic_drift_characterization(
            data_1seq_nodrift,
            counts=N,
            outcomes=outcomes,
            confidence=confidence,
            timestep=timestep,
            indices_to_sequences=sequences)

        if bMPL:
            results_1seq_drift.plot_power_spectrum()
            results_1seq_nodrift.plot_power_spectrum()

        print(results_1seq_drift.global_pvalue)
        print(results_1seq_nodrift.global_pvalue)

        # The power spectrum obtained after averaging over everything
        print(results_1seq_drift.global_power_spectrum[:4])
        # The power spectrum obtained after averaging over everything except sequence label
        print(results_1seq_drift.ps_power_spectrum[0, :4])
        # The power spectrum obtained after averaging over everything except entity label
        print(results_1seq_drift.pe_power_spectrum[0, :4])
        # The power spectrum obtained after averaging over everything except sequence and entity label
        print(results_1seq_drift.pspe_power_spectrum[0, 0, :4])
        # The two power spectra obtained after averaging over nothing
        print(results_1seq_drift.pspepo_power_spectrum[0, 0, 0, :4])
        print(results_1seq_drift.pspepo_power_spectrum[0, 0, 1, :4])

        # Lets create an array of the true probability. This needs to be
        # of dimension S x E x M x T
        parray_1seq = np.zeros((1, 1, 2, T), float)
        parray_1seq[0, 0, 0, :] = np.array([pt_drift(t) for t in range(0, T)])
        parray_1seq[0, 0, 1, :] = 1 - parray_1seq[0, 0, 0, :]

        # The measurement outcome index we want to look at (here the esimated p(t)
        # for one index is just 1 - the p(t) for the other index, because we are
        # looking at a two-outcome measurement).
        outcome = 1

        # If we hand the parray to the plotting function, it will also plot
        # the true probability alongside our estimate from the data
        if bMPL:
            results_1seq_drift.plot_estimated_probability(sequence=0,
                                                          outcome=outcome,
                                                          parray=parray_1seq,
                                                          plot_data=True)
Example no. 32
 def rvs(self, size=None):
     return random.binomial(self.n, self.p, size=size)
Example no. 33
 def test_p_zero_stream(self):
     # Regression test for gh-14522.  Ensure that future versions
     # generate the same variates as version 1.16.
     np.random.seed(12345)
     assert_array_equal(random.binomial(1, [0, 0.25, 0.5, 0.75, 1]),
                        [0, 0, 0, 1, 1])
Example no. 34
def main(split):
    kept_indices = None
    for quality in ("LR", "HR"):
        img_folder = '../../datasets/{}_{}_bicLRx4/{}/x4/*'.format(  # glob matching pattern
            dataset_name, split, quality)
        lmdb_save_path = '../../datasets/{}_{}_bicLRx4/{}/x4{}.lmdb'.format(
            dataset_name, split, quality,
            removed_fraction if removed_fraction else "")
        meta_info = {'name': 'DIV2K800_sub_GT'}
        mode = 2  # 1 for reading all the images to memory and then writing to lmdb (more memory);
        # 2 for reading several images and then writing to lmdb, loop over (less memory)
        batch = 1000  # Used in mode 2. After batch images, lmdb commits.
        ###########################################
        if not lmdb_save_path.endswith('.lmdb'):
            raise ValueError("lmdb_save_path must end with '.lmdb'.")
        #### whether the lmdb file exist
        if osp.exists(lmdb_save_path):
            print(
                'Folder [{:s}] already exists. Exit...'.format(lmdb_save_path))
            sys.exit(1)
        img_list = sorted(glob.glob(img_folder))

        if removed_fraction and split == 'valid':
            if kept_indices is None:
                kept_indices = binomial(1, (1 - removed_fraction),
                                        len(img_list))
            img_list = [
                item for k, item in enumerate(img_list) if kept_indices[k]
            ]
        if mode == 1:
            print('Read images...')
            dataset = [cv2.imread(v, cv2.IMREAD_UNCHANGED) for v in img_list]
            data_size = sum([img.nbytes for img in dataset])
        elif mode == 2:
            print('Calculating the total size of images...')
            data_size = sum(os.stat(v).st_size for v in img_list)
        else:
            raise ValueError('mode should be 1 or 2')

        key_l = []
        resolution_l = []
        pbar = ProgressBar(len(img_list))
        env = lmdb.open(lmdb_save_path, map_size=data_size * 10)
        txn = env.begin(write=True)  # txn is a Transaction object
        for i, v in enumerate(img_list):
            pbar.update('Write {}'.format(v))
            base_name = osp.splitext(osp.basename(v))[0]
            key = base_name.encode('ascii')
            data = dataset[i] if mode == 1 else cv2.imread(
                v, cv2.IMREAD_UNCHANGED)
            if data.ndim == 2:
                H, W = data.shape
                C = 1
            else:
                H, W, C = data.shape
            txn.put(key, data)
            key_l.append(base_name)
            resolution_l.append('{:d}_{:d}_{:d}'.format(C, H, W))
            # commit in mode 2
            if mode == 2 and i % batch == 1:
                txn.commit()
                txn = env.begin(write=True)

        txn.commit()
        env.close()

        print('Finish writing lmdb.')

        #### create meta information
        # check whether all the images are the same size
        same_resolution = (len(set(resolution_l)) <= 1)
        if same_resolution:
            meta_info['resolution'] = [resolution_l[0]]
            meta_info['keys'] = key_l
            print(
                'All images have the same resolution. Simplify the meta info...'
            )
        else:
            meta_info['resolution'] = resolution_l
            meta_info['keys'] = key_l
            print(
                'Not all images have the same resolution. Save meta info for each image...'
            )

        #### pickle dump
        pickle.dump(meta_info,
                    open(osp.join(lmdb_save_path, 'meta_info.pkl'), "wb"))
        print('Finish creating lmdb meta info.')
Esempio n. 35
0
                    info += rv.cov()
            elif ties == 'cox':
                raise NotImplementedError('Cox tie breaking method not \
implemented')
            else:
                raise NotImplementedError('tie breaking method not recognized')
        return score

if __name__ == '__main__':
    import numpy.random as R
    n = 100
    X = np.array([0] * n + [1] * n)
    b = 0.4
    lin = 1 + b * X
    Y = R.standard_exponential((2 * n, )) / lin
    delta = R.binomial(1, 0.9, size=(2 * n, ))

    subjects = [Observation(Y[i], delta[i]) for i in range(2 * n)]
    for i in range(2 * n):
        subjects[i].X = X[i]

    import scikits.statsmodels.sandbox.formula as F
    x = F.Quantitative('X')
    f = F.Formula(x)

    c = CoxPH(subjects, f)

    #    c.cache()
    # temp file cleanup doesn't work on windows
    c = CoxPH(subjects, f, time_dependent=True)
    c.cache()  #this creates  tempfile cache,
Example no. 36
 def _rvs(self, n, p):
     return mtrand.binomial(n, p, self._size)
Example no. 37
 def get_binomial_mab_env(params: Sequence[Tuple[int, float]]) -> 'MABEnv':
     return MABEnv(
         [lambda n=n, p=p: float(binomial(n, p, 1)[0]) for n, p in params])
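The n=n, p=p default arguments freeze each arm's parameters at loop time; without them every lambda would close over the same loop variables and all arms would use the final (n, p). A standalone illustration of the pitfall (my example, plain Python):

arms_wrong = [lambda: p for p in (0.1, 0.9)]
arms_right = [lambda p=p: p for p in (0.1, 0.9)]
print([f() for f in arms_wrong])   # [0.9, 0.9] -- late binding
print([f() for f in arms_right])   # [0.1, 0.9]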
Example no. 38
 def next_state(self, state: State) -> State:
     up_move: int = binomial(1, self.up_prob(state), 1)[0]
     return Process3.State(num_up_moves=state.num_up_moves + up_move,
                           num_down_moves=state.num_down_moves + 1 -
                           up_move)
Example no. 39
 def next_state(self, state: State) -> State:
     up_move: int = binomial(1, self.up_prob(state), 1)[0]
     return Process2.State(price=state.price + up_move * 2 - 1,
                           is_prev_move_up=bool(up_move))
Example no. 40
 def next_state(self, state: State) -> State:
     up_move: int = binomial(1, self.up_prob(state), 1)[0]
     return Process1.State(price=state.price + up_move * 2 - 1)
Example no. 41
def ar_trace(frame: int, pfire: float, g: np.ndarray):
    S = random.binomial(n=1, p=pfire, size=frame).astype(float)
    C = apply_arcoef(S, g)
    return C, S
Example no. 42
def binomial_sigma(p):
	sample = binomial(n=1, p=p)
	return sample
Example no. 43
# Generate an array of 10 random floats in [1, 6)
5 * random.random(10) + 1
random.uniform(1, 6, 10)

# Generate 10 random integers in [1, 6)
random.randint(1, 6, 10)
'''Generate a 5x2 sample from the standard normal distribution, e.g.
array([[-0.676922  ,  0.61167629],
       [ 1.03099952,  0.93128012],
       [-0.83921752, -0.30921238],
       [ 0.33126343,  0.97554513],
       [-0.47917424, -0.18565898]])'''
random.normal(size=(5, 2))

# Generate 5 binomial samples with n=5, p=0.5, e.g. array([2, 2, 3, 3, 4])
random.binomial(n=5, p=0.5, size=5)

# Sample 7 values from a with replacement, e.g. array([0, 2, 4, 2, 0, 4, 9])
a = np.arange(10)
random.choice(a, 7)
# Sample 7 values from a without replacement, e.g. array([4, 7, 8, 3, 5, 1, 0])
random.choice(a, 7, replace=False)

# Shuffle a and return the result as a new array
b = random.permutation(a)

# Shuffle a in place
random.shuffle(a)

# Generate a random sequence of 9 bytes returned as a bytes string
# e.g. b'=\xa6\xaeK\xb8\xbf&\xa2\xf4'
random.bytes(9)
Example no. 44
 def get_bernoulli_mab_env(probs: Sequence[float]) -> 'MABEnv':
     return MABEnv([lambda p=p: float(binomial(1, p, 1)[0]) for p in probs])
Example no. 45
def generate_curia_synthetic_data(
        # Binary or continuous
        binary_treatment: bool = True,
        binary_outcome: bool = False,

        # Number of records
        n_train: int = 1000,
        n_test: int = 1000,

        # Number of features to generate by type
        binary_dim: int = 5,
        uniform_dim: int = 5,
        normal_dim: int = 5,

        # Features to have effect on ITE, outcome and treatment propensity
        n_confounders: int = 2,
        n_features_outcome: int = 3,
        n_features_treatment_effect: int = 3,
        n_features_propensity: int = 3,

        # outcome_noise_sd
        outcome_noise_sd: int = 1,

        # Features to drop
        missing_data_scaler: float = 0.5,

        # Treatment share scaler
        treatment_share_scaler: float = 0.05,

        # Random seed
        seed: int = 42) -> object:
    #############################################################
    # Initiate variables and make some checks
    #############################################################

    # Sum train and test together for now
    n_total = n_train + n_test

    # Calculate actual values for the number of the missing features
    n_features_to_drop_outcome_not_counfounders = math.floor(
        (n_features_outcome - n_confounders) * missing_data_scaler)
    n_features_to_drop_treatment_effect_not_counfounders = math.floor(
        (n_features_treatment_effect - n_confounders) * missing_data_scaler)
    n_features_to_drop_confounders = math.floor(n_confounders *
                                                missing_data_scaler)
    n_features_to_drop_propensity = math.floor(n_features_propensity *
                                               missing_data_scaler)

    # create empty dataframe
    modeling_df = pd.DataFrame()

    #############################################################
    # Generate features
    #############################################################

    np.random.seed(seed)

    # Generate Age - we will add mean=70 and sd=30 later to avoid high influence of this variable
    modeling_df['age'] = normal(loc=0, scale=1, size=n_total)

    # Generate features with uniform distribution - will multiply by 10 later
    for i in range(0, uniform_dim):
        modeling_df['sdoh_' +
                    str(i)] = np.ceil(uniform(size=n_total) * 10) / 10

    # Generate features with bernoulli distribution
    binary_coefs = uniform(size=binary_dim)
    for i in range(0, binary_dim):
        binary_coef = binary_coefs[i]
        modeling_df['binary_flag_' + str(i)] = binomial(n=1,
                                                        p=binary_coef,
                                                        size=n_total)

    # Generate features with normal distribution
    multivariate_df = pd.DataFrame(
        multivariate_normal(np.zeros(normal_dim), np.diag(np.ones(normal_dim)),
                            n_total),
        columns=['vector_' + str(i) for i in range(0, normal_dim)])
    modeling_df = pd.concat([modeling_df, multivariate_df], axis=1)

    # Extract name of the features
    features = pd.Series(modeling_df.columns)

    #############################################################
    # Sample features for the treatment effect and the outcomes
    #############################################################

    # sample features for the confounders
    confounders_features = features.sample(n_confounders, random_state=1)
    outcome_features_not_confounders = features[
        ~features.isin(confounders_features)].sample(n_features_outcome -
                                                     n_confounders,
                                                     random_state=1)
    outcome_features = pd.concat(
        [outcome_features_not_confounders, confounders_features])

    # sample features for the treatment effect
    treatment_effect_features_not_confounders = features[
        ~features.isin(outcome_features)].sample(n_features_treatment_effect -
                                                 n_confounders,
                                                 random_state=1)
    treatment_effect_features = pd.concat(
        [treatment_effect_features_not_confounders, confounders_features])

    # sample features for the propensity score
    propensity_score_features = features.sample(n_features_propensity,
                                                random_state=1)

    #############################################################
    # Generate outcomes
    #############################################################

    # Generate coefficients
    beta_outcome = normal(0, 1, n_features_outcome)

    # Generate outcomes
    modeling_df['y0'] = np.dot(modeling_df[outcome_features],
                               beta_outcome) + normal(0, outcome_noise_sd)

    #############################################################
    # Generate treatment effect
    #############################################################

    # Generate coefficients
    beta_te = normal(0, 1, n_features_treatment_effect)

    # Generate outcomes
    modeling_df['true_ite'] = np.dot(modeling_df[treatment_effect_features],
                                     beta_te)

    #############################################################
    # Generate propensity score
    #############################################################

    # Generate coefficients for the propensity score
    # Draw coefficients from beta distributions
    beta_propensity_score = normal(0, 1, n_features_propensity)

    # Generate propensity score and rescale it again from 0 to 1
    modeling_df['true_treatment_propensity'] = np.dot(
        modeling_df[propensity_score_features], beta_propensity_score)

    # Center the distribution first
    modeling_df['true_treatment_propensity'] = modeling_df['true_treatment_propensity'] - \
        modeling_df['true_treatment_propensity'].mean()

    # Rescale to -1 to +1
    modeling_df['true_treatment_propensity'] = modeling_df['true_treatment_propensity'] / \
        modeling_df['true_treatment_propensity'].abs().max()

    # Rescale to get treatment_share_scaler
    modeling_df['true_treatment_propensity'] = modeling_df['true_treatment_propensity'] * \
        min(treatment_share_scaler, 1 - treatment_share_scaler)

    # Move to the right
    modeling_df['true_treatment_propensity'] = modeling_df['true_treatment_propensity'] + \
        treatment_share_scaler

    #############################################################
    # Generate treatment
    #############################################################

    if binary_treatment:
        modeling_df['treatment'] = binomial(
            n=1, p=modeling_df['true_treatment_propensity'], size=n_total)
    else:
        modeling_df['treatment'] = modeling_df['true_treatment_propensity']

    #############################################################
    # Generate outcome with treatment effect
    #############################################################

    modeling_df['y1'] = modeling_df['y0'] + modeling_df['true_ite']
    modeling_df['y'] = modeling_df['y0'] + \
        modeling_df['true_ite'] * modeling_df['treatment']

    # Rescale from 0 to 1
    y_min = modeling_df[['y', 'y0', 'y1']].min().min()
    y_max = modeling_df[['y', 'y0', 'y1']].max().max()
    scale_factor = 1 / (y_max - y_min)
    modeling_df['y'] = (modeling_df['y'] - y_min) * scale_factor
    modeling_df['y0'] = (modeling_df['y0'] - y_min) * scale_factor
    modeling_df['y1'] = (modeling_df['y1'] - y_min) * scale_factor

    modeling_df['true_ite_rescaled'] = modeling_df['true_ite'] * scale_factor
    modeling_df['true_ite'] = modeling_df['y1'] - \
        modeling_df['y0']  # modeling_df['true_ite'] * scale_factor

    # If binary - rescale to [0,1] and use as probability to generate bernoulli outcome
    if binary_outcome:
        modeling_df['y'] = binomial(n=1, p=modeling_df['y'], size=n_total)

    #############################################################
    # Features final adjustments
    #############################################################

    # Rescale age feature
    modeling_df['age'] = np.where(modeling_df['age'] * 30 + 70 < 50, 50,
                                  modeling_df['age'] * 30 + 70)

    # Rescale SDOH features
    for i in range(0, uniform_dim):
        modeling_df['sdoh_' + str(i)] = modeling_df['sdoh_' + str(i)] * 10

    #############################################################
    # Drop features
    #############################################################

    # features_to_drop_outcome_not_counfounders
    features_to_drop_outcome_not_counfounders = outcome_features_not_confounders.sample(
        n_features_to_drop_outcome_not_counfounders, random_state=1)

    # features_to_drop_treatment_effect_not_confounders
    features_to_drop_treatment_effect_not_confounders = treatment_effect_features_not_confounders.sample(
        n_features_to_drop_treatment_effect_not_counfounders, random_state=1)

    # features_to_drop_confounders
    features_to_drop_confounders = confounders_features.sample(
        n_features_to_drop_confounders, random_state=1)

    # features_to_drop_confounders
    features_to_drop_propensity = propensity_score_features.sample(
        n_features_to_drop_propensity, random_state=1)

    # Now drop all those features
    all_features_to_drop = pd.concat([
        features_to_drop_outcome_not_counfounders,
        features_to_drop_treatment_effect_not_confounders,
        features_to_drop_confounders, features_to_drop_propensity
    ]).drop_duplicates()

    for col in all_features_to_drop:
        #         print('Dropping {} from the columns'.format([col]))
        assert (
            col
            in modeling_df), 'All features to drop should be in the featureset'
        del modeling_df[col]

    #############################################################
    # Return results
    #############################################################

    # Randomly select train and test
    y = modeling_df['y']
    t = modeling_df['treatment']
    true_ite = modeling_df['true_ite']
    true_treatment_propensity = modeling_df['true_treatment_propensity']
    X = modeling_df.drop([
        'y', 'y0', 'y1', 'treatment', 'true_ite', 'true_treatment_propensity',
        'true_ite_rescaled'
    ],
                         axis=1)

    X, Xte, T, _, Y, _, _, ITEte = train_test_split(X.to_numpy(),
                                                    t.to_numpy(),
                                                    y.to_numpy(),
                                                    true_ite.to_numpy(),
                                                    test_size=0.3,
                                                    random_state=seed)
    Xtr, Xval, Ttr, Tval, Ytr, Yval = train_test_split(X,
                                                       T,
                                                       Y,
                                                       test_size=0.5,
                                                       random_state=seed)

    data = (Xtr, Xval, Xte, Ttr, Tval, Ytr, Yval, ITEte)

    return data
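A usage sketch (parameter values are illustrative; assumes the libraries used inside the function, e.g. numpy, pandas and sklearn's train_test_split, are importable):

Xtr, Xval, Xte, Ttr, Tval, Ytr, Yval, ITEte = generate_curia_synthetic_data(
    binary_treatment=True,
    binary_outcome=False,
    n_train=1000,
    n_test=1000,
    seed=42)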
Example no. 46
    def run_model(self):

        ## initialize data structure

        self.res = np.zeros([self.duration, 12], dtype=np.float32)

        self.res[0, 0] = self.nf1
        self.res[0, 1] = self.nf2
        self.res[0, 2] = self.nf3
        self.res[0, 3] = self.nm1
        self.res[0, 4] = self.nm2
        self.res[0, 5] = self.nm3
        self.res[0, 6] = self.vac3
        self.res[0, 7] = self.vac2
        self.res[0, 8] = self.vac1
        self.res[0, 9] = self.female_promotion_probability_1
        self.res[0, 10] = self.female_promotion_probability_2
        self.res[0, 11] = np.float32(
            sum(list([self.nf1, self.nf2, self.nf3])) / sum(
                list([
                    self.nf1, self.nf2, self.nf3, self.nm1, self.nm2, self.nm3
                ])))

        hiring_rate_female_level_1 = self.bf1
        hiring_rate_female_level_2 = self.bf2
        hiring_rate_female_level_3 = self.bf3
        attrition_rate_female_level_1 = self.df1
        attrition_rate_female_level_2 = self.df2
        attrition_rate_female_level_3 = self.df3
        attrition_rate_male_level_1 = self.dm1
        attrition_rate_male_level_2 = self.dm2
        attrition_rate_male_level_3 = self.dm3
        probability_of_outside_hire_level_3 = self.phire3
        probability_of_outside_hire_level_2 = self.phire2
        male_promotion_probability_1_2 = self.male_promotion_probability_1
        male_promotion_probability_2_3 = self.male_promotion_probability_2
        for i in range(1, self.duration):
            # initialize variables for this iteration

            prev_number_of_females_level_1 = self.res[i - 1, 0]
            prev_number_of_females_level_2 = self.res[i - 1, 1]
            prev_number_of_females_level_3 = self.res[i - 1, 2]
            prev_number_of_males_level_1 = self.res[i - 1, 3]
            prev_number_of_males_level_2 = self.res[i - 1, 4]
            prev_number_of_males_level_3 = self.res[i - 1, 5]
            prev_number_of_vacancies_level_3 = self.res[i - 1, 6]
            prev_number_of_vacancies_level_2 = self.res[i - 1, 7]
            prev_number_of_vacancies_level_1 = self.res[i - 1, 8]
            prev_promotion_rate_female_level_1 = self.female_promotion_probability_1
            prev_promotion_rate_female_level_2 = self.female_promotion_probability_2
            if np.isnan(prev_promotion_rate_female_level_1):
                prev_promotion_rate_female_level_1 = 0
            if np.isnan(prev_promotion_rate_female_level_2):
                prev_promotion_rate_female_level_2 = 0
            prev_gender_proportion_of_department = np.float32(
                sum(
                    list([
                        prev_number_of_females_level_1,
                        prev_number_of_females_level_2,
                        prev_number_of_females_level_3
                    ])) / (sum(
                        list([
                            prev_number_of_females_level_1,
                            prev_number_of_females_level_2,
                            prev_number_of_females_level_3,
                            prev_number_of_males_level_1,
                            prev_number_of_males_level_2,
                            prev_number_of_males_level_3
                        ]))))

            # Process Model

            # first both female and males leave the department according to binomial probability.

            female_attrition_level_3 = binomial(prev_number_of_females_level_3,
                                                attrition_rate_female_level_3)

            male_attrition_level_3 = binomial(prev_number_of_males_level_3,
                                              attrition_rate_male_level_3)

            # the departures create a set of vacancies. These vacancies are the basis for new hiring
            total_vacancies_3 = female_attrition_level_3 + male_attrition_level_3

            # women are hired first and then men

            hiring_female_3 = binomial(
                total_vacancies_3, probability_of_outside_hire_level_3 *
                hiring_rate_female_level_3)
            hiring_male_3 = binomial(
                max(0, total_vacancies_3 - hiring_female_3),
                probability_of_outside_hire_level_3 *
                (1 - hiring_rate_female_level_3))

            # promotion after hiring level 3

            promotions_female_after_hiring_2_3 = binomial(
                max(prev_number_of_females_level_2,
                    total_vacancies_3 - hiring_female_3 - hiring_male_3),
                prev_promotion_rate_female_level_2)
            # formula should read that either the remaining vacancies or the previous number of males--whichever is smallest. But need to make sure no negative values.
            promotions_of_males_level_2_3 = binomial(
                min(
                    prev_number_of_males_level_2,
                    max(
                        0, total_vacancies_3 - hiring_female_3 -
                        hiring_male_3 - promotions_female_after_hiring_2_3)),
                male_promotion_probability_2_3)
            assert (promotions_of_males_level_2_3 >=
                    0), "promotions_of_males_level_2_3 is negative"

            # attrition at level 2 - either people leave from attrition or promotion

            female_attrition_level_2 = binomial(
                max(
                    0, prev_number_of_females_level_2 -
                    promotions_female_after_hiring_2_3),
                attrition_rate_female_level_2)

            male_attrition_level_2 = binomial(
                max(
                    0, prev_number_of_males_level_2 -
                    promotions_of_males_level_2_3),
                attrition_rate_male_level_2)

            # the departures create a set of vacancies. These vacancies are the basis for new hiring
            total_vacancies_2 = sum(
                list([
                    female_attrition_level_2, male_attrition_level_2,
                    promotions_female_after_hiring_2_3,
                    promotions_of_males_level_2_3
                ]))
            assert (total_vacancies_2 >=
                    0), "total vacancies level 2 is less than zero"

            # TODO set to hiring first

            hiring_female_2 = binomial(
                max(0,
                    total_vacancies_2), probability_of_outside_hire_level_2 *
                hiring_rate_female_level_2)

            hiring_male_2 = binomial(
                max(0, total_vacancies_2 - hiring_female_2),
                1 - probability_of_outside_hire_level_2 *
                hiring_rate_female_level_2)

            promotions_of_females_level_1_2 = binomial(
                min(
                    prev_number_of_females_level_1,
                    max(0,
                        total_vacancies_2 - hiring_female_2 - hiring_male_2)),
                prev_promotion_rate_female_level_1)

            promotions_of_males_level_1_2 = binomial(
                min(
                    prev_number_of_males_level_1,
                    max(
                        0,
                        total_vacancies_2 - hiring_female_2 - hiring_male_2 -
                        promotions_of_females_level_1_2)),
                male_promotion_probability_1_2)

            assert (promotions_of_females_level_1_2 >=
                    0), "promotions of females level 1-2 is negative"
            assert (promotions_of_males_level_1_2 >=
                    0), "promotions of males level 1-2 is negative"

            total_hiring_2 = hiring_female_2 + hiring_male_2

            ## Level 1

            female_attrition_level_1 = binomial(
                max(
                    0, prev_number_of_females_level_1 -
                    promotions_of_females_level_1_2),
                attrition_rate_female_level_1)

            male_attrition_level_1 = binomial(
                max(0, prev_number_of_males_level_1),
                attrition_rate_male_level_1)

            total_vacancies_1 = sum(
                list([
                    female_attrition_level_1, male_attrition_level_1,
                    promotions_of_females_level_1_2,
                    promotions_of_males_level_1_2
                ]))

            hiring_female_1 = binomial(total_vacancies_1,
                                       hiring_rate_female_level_1)
            hiring_male_1 = binomial(
                max(0, total_vacancies_1 - hiring_female_1),
                1 - hiring_rate_female_level_1)

            # Write state variables to array and move to next iteration

            self.res[i, 0] = number_of_females_level_1 = sum(
                list([
                    prev_number_of_females_level_1,
                    neg(female_attrition_level_1),
                    neg(promotions_of_females_level_1_2), hiring_female_1
                ]))

            self.res[i, 1] = number_of_females_level_2 = max(
                0,
                sum(
                    list([
                        prev_number_of_females_level_2,
                        neg(female_attrition_level_2),
                        neg(promotions_female_after_hiring_2_3),
                        promotions_of_females_level_1_2, hiring_female_2
                    ])))
            self.res[i, 2] = number_of_females_level_3 = sum(
                list([
                    prev_number_of_females_level_3,
                    neg(female_attrition_level_3),
                    promotions_female_after_hiring_2_3, hiring_female_3
                ]))

            self.res[i, 3] = number_of_males_level_1 = sum(
                list([
                    prev_number_of_males_level_1,
                    neg(male_attrition_level_1),
                    neg(promotions_of_males_level_1_2), hiring_male_1
                ]))

            self.res[i, 4] = number_of_males_level_2 = sum(
                list([
                    prev_number_of_males_level_2,
                    neg(male_attrition_level_2),
                    neg(promotions_of_males_level_2_3),
                    promotions_of_males_level_1_2, hiring_male_2
                ]))

            self.res[i, 5] = number_of_males_level_3 = sum(
                list([
                    prev_number_of_males_level_3,
                    neg(male_attrition_level_3), promotions_of_males_level_2_3,
                    hiring_male_3
                ]))

            self.res[i, 6] = number_of_vacancies_level_3 = sum(
                list([male_attrition_level_3, female_attrition_level_3]))

            self.res[i, 7] = number_of_vacancies_level_2 = sum(
                list([
                    male_attrition_level_2, female_attrition_level_2,
                    promotions_female_after_hiring_2_3,
                    promotions_of_males_level_2_3
                ]))

            self.res[i, 8] = number_of_vacancies_level_1 = sum(
                list([
                    male_attrition_level_1, female_attrition_level_1,
                    promotions_of_males_level_1_2,
                    promotions_of_females_level_1_2
                ]))

            self.res[
                i,
                9] = promotion_rate_female_level_1 = self.female_promotion_probability_1
            self.res[
                i,
                10] = promotion_rate_women_level_2 = self.female_promotion_probability_2
            self.res[i, 11] = gender_proportion_of_department = np.float32(
                truediv(
                    sum(
                        list([
                            number_of_females_level_1,
                            number_of_females_level_2,
                            number_of_females_level_3
                        ])),
                    sum(
                        list([
                            number_of_females_level_1,
                            number_of_females_level_2,
                            number_of_females_level_3, number_of_males_level_1,
                            number_of_males_level_2, number_of_males_level_3
                        ]))))

        # print(self.res[i,:])
        ## Print Data matrix

        df_ = pd.DataFrame(self.res)
        df_.columns = [
            'f1', 'f2', 'f3', 'm1', 'm2', 'm3', 't3', 't2', 't1', 'prom1',
            'prom2', 'gendprop'
        ]

        recarray_results = df_.to_records(index=True)
        self.run = recarray_results
        return recarray_results
Example no. 47
x = random.normal(loc=1, scale=4, size=(2, 3))
print("Normal distribution");print(x)
plt.hist(x, 10)
plt.show()

import matplotlib.pyplot as plt
import seaborn as sns
#sns.distplot(random.normal(size=100), hist=False)
#plt.show()

# The binomial distribution is a discrete distribution. It describes binary
# scenarios, e.g. the toss of a coin. It has three parameters:
# n - number of trials.
# p - probability of success on each trial (e.g. 0.5 for a coin toss).
# size - the shape of the returned array.
from numpy import random
x = random.binomial(n=10, p=0.5, size=10)
# print("binomial distribution");print(x)

from numpy import random
import matplotlib.pyplot as plt
import seaborn as sns

# sns.distplot(random.binomial(n=10, p=0.5, size=100), hist=True, kde=False)
# plt.show()

# The Poisson distribution is a discrete distribution.
# It models how many times an event occurs in a fixed interval, e.g. if someone
# eats twice a day on average, what is the probability they eat three times?
# It has two parameters: lam - the rate, or expected number of occurrences (2 for the problem above).
# size - the shape of the returned array.
from numpy import random
import matplotlib.pyplot as plt
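
# A minimal sketch of the Poisson demo described in the comments above,
# following the same pattern as the binomial example.
x = random.poisson(lam=2, size=10)
print("Poisson distribution")
print(x)
# sns.distplot(random.poisson(lam=2, size=1000), hist=True, kde=False)
# plt.show()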
Esempio n. 48
0
def _simulate_claim_data(T, freq, sev, theta, obsFreqs=None):
    # T = integer giving the number of time periods observed
    # freq = claim frequency distribution, chosen from ("poisson",
    # "binomial", "negative binomial")
    # sev = claim size distribution, chosen from ("weibull", "lognormal",
    # "gamma")
    # theta = parameters of the claim frequency distribution followed by the
    # parameters of the claim size distribution
    # obsFreqs = observed frequencies, used only when freq == "obs"

    if freq == "ones":
        freqs = np.ones(T).astype(np.int64)
        thetaSev = theta
    elif freq == "obs":
        freqs = obsFreqs
        thetaSev = theta
    elif freq == "bernoulli":
        p = theta[0]
        thetaSev = theta[1:]
        freqs = np.empty(T, np.int64)
        for t in range(T):
            freqs[t] = rnd.binomial(1, p)
    elif freq == "binomial":
        n, p = theta[0:2]
        thetaSev = theta[2:]
        freqs = np.empty(T, np.int64)
        for t in range(T):
            freqs[t] = rnd.binomial(n, p)
    elif freq == "poisson":
        lam = theta[0]
        thetaSev = theta[1:]
        freqs = np.empty(T, np.int64)
        for t in range(T):
            freqs[t] = rnd.poisson(lam)
    elif freq == "geometric":
        p = theta[0]
        thetaSev = theta[1:]
        freqs = np.empty(T, np.int64)
        for t in range(T):
            freqs[t] = rnd.geometric(1 - p) - 1
    elif freq == "negative binomial":
        a, p = theta[0:2]
        thetaSev = theta[2:]
        freqs = np.empty(T, np.int64)
        for t in range(T):
            freqs[t] = negative_binomial(a, p)
    else:
        return
        # raise Exception(f"Unknown frequency distribution: {freq}")
            
    N = np.sum(freqs)
    
    if sev == "frequency dependent exponential":
        sevs = np.empty(N, np.float64)
        i = 0
        for t in range(T):
            sevs[i:i+freqs[t]] = _simulate_claim_sizes(freqs[t], sev, thetaSev)
            i += freqs[t]
    else:
        sevs = _simulate_claim_sizes(N, sev, thetaSev)

    return freqs, sevs
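
# A minimal, self-contained sketch of the "poisson" frequency branch above:
# one claim count is drawn per period with numpy.random (here aliased as rnd,
# as assumed by _simulate_claim_data). Parameter values are illustrative only.
import numpy as np
from numpy import random as rnd

T, lam = 12, 3.0
freqs = np.empty(T, np.int64)
for t in range(T):
    freqs[t] = rnd.poisson(lam)
print(freqs, freqs.sum())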
    def run_model(self):

        ## initialize data structure

        self.res = np.zeros([
            self.duration,
            len(MODEL_RUN_COLUMNS) + len(EXPORT_COLUMNS_FOR_CSV)
        ],
                            dtype=np.float32)

        self.res[0, 0] = self.nf1
        self.res[0, 1] = self.nf2
        self.res[0, 2] = self.nf3
        self.res[0, 3] = self.nm1
        self.res[0, 4] = self.nm2
        self.res[0, 5] = self.nm3
        self.res[0, 6] = 0
        self.res[0, 7] = 0
        self.res[0, 8] = 0
        self.res[0, 9] = self.female_promotion_probability_1
        self.res[0, 10] = self.female_promotion_probability_2
        self.res[0, 11] = np.float32(
            sum(list([self.nf1, self.nf2, self.nf3])) / sum(
                list([
                    self.nf1, self.nf2, self.nf3, self.nm1, self.nm2, self.nm3
                ])))
        self.res[0, 12] = 0
        self.res[0, 13] = self.res[0, 0:6].sum()
        self.res[0, 14:] = 0

        # I assign the state variables to temporary variables. That way I
        # don't have to worry about overwriting the original state variables.

        hiring_rate_female_level_1 = self.bf1
        hiring_rate_female_level_2 = self.bf2
        hiring_rate_female_level_3 = self.bf3
        attrition_rate_female_level_1 = self.df1
        attrition_rate_female_level_2 = self.df2
        attrition_rate_female_level_3 = self.df3
        attrition_rate_male_level_1 = self.dm1
        attrition_rate_male_level_2 = self.dm2
        attrition_rate_male_level_3 = self.dm3
        probability_of_outside_hire_level_3 = self.phire3
        probability_of_outside_hire_level_2 = self.phire2
        female_promotion_probability_1_2 = self.female_promotion_probability_1
        female_promotion_probability_2_3 = self.female_promotion_probability_2
        department_size_upper_bound = self.upperbound
        department_size_lower_bound = self.lowerbound
        variation_range = self.variation_range
        unfilled_vacancies = 0
        change_to_level_1 = 0
        change_to_level_2 = 0
        change_to_level_3 = 0

        for i in range(1, self.duration):
            # initialize variables for this iteration

            prev_number_of_females_level_1 = self.res[i - 1, 0]
            prev_number_of_females_level_2 = self.res[i - 1, 1]
            prev_number_of_females_level_3 = self.res[i - 1, 2]
            prev_number_of_males_level_1 = self.res[i - 1, 3]
            prev_number_of_males_level_2 = self.res[i - 1, 4]
            prev_number_of_males_level_3 = self.res[i - 1, 5]
            prev_number_of_vacancies_level_3 = self.res[i - 1, 6]
            prev_number_of_vacancies_level_2 = self.res[i - 1, 7]
            prev_number_of_vacancies_level_1 = self.res[i - 1, 8]
            prev_promotion_rate_female_level_1 = self.female_promotion_probability_1
            prev_promotion_rate_female_level_2 = self.female_promotion_probability_2
            department_size = self.res[i - 1, 0:6].sum()

            # Process Model

            # Determine department size variation for this timestep

            # First, both females and males leave the department according to binomial draws.

            female_attrition_level_3 = binomial(prev_number_of_females_level_3,
                                                attrition_rate_female_level_3)

            male_attrition_level_3 = binomial(prev_number_of_males_level_3,
                                              attrition_rate_male_level_3)

            # the departures create a set of vacancies. These vacancies are the basis for new hiring
            total_vacancies_3 = female_attrition_level_3 + \
                                male_attrition_level_3 + change_to_level_3

            # women are hired first and then men
            hiring_female_3 = binomial(
                max(0,
                    total_vacancies_3), probability_of_outside_hire_level_3 *
                hiring_rate_female_level_3)

            hiring_male_3 = binomial(
                max(0, total_vacancies_3 - hiring_female_3),
                probability_of_outside_hire_level_3 *
                (1 - hiring_rate_female_level_3))

            total_hiring_3 = hiring_female_3 + hiring_male_3

            # Level 3 vacancies that are not filled by new hires create opportunities
            # for promotion from level 2. Again women are promoted first and men second.
            # Note the guard below: we never try to promote more professors from
            # level 2 than currently exist at level 2.

            vacancies_remaining_after_hiring_3 = total_vacancies_3 - total_hiring_3

            potential_promotions_after_hiring_3 = max(
                0, vacancies_remaining_after_hiring_3)

            promotions_of_females_level_2_3 = binomial(
                min(potential_promotions_after_hiring_3,
                    prev_number_of_females_level_2),
                female_promotion_probability_2_3)

            promotions_of_males_level_2_3 = binomial(
                max(
                    0,
                    min(
                        vacancies_remaining_after_hiring_3 -
                        promotions_of_females_level_2_3,
                        prev_number_of_males_level_2)),
                1 - female_promotion_probability_2_3)

            # attrition at level 2 - either people leave from attrition or promotion

            female_attrition_level_2 = binomial(
                max(
                    0, prev_number_of_females_level_2 -
                    promotions_of_females_level_2_3),
                attrition_rate_female_level_2)

            male_attrition_level_2 = binomial(
                max(
                    0, prev_number_of_males_level_2 -
                    promotions_of_males_level_2_3),
                attrition_rate_male_level_2)

            # the departures create a set of vacancies. These vacancies are the basis for new hiring
            total_vacancies_2 = sum(
                list([
                    female_attrition_level_2, male_attrition_level_2,
                    promotions_of_females_level_2_3,
                    promotions_of_males_level_2_3, change_to_level_2
                ]))

            hiring_female_2 = binomial(
                max(0,
                    total_vacancies_2), probability_of_outside_hire_level_2 *
                hiring_rate_female_level_2)
            hiring_male_2 = binomial(
                max(0, total_vacancies_2 - hiring_female_2),
                probability_of_outside_hire_level_2 *
                (1 - hiring_rate_female_level_2))

            total_hiring_2 = hiring_female_2 + hiring_male_2

            vacancies_remaining_after_hiring_2 = total_vacancies_2 - total_hiring_2

            potential_promotions_after_hiring_2 = max(
                0, vacancies_remaining_after_hiring_2)

            promotions_of_females_level_1_2 = binomial(
                max(
                    0,
                    min(potential_promotions_after_hiring_2,
                        prev_number_of_females_level_1)),
                female_promotion_probability_1_2)

            promotions_of_males_level_1_2 = binomial(
                max(
                    0,
                    min(
                        vacancies_remaining_after_hiring_2 -
                        promotions_of_females_level_1_2,
                        prev_number_of_males_level_1)),
                probability_of_outside_hire_level_2 *
                (1 - female_promotion_probability_1_2))

            ## Level 1

            female_attrition_level_1 = binomial(
                max(
                    0, prev_number_of_females_level_1 -
                    promotions_of_females_level_1_2),
                attrition_rate_female_level_1)

            male_attrition_level_1 = binomial(
                max(
                    0, prev_number_of_males_level_1 -
                    promotions_of_males_level_1_2),
                attrition_rate_male_level_1)

            total_vacancies_1 = sum(
                list([
                    female_attrition_level_1, male_attrition_level_1,
                    promotions_of_females_level_1_2,
                    promotions_of_males_level_1_2, change_to_level_1
                ]))

            hiring_female_1 = binomial(max(0, total_vacancies_1),
                                       hiring_rate_female_level_1)

            hiring_male_1 = binomial(
                max(0, total_vacancies_1 - hiring_female_1),
                1 - hiring_rate_female_level_1)

            # Write state variables to array and move to next iteration

            self.res[i, 0] = number_of_females_level_1 = sum(
                list([
                    prev_number_of_females_level_1,
                    neg(female_attrition_level_1),
                    neg(promotions_of_females_level_1_2), hiring_female_1
                ]))

            assert (number_of_females_level_1 >=
                    0), "negative number of females 1"

            self.res[i, 1] = number_of_females_level_2 = max(
                0,
                sum(
                    list([
                        prev_number_of_females_level_2,
                        neg(female_attrition_level_2),
                        neg(promotions_of_females_level_2_3),
                        promotions_of_females_level_1_2, hiring_female_2
                    ])))

            self.res[i, 2] = number_of_females_level_3 = sum(
                list([
                    prev_number_of_females_level_3,
                    neg(female_attrition_level_3),
                    promotions_of_females_level_2_3, hiring_female_3
                ]))

            self.res[i, 3] = number_of_males_level_1 = sum(
                list([
                    prev_number_of_males_level_1,
                    neg(male_attrition_level_1),
                    neg(promotions_of_males_level_1_2), hiring_male_1
                ]))

            self.res[i, 4] = number_of_males_level_2 = sum(
                list([
                    prev_number_of_males_level_2,
                    neg(male_attrition_level_2),
                    neg(promotions_of_males_level_2_3),
                    promotions_of_males_level_1_2, hiring_male_2
                ]))

            self.res[i, 5] = number_of_males_level_3 = sum(
                list([
                    prev_number_of_males_level_3,
                    neg(male_attrition_level_3), promotions_of_males_level_2_3,
                    hiring_male_3
                ]))

            self.res[i, 6] = sum(
                list([male_attrition_level_3, female_attrition_level_3]))

            self.res[i, 7] = sum(
                list([
                    male_attrition_level_2, female_attrition_level_2,
                    promotions_of_females_level_2_3,
                    promotions_of_males_level_2_3
                ]))

            self.res[i, 8] = sum(
                list([
                    male_attrition_level_1, female_attrition_level_1,
                    promotions_of_males_level_1_2,
                    promotions_of_females_level_1_2
                ]))

            self.res[i, 9] = self.female_promotion_probability_1
            self.res[i, 10] = self.female_promotion_probability_2
            self.res[i, 11] = np.float32(
                truediv(
                    sum(
                        list([
                            number_of_females_level_1,
                            number_of_females_level_2,
                            number_of_females_level_3
                        ])),
                    sum(
                        list([
                            number_of_females_level_1,
                            number_of_females_level_2,
                            number_of_females_level_3, number_of_males_level_1,
                            number_of_males_level_2, number_of_males_level_3
                        ]))))
            unfilled_vacancies = abs(department_size - self.res[i, 0:6].sum())

            self.res[i, 12] = unfilled_vacancies
            department_size = self.res[i, 0:6].sum()
            self.res[i, 13] = department_size
            self.res[i, 14] = hiring_female_3
            self.res[i, 15] = hiring_male_3
            self.res[i, 16] = hiring_female_2
            self.res[i, 17] = hiring_male_2
            self.res[i, 18] = hiring_female_1
            self.res[i, 19] = hiring_male_1
            self.res[i, 20] = 0
            self.res[i, 21] = 0
            self.res[i, 22] = promotions_of_females_level_2_3
            self.res[i, 23] = promotions_of_males_level_2_3
            self.res[i, 24] = promotions_of_females_level_1_2
            self.res[i, 25] = promotions_of_males_level_1_2
            self.res[i, 26] = hiring_rate_female_level_1
            self.res[i, 27] = hiring_rate_female_level_2
            self.res[i, 28] = hiring_rate_female_level_3
            self.res[i, 29] = 1 - hiring_rate_female_level_1
            self.res[i, 30] = 1 - hiring_rate_female_level_2
            self.res[i, 31] = 1 - hiring_rate_female_level_3
            self.res[i, 32] = attrition_rate_female_level_1
            self.res[i, 33] = attrition_rate_female_level_2
            self.res[i, 34] = attrition_rate_female_level_3
            self.res[i, 35] = attrition_rate_male_level_1
            self.res[i, 36] = attrition_rate_male_level_2
            self.res[i, 37] = attrition_rate_male_level_3
            self.res[i, 38] = 1
            self.res[i, 39] = probability_of_outside_hire_level_2
            self.res[i, 40] = probability_of_outside_hire_level_3
            self.res[i, 41] = female_promotion_probability_1_2
            self.res[i, 42] = female_promotion_probability_2_3
            self.res[i, 43] = 1 - female_promotion_probability_1_2
            self.res[i, 44] = 1 - female_promotion_probability_2_3
            self.res[i, 45] = department_size_upper_bound
            self.res[i, 46] = department_size_lower_bound
            self.res[i, 47] = variation_range
            self.res[i, 48] = self.duration

            # This produces an array of candidate changes which then have to be
            # assigned to levels. For example, with a variation range of 5 the
            # draw might look like [-1, 0, 1, -1, 0]; those values are randomly
            # attributed to levels 1-3, giving a net change per level such as
            # [2, -1, 0].

            flag = False
            while not flag:

                changes = np.random.choice([-1, 0, 1], variation_range)

                levels = np.random.choice([1, 2, 3], variation_range)  #
                # random level
                # choice

                # need to test whether the candidate changes keep the
                # department size within bounds.
                # print(["old dept size:", department_size,
                #        "new dept size:", self.res[i, 0:6].sum(),
                #        "candidate:", department_size +
                #        changes.sum(),
                #        " added postions: ", changes.sum(),
                #        "unfilled ", unfilled_vacanies])
                if (department_size + changes.sum() <=
                        department_size_upper_bound
                        and department_size + changes.sum() >=
                        department_size_lower_bound):
                    change_to_level_3 = int(
                        changes[np.where(levels == 3)[0]].sum())
                    change_to_level_2 = int(
                        changes[np.where(levels == 2)[0]].sum())
                    change_to_level_1 = int(
                        changes[np.where(levels == 1)[0]].sum())
                    flag = True

                if (department_size > department_size_upper_bound):
                    change_to_level_3 = 0
                    change_to_level_2 = 0
                    change_to_level_1 = 0

                    flag = True

                if department_size < department_size_lower_bound:
                    changes = np.ones(variation_range)
                    change_to_level_3 = int(
                        changes[np.where(levels == 3)[0]].sum())
                    change_to_level_2 = int(
                        changes[np.where(levels == 2)[0]].sum())
                    change_to_level_1 = int(
                        changes[np.where(levels == 1)[0]].sum())
                    flag = True

        df_ = pd.DataFrame(self.res)
        df_.columns = MODEL_RUN_COLUMNS + EXPORT_COLUMNS_FOR_CSV

        recarray_results = df_.to_records(index=True)
        self.run = recarray_results
        return recarray_results
Esempio n. 50
0
But we can see that, just as with the binomial, for a large enough lam the
Poisson distribution becomes similar to a normal distribution with a
corresponding mean and standard deviation.'''

sns.distplot(random.normal(loc=50, scale=7, size=1000),
             hist=False,
             label='normal')
sns.distplot(random.poisson(lam=50, size=1000), hist=False, label='poisson')
plt.show()
'''Difference Between Poisson and Binomial Distribution
The difference is subtle: the binomial distribution counts successes in a fixed
number of discrete trials, whereas the Poisson distribution counts events
occurring over a continuous interval.

But for very large n and near-zero p, the binomial distribution is nearly
identical to a Poisson distribution with lam equal to n * p.'''

sns.distplot(random.binomial(n=1000, p=0.01, size=1000),
             hist=False,
             label='binomial')
sns.distplot(random.poisson(lam=10, size=1000), hist=False, label='poisson')
plt.show()

print('Uniform Distribution')
'''Used to describe a probability distribution where every outcome has an equal chance of occurring.'''
x = random.uniform(size=(2, 3))
print(x)
sns.distplot(random.uniform(size=1000), hist=False)
plt.show()

print('Logistic Distribution')
'''The logistic distribution is used to describe growth.
It is used extensively in machine learning, e.g. in logistic regression and neural networks.'''
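
# A minimal sketch in the same style as the snippets above: drawing from a
# logistic distribution with numpy (the loc/scale/size values are illustrative).
x = random.logistic(loc=1, scale=2, size=(2, 3))
print(x)
# sns.distplot(random.logistic(size=1000), hist=False)
# plt.show()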
Esempio n. 51
0
# The binomial distribution is a discrete distribution.

# It describes the outcome of binary scenarios,
# e.g. the toss of a coin: it will either be heads or tails.
# It has three parameters:
# n - number of trials
# p - probability of success on each trial (e.g. 0.5 for a coin toss)
# size - the shape of the returned array

# Discrete distribution -
# a distribution defined over a separate (countable) set of events.

from numpy import random

x = random.binomial(n=10, p=0.5, size=10)

print(x)

# visualization of binomial distribution

import matplotlib.pyplot as plt
import seaborn as sns

sns.distplot(random.binomial(n=10, p=0.5, size=1000), hist=True, kde=False)
plt.show()

# difference between normal and binomial distribution
"""
The main difference is that the normal distribution is continuous
whereas the binomial is discrete, but with enough data points the binomial
becomes quite similar to a normal distribution with a certain loc and scale.
"""
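
# A minimal sketch overlaying the two distributions, following the seaborn
# comparison pattern used earlier in this file (values chosen so the means
# and spreads roughly match: binomial(100, 0.5) has mean 50 and std 5).
sns.distplot(random.normal(loc=50, scale=5, size=1000), hist=False, label='normal')
sns.distplot(random.binomial(n=100, p=0.5, size=1000), hist=False, label='binomial')
plt.show()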
Esempio n. 52
0
from numpy import random
num_samples = int(input("Enter the number of samples: "))
num = int(input("Enter the number of trials: "))
prob = float(input("Enter the probability: "))
x = random.binomial(n=num, p=prob, size=num_samples)
print(x)
Esempio n. 53
0
        ],
                       action_feature_funcs=[],
                       dnn_spec=DNNSpec(
                           neurons=[2],
                           hidden_activation=DNNSpec.relu,
                           hidden_activation_deriv=DNNSpec.relu_deriv,
                           output_activation=DNNSpec.sigmoid,
                           output_activation_deriv=DNNSpec.sigmoid_deriv))
    ]
    # noinspection PyPep8
    this_score_func = lambda a, p: [
        1. / p[0] if a == (10, ) else 1. / (p[0] - 1.)
    ]
    # noinspection PyPep8
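    # sa_gen_func draws n Bernoulli samples with success probability p[0] and
    # maps 1 -> action (10,) and 0 -> action (-10,).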
    sa_gen_func = lambda p, n: [((10, ) if x == 1 else (-10, ))
                                for x in binomial(1, p[0], n)]
    pg_obj = PolicyGradient(mdp_rep_for_rl_pg=mdp_rep_obj,
                            reinforce=reinforce_val,
                            num_batches=num_batches_val,
                            batch_size=batch_size_val,
                            num_action_samples=num_action_samples_val,
                            max_steps=max_steps_val,
                            actor_lambda=actor_lambda_val,
                            critic_lambda=critic_lambda_val,
                            score_func=this_score_func,
                            sample_actions_gen_func=sa_gen_func,
                            fa_spec=fa_spec_val,
                            pol_fa_spec=pol_fa_spec_val)

    def policy_func(i: int) -> Mapping[Tuple[int], float]:
        if i == 1:
Esempio n. 54
0
# automate shift maker
def random_diag_sin_shifts(n,
                           min_c=0,
                           max_c=10,
                           min_a=.01,
                           max_a=.05,
                           min_f=15,
                           max_f=20):
    """Generate systematics shift sine functions.

    Each one follows a formula:
        f(x) = x + ampl * sin(freq * x)
    """
    C = runif(n, min_c, max_c)
    A = runif(n, min_a, max_a)
    F = runif(n, min_f, max_f)
    # Bind c, a, f as default arguments so each lambda keeps its own parameters
    # (a plain closure here would see only the last loop values).
    return tuple([
        lambda x, c=c, a=a, f=f: c + x + a * np.sin(2 * pi * x / f)
        for c, a, f in zip(C, A, F)
    ])


if name == "__main__":
    rt = draw_rt()
    rts = draw_runs(rt, 10)
    shifts = random_diag_sin_shifts(10)
    rtss = act(shifts, rts)

    # add big jumps here.
    npr.binomial(10000, .01)
Esempio n. 55
0
def evaluation(silent, simulation_rounds, malicious_devices_prop,
               devices_per_cluster, eval_time, malicious_frequency_multiplier,
               threshold_ratio, periodicity_error, results_file):
    global counter_lock, counter

    show_pbar = not silent and counter_lock is not None and counter is not None

    aggregated_data_sent_array = []
    data_sent_threshold_array = []
    num_malicious_array = []
    for i in range(simulation_rounds):
        # Updating progress bar
        if show_pbar:
            with counter_lock:
                counter.value += 1

        # Creating the cluster (devices in a flow)
        iot_devices = []
        distribution_device_classes = [
            random_int(0,
                       len(device_classes) - 1)
            for _ in range(devices_per_cluster)
        ]
        distribution_malicious = binomial(1, malicious_devices_prop,
                                          devices_per_cluster)
        for type_id, is_malicious in zip(distribution_device_classes,
                                         distribution_malicious):
            iot_devices.append(device_classes[type_id](
                is_malicious, eval_time, periodicity_error,
                malicious_frequency_multiplier))

        # Preloading random arrays
        uniform_first_tx = list(uniform(0, 1, devices_per_cluster))
        gaussian_jitter = list(normal(0, 1, devices_per_cluster))

        # Computing estimated throughput (data sent within an evaluation period)
        data_sent_threshold = 0
        distribution_devices_counter = dict(
            Counter(distribution_device_classes))
        for device_type_id, device_type_count in distribution_devices_counter.items(
        ):
            data_sent_threshold += device_type_count * threshold_ratio * (
                eval_time /
                get_class_from_type_id(device_type_id).legacy_period) * (
                    get_class_from_type_id(device_type_id).data_burst)

        # Simulation
        aggregated_data_sent = 0
        for device in iot_devices:
            # First transmission in U(0, tx_period)
            # Assuming tx_period < eval_time
            accumulated_time = uniform_first_tx.pop() * device.period
            # [2,n-1] transmissions
            num_txs = 1 + int((eval_time - accumulated_time) / device.period)
            remainder = (eval_time - accumulated_time) % device.period
            # Last transmission (with jitter)
            standard_deviation = device.period * (periodicity_error / 2) / 2
            last_transmission_time = standard_deviation * gaussian_jitter.pop(
            ) + device.period
            if (last_transmission_time < remainder):
                num_txs += 1
            # Adding the data of this device to the aggregated data sent in this period
            aggregated_data_sent += num_txs * device.data_burst

        aggregated_data_sent_array.append(aggregated_data_sent)
        data_sent_threshold_array.append(data_sent_threshold)
        num_malicious_array.append(sum(distribution_malicious))

    # Results
    result = {
        "eval_time":
        eval_time,
        "malicious_devices_prop":
        malicious_devices_prop,
        "devices_per_cluster":
        devices_per_cluster,
        "malicious_frequency_multiplier":
        malicious_frequency_multiplier,
        "threshold_ratio":
        threshold_ratio,
        "periodicity_error":
        periodicity_error,
        "simulation_rounds":
        simulation_rounds,
        "aggregated_data_sent_array":
        aggregated_data_sent_array,
        "data_sent_threshold_array":
        data_sent_threshold_array,
        "malicious_quarantined_array": [
            int(num_malicious) if
            (aggregated_data_sent > data_sent_threshold) else 0
            for aggregated_data_sent, data_sent_threshold, num_malicious in
            zip(aggregated_data_sent_array, data_sent_threshold_array,
                num_malicious_array)
        ],
        "malicious_array": [int(x) for x in num_malicious_array],
        "legitimate_quarantined_array": [
            int(devices_per_cluster - num_malicious) if
            (aggregated_data_sent > data_sent_threshold) else 0
            for aggregated_data_sent, data_sent_threshold, num_malicious in
            zip(aggregated_data_sent_array, data_sent_threshold_array,
                num_malicious_array)
        ],
        "legitimate_array":
        [devices_per_cluster - int(x) for x in num_malicious_array]
    }
    return result
Esempio n. 56
0
def bino_mutual(filename, periods):
    print('Binomial simulation of mutualistic interaction %s %ld' %
          (filename, periods))
    tinic = time()

    filename_a = filename + '_a.txt'
    minputchar_a = dlmreadlike(filename_a)
    nrows_a = len(minputchar_a)
    ncols_a = len(minputchar_a[0])
    for i in range(nrows_a):
        for j in range(ncols_a):
            minputchar_a[i][j] = float(minputchar_a[i][j])
    numspecies_a = ncols_a
    print("numspecies a %d" % numspecies_a)
    K_a = []
    Nindividuals_a = []
    rowNindividuals_a = []
    r_a = []

    filename_b = filename + '_b.txt'
    minputchar_b = dlmreadlike(filename_b)
    nrows_b = len(minputchar_b)
    ncols_b = len(minputchar_b[0])
    for i in range(nrows_b):
        for j in range(ncols_b):
            minputchar_b[i][j] = float(minputchar_b[i][j])
    numspecies_b = nrows_b - 3
    print("numspecies b %d" % numspecies_b)
    K_b = []
    Nindividuals_b = []
    rowNindividuals_b = []
    r_b = []

    for n in range(numspecies_a):
        rowNindividuals_a.append(int(minputchar_a[nrows_a - 3][n]))
        K_a.append(int(minputchar_a[nrows_a - 2][n]))
        r_a.append(minputchar_a[nrows_a - 1][n])
    Nindividuals_a.append(rowNindividuals_a)
    period_year = 365
    for k in range(periods - 1):
        rowNi = []
        for n in range(numspecies_a):
            rperiod = float(r_a[n] / period_year)
            # Variation due to malthusian parameter r
            incNmalth = binomial(Nindividuals_a[k][n], 1 - exp(-1 * rperiod))
            # Second term of logistic equation
            incNlogistic = binomial((Nindividuals_a[k][n]**2) / K_a[n],
                                    1 - exp(-1 * rperiod))
            # Terms due to other species
            incNOtherspecies = 0
            for j in range(numspecies_a):
                incNOtherspecies = incNOtherspecies + binomial(
                    round(Nindividuals_a[k][j] * Nindividuals_a[k][n] /
                          K_a[n]), 1 - exp(-1 * rperiod * minputchar_a[n][j]))
            rowNi.append(
                round(Nindividuals_a[k][n] + incNmalth - incNlogistic +
                      incNOtherspecies))
        Nindividuals_a.append(rowNi)
    tfin = time()
    print("Elapsed time %f s" % (tfin - tinic))
    dlmwritelike(filename_a, periods, Nindividuals_a, 'bino')
    plt.plot(Nindividuals_a)
    plt.show()
Esempio n. 57
0
 def pull(self) -> float:
     return binomial(1, self.param)
Esempio n. 58
0
 def sample_multiple(self, actions, n):
     """ draws n samples from the reward distributions of the specified actions. """
     return binomial(n, self.expected_rewards[actions])
Esempio n. 59
0
def run_vi(data_set,
           alpha,
           holdout,
           sigma_a=0.1,
           sigma_n=0.5,
           iter_count=50,
           truncation=15,
           init_Phi=1.0):
    data_count = data_set.shape[0]
    dim_count = data_set.shape[1]
    elbo_set = np.zeros([iter_count])
    nu_set = list()  # nu are the variational parameters on Z
    phi_set = list()  # phi mean param of A
    Phi_set = list()  # Phi cov param of A, per feat -> same for all dims
    tau_set = list()  # tau are the variational parameters on the stick betas
    iter_times = list()

    N, D = data_set.shape
    K = truncation
    feature_count = truncation

    # Initialize objects
    Z = npr.binomial(1, 0.5, [data_count, feature_count])
    nu = npr.uniform(0, 1, [data_count, feature_count])
    phi = np.zeros((feature_count, dim_count))
    Phi = [init_Phi * np.eye(dim_count) for k in range(feature_count)]
    tau = [np.ones(feature_count), np.ones(feature_count)]

    # Optimization loop
    t_start = time.perf_counter()

    try:
        for vi_iter in range(iter_count):

            # Update Phi and phi
            for k in range(feature_count):
                coeff = 1 / (1 / (sigma_a**2) + np.sum(nu[:, k]) /
                             (sigma_n**2))
                Phi[k] = coeff * np.eye(dim_count)

                phi_sums = np.dot(nu, phi)
                phi_sums_cur = phi_sums - np.outer(nu[:, k], phi[k])
                phi[k] = coeff * (1 / (sigma_n**2) *
                                  np.dot(nu[:, k], (data_set - phi_sums_cur)))
                assert len(phi[k]) == dim_count

            # Get the intermediate variables
            qks = []
            Elogsticks = []
            for k in range(feature_count):
                qk, Elogstick = compute_q_Elogstick(tau, k)
                qks.append(qk)
                Elogsticks.append(Elogstick)

            # Update tau, nu
            for k in range(int(feature_count)):

                # update nu_k
                theta = np.sum([
                    sps.digamma(tau[0][i]) - sps.digamma(tau[0][i] + tau[1][i])
                    for i in range(k)
                ]) - Elogsticks[k]
                theta += -0.5 / (sigma_n**2) * (np.trace(Phi[k]) +
                                                np.dot(phi[k], phi[k]))
                phi_sums = np.dot(nu, phi)  # recompute for each nu_k
                phi_sums_cur = phi_sums - np.outer(nu[:, k], phi[k])
                theta += 1 / (sigma_n**2) * np.dot(phi[k],
                                                   (data_set - phi_sums_cur).T)

                nu[:, k] = 1 / (1 + np.exp(-theta))

                # update tau
                tau[0][k] = alpha + np.sum(nu[:, k:]) + sum(
                    [(data_count - np.sum(nu[:, m])) * np.sum(qks[m][k + 1:m])
                     for m in range(k + 1, feature_count)])
                tau[1][k] = 1 + sum(
                    [(data_count - np.sum(nu[:, m])) * qks[m][k]
                     for m in range(k, feature_count)])

            # Compute the ELBO
            elbo = compute_elbo(data_set, alpha, sigma_a, sigma_n, phi, Phi,
                                nu, tau)

            H = holdout.shape[0]

            num_Z_samples = 50
            num_A_samples = 5
            num_pi_samples = 10
            total_loss = 0.0

            # initialize memory
            sampled_z_counts = np.zeros(K)
            mses = np.zeros((num_pi_samples, num_Z_samples, num_A_samples, H))

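            # Monte Carlo estimate of the held-out predictive log-likelihood:
            # sample stick-breaking weights (pi), binary feature matrices (Z)
            # and loadings (A), then score the holdout set under each draw.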
            for pk in range(num_pi_samples):
                vs = np.zeros(K)
                for k in range(K):
                    vs[k] = np.random.beta(tau[0][k], tau[1][k])
                pi = np.cumprod(vs)
                Z_new = np.zeros((H, K))
                for zi in range(num_Z_samples):
                    Z_new = np.random.binomial(1, pi, (H, K))
                    sampled_z_counts += Z_new.sum(0)
                    A = np.zeros((K, D))
                    for ai in range(num_A_samples):
                        for k in range(K):
                            A[k] = phi[k].copy() + np.random.normal(
                                0, Phi[k][0][0], D)
                        X_pred = Z_new.dot(A)
                        diff = (-0.5 *
                                (np.square(X_pred - holdout)) / sigma_n**2 -
                                0.5 * np.log(2 * np.pi) - np.log(sigma_n))
                        mses[pk, zi, ai] += diff.sum(axis=1)
            total_loss = mses.mean()
            lse = (
                logsumexp(mses, axis=(0, 1, 2)) -
                np.log(num_pi_samples * num_A_samples * num_Z_samples)).mean()
            sampled_z_counts /= float(num_Z_samples * H * num_pi_samples)
            # print("z: {}".format(sampled_z_counts))
            # Store things and report
            elbo_set[vi_iter] = elbo
            nu_set.append(nu)
            phi_set.append(phi)
            Phi_set.append(Phi)
            tau_set.append(tau)
            iter_times.append(time.perf_counter() - t_start)
            print(
                "[Epoch: {:<3}]: ELBO: {:<10} | Test Loss: {:<10} | MSE (LSE): {:<10}"
                .format(vi_iter, elbo / (float(N)), -1. * total_loss,
                        -1 * lse))
    except KeyboardInterrupt:
        pass

    return nu_set, phi_set, Phi_set, tau_set, elbo_set, iter_times
Esempio n. 60
0
all_non_differences = []
all_non_opportunities = []
all_core_differences = []
all_core_opportunities = []
median_pNs = []
median_pSs = []

# Plot percentiles of divergence distribution
for species_idx in xrange(0, len(species_names)):

    species_name = species_names[species_idx]

    # Use the Poisson thinning theorem to cut down on
    # non-biological correlations between dS and dN/dS
    # (i.e., the fact that dS is in the denominator of dN/dS).
    thinned_syn_differences_1 = binomial(
        numpy.array(syn_differences[species_name], dtype=numpy.int32), 0.5)
    thinned_syn_differences_2 = syn_differences[
        species_name] - thinned_syn_differences_1

    pS1s = thinned_syn_differences_1 * 1.0 / (syn_opportunities[species_name] /
                                              2.0)
    pS2s = thinned_syn_differences_2 * 1.0 / (syn_opportunities[species_name] /
                                              2.0)
    pSs = syn_differences[species_name] * 1.0 / syn_opportunities[species_name]
    pNs = non_differences[species_name] * 1.0 / non_opportunities[species_name]
    ptots = (syn_differences[species_name] + non_differences[species_name]
             ) * 1.0 / (syn_opportunities[species_name] +
                        non_opportunities[species_name])

    pseudo_pSs = 1.0 / (syn_opportunities[species_name] / 2.0 +
                        non_opportunities[species_name])