def de_fun(state, control, params, noise):
    '''
    Inputs:
        state: array of state variables [x, y]
        control: control parameter that is to be varied (the breeding-period growth rate rb)
        params: list of parameter values [kb, knb, rnb, a]
        noise: list of noise values [eps1, eps2]
    Output:
        array for subsequent state
    '''

    [x, y] = state  # x (y) population after breeding (non-breeding) period
    [kb, knb, rnb, a] = params
    [eps1, eps2] = noise
    rb = control

    # Compute pop size after breeding period season t+1

    # Parameters for negative binomial distribution
    # (ke is the NB dispersion parameter, assumed to be defined at module level)
    mu = y * np.exp((rb - a * x) * (1 - y / kb))
    p = ke / (ke + mu)

    # Compute pop size
    xnew = nbinom.rvs(ke, p)
    # Compute pop size after non-breeding period season t+1

    # Parameters for negative binomial distribution
    mu = xnew * np.exp(rnb * (1 - xnew / knb))
    p = ke / (ke + mu)
    ynew = nbinom.rvs(ke, p)

    # Output updated state
    return np.array([xnew, ynew])
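
A minimal usage sketch (not part of the original): it assumes the usual imports and supplies a module-level ke, the NB dispersion parameter the function reads from its enclosing scope; all values below are made up for illustration.

import numpy as np
from scipy.stats import nbinom

ke = 20.0  # assumed module-level dispersion parameter used inside de_fun

state = np.array([50, 40])          # [x, y]
params = [100.0, 80.0, 0.5, 0.01]   # [kb, knb, rnb, a]
next_state = de_fun(state, control=1.2, params=params, noise=[0.0, 0.0])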
Example #2
def c19_nbinom_rvs(r0, k, size=0):
    """Generates negative binomial random variates (a scalar unless size > 1)"""
    n, p = c19_nbinom_transform(r0, k)

    if size > 1:
        r = nbinom.rvs(n, p, size=size)
    else:
        r = nbinom.rvs(n, p)
    return r
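
c19_nbinom_transform is not shown above. A plausible sketch, assuming the usual epidemiological mean/dispersion parameterization (mean r0, dispersion k); this is an assumption about the original helper, not its actual code:

def c19_nbinom_transform(r0, k):
    # scipy's nbinom(n, p) has mean n * (1 - p) / p; with n = k and
    # p = k / (k + r0) the mean works out to r0
    n = k
    p = k / (k + r0)
    return n, p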
Example #3
    def simulate(self, length: int) -> pd.DataFrame:
        r"""Simulate outbreaks.

        Parameters
        ----------
        length
            Number of weeks to model.

        Returns
        -------
            A ``DataFrame`` of an endemic time series where each row contains the case counts of this week.
        """
        if self.seed:
            np.random.seed(self.seed)
        mu_s = [
            np.exp(self.baseline_frequency + self.trend * week +
                   self._seasonality(week)) for week in range(length)
        ]
        if self.dispersion == 1:
            cases = [poisson.rvs(mu, size=1)[0] for mu in mu_s]
        else:
            cases = []
            for mu in mu_s:
                r = float(mu / (self.dispersion - 1))  # np.float is removed in modern NumPy
                p = r / (r + mu)
                cases.append(nbinom.rvs(r, p, size=1)[0])
        return (pd.DataFrame({
            "n_cases": cases
        }).pipe(add_date_time_index_to_frame).assign(
            timestep=list(range(1, length + 1))))
Example #4
    def sample(self, n):
        data = np.zeros(n, dtype=np.int32)
        U = uniform.rvs(size=n)
        poisson_index = U <= self.pi
        data[poisson_index] = poisson.rvs(mu=self.lambda_0, loc=1, size=np.sum(poisson_index))
        data[np.invert(poisson_index)] = nbinom.rvs(n=self.r, p=1 - self.p, loc=1, size=n - np.sum(poisson_index))
        return data
Example #5
    def randnegbinom(self, mu, sd, size):
        # Moment matching: draws have mean mu and variance sd**2 (requires sd**2 > mu)
        mu = float(mu)
        sd = float(sd)
        r = (mu * mu) / (sd * sd - mu)
        p = 1 - mu / (r + mu)
        result = nbinom.rvs(r, p, size=size)
        return result
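
A quick numerical check of the moment matching above (valid only for overdispersed targets, sd**2 > mu):

from scipy.stats import nbinom

mu, sd = 10.0, 5.0
r = (mu * mu) / (sd * sd - mu)
p = 1 - mu / (r + mu)
print(nbinom.stats(r, p, moments="mv"))  # (10.0, 25.0) == (mu, sd**2)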
Example #6
def count_regression(n):
    X = [norm.rvs(.4, 1 / 9) for i in range(n)]
    U = [np.exp(1 * x) for x in X]
    p = 1 / 2
    Y = [nbinom.rvs(p / (1 - p) * u, p) for u in U]
    data = [[Y[i], X[i]] for i in range(n)]

    def g(x, theta):
        # moment condition evaluated on a data pair x = [y_i, x_i]
        # (the original used the global X list by mistake)
        return (x[0] - np.exp(x[1] * theta)) * x[1]

    def prior(theta):
        return t.pdf(x=theta, df=2.5, loc=0, scale=5)

    x_list = np.arange(-3, 3, .01)
    y_list = [prior(x) for x in x_list]
    plt.plot(x_list, y_list)
    plt.show()

    mh_block_sampler = MH_block_sampler(data, 1, 1, g, prior)
    mh_block_sampler.sample(1000)
    plt.hist([
        mh_block_sampler.thetas[i] for i in range(len(mh_block_sampler.thetas))
    ],
             density=True,
             bins=30)
    plt.show()

    return
Example #7
def evaluate_iterated_game(genomes):
    # When using this evaluation function, a list of lists of genomes
    # should be provided to the main 'evaluate' function.
    agents = []
    for g in genomes:
        net = NEAT.NeuralNetwork()
        g.BuildPhenotype(net)
        agents.append(NeuralNetworkAgent(net))

    fitness = 0
    n_total_rounds = 0.0
    p1 = agents[0]
    for p2 in agents[1:]:
        n_rounds = nbinom.rvs(1, 0.02, 1) + 1  # note: the third positional arg is loc (a +1 shift), not size
        p1.flush()
        p2.flush()
        p1_payoff = 0
        p2_payoff = 0
        for i in range(n_rounds):
            p1_decision = p1.get_action([p1_payoff, p2_payoff])
            p2_decision = p2.get_action([p2_payoff, p1_payoff])

            p1_payoff = PAYOFFS[p1_decision][p2_decision]
            p2_payoff = PAYOFFS[p2_decision][p1_decision]

            p1.add_payoff(p1_payoff)
            n_total_rounds += 1.0

    fitness = p1.get_total_payoff() / n_total_rounds
    fitness -= INTEL_PENALTY*(len(p1.net.neurons))
    return fitness
Example #8
    def randnegbinom(self, mu, sd, size):
        mu = float(mu)
        sd = float(sd)
        r = (mu * mu) / (sd * sd - mu)
        p = 1 - mu / (r + mu)
        result = nbinom.rvs(r, p, size=size)
        return result
def RandSFCorr(NumShow, SF=None, sfp=None,date=None,CalcNumDuck=True,Randomize=True):
    '''RandSFCorr(NumShow, SF=None, sfp=None,date=None,CalcNumDuck=True)
    Make a random-correction to the number of shows in order to get the number of geoducks.
    *SF is the show-factor, zero to one.  It takes precedence over the show-factor plot.
    *sfp is an instance of SFplot.  It represents a show-factor plot or a list of show-factor plots.
    *date is only relevant if SF is undefined.
    *CalcNumDuck indicates that the number of geoducks in the show-factor plot(s) needs to be re-calculated
    '''
    # Dispatch element-wise if SF is a sequence of show-factors (checked first,
    # since comparing an array with == below would be elementwise and ambiguous)
    if isinstance(SF, (list, ndarray)):
        return(list(map(lambda sf: RandSFCorr(NumShow, SF=sf, CalcNumDuck=CalcNumDuck, Randomize=Randomize), SF)))

    if SF == 1: return(NumShow)

    if (SF is None) and (sfp is None):  # ignore show-factor effect if there is no indication of a value to use
        return(NumShow)

    if SF is not None:  # a single deterministic value for show-factor
        if not(Randomize):#deterministic
            return(float(NumShow)/SF)

        #Probabilistic
        result=NumShow+nbinom.rvs(NumShow,SF)
        return(result)


    #Get the show-factor from a set of show-factor data
    sf=sfp.RandSF(date=date,CalcNumDuck=CalcNumDuck)
    result=RandSFCorr(NumShow, SF=sf, CalcNumDuck=CalcNumDuck,Randomize=Randomize)
    return(result)
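
A minimal call with a fixed show factor (the values are made up for illustration). Note that the probabilistic branch, NumShow + nbinom.rvs(NumShow, SF), has mean NumShow / SF, matching the deterministic branch:

corrected = RandSFCorr(120, SF=0.8)                   # random draw, mean 150
expected = RandSFCorr(120, SF=0.8, Randomize=False)   # exactly 120 / 0.8 = 150.0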
def neg_bin(mean, var):
    # Moment matching for scipy's nbinom: mean = n*(1-p)/p and var = n*(1-p)/p**2,
    # so p = mean/var and n = mean*p/(1-p).
    p = mean / var
    n = mean * p / (1 - p)

    while(True):
        yield nbinom.rvs(n, p)
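
Since neg_bin is an infinite generator, draws can be taken with next() or itertools.islice; the moment matching requires var > mean:

from itertools import islice

draws = list(islice(neg_bin(mean=10.0, var=25.0), 5))  # five NB draws with mean 10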
Example #11
def Mod_NB(x):
    theta = x[:-1]
    r = 1 / x[-1]
    FM = Mod(theta, time_f)
    mu = np.diff(FM)
    p = r / (mu + r)  # scipy success probability; the resulting draws have mean mu
    FM_error = nbinom.rvs(r, p)
    return FM_error
Example #12
def generate_negbin(N, r, prior):
    # sample from prior
    theta = prior.rvs()

    # generate samples
    x = nbinom.rvs(r, theta, size=N)

    return theta, x
Example #13
    def modelrvs(self, spec):
        # simulate seed data under model
        r_loc = self.get_r(spec, sim=True)
        for i in range(len(self.sim_data[spec][self.sc])):
            if np.random.random() > self.q0:
                self.data[spec][self.sc][i] = nbinom.rvs(n=r_loc[i], p=self.p0)
            else:
                self.data[spec][self.sc][i] = 0
Example #14
    def randnegbinom(self, mu, sd, size):
        mu = float(mu)
        sd = float(sd)
        r = (mu * mu) / (sd * sd - mu)
        p = 1 - mu / (r + mu)
        result = nbinom.rvs(r, p, size=size)
        #print('nbinom', describe(result))
        return result
Example #15
def simulator(home_mean, away_mean, niterations):
    # estimates probability of home team win
    home_game_score = [0] * niterations
    away_game_score = [0] * niterations
    home_win = [0] * niterations
    i = 0
    while (i < niterations):
        home_game_score[i] = \
            nbinom.rvs(n = 4.0, p = 4.0/(4.0 + home_mean), size = 1)[0] 
        away_game_score[i] = \
            nbinom.rvs(n = 4.0, p = 4.0/(4.0 + away_mean), size = 1)[0]         
        if (home_game_score[i] > away_game_score[i]):
            home_win[i] = 1
        if ((away_game_score[i] > home_game_score[i]) or \
            (away_game_score[i] < home_game_score[i])):
            i = i + 1 
    n_home_win = sum(home_win)
    return n_home_win / niterations        
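
Note that the final if statement only advances i when the scores differ, so tied games are redrawn and the function estimates P(home win | no tie). A sample call (made-up means):

p_home = simulator(home_mean=1.8, away_mean=1.2, niterations=10000)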
Example #17
    def samples_deaths(self, new_infections, fatality_ratio, time_to_death,
                       niu, k):
        r"""
        Computes samples for the number of deaths at time step
        :math:`k` in specified region, given the simulated timeline of
        susceptible number of individuals, for all age groups in the model.

        The number of deaths is assumed to be distributed according to
        a negative binomial distribution with mean

        .. math::
            \mu_{r,t_k,i} = p_i \sum_{l=0}^{k} f_{k-l} \delta_{r,t_l,i}^{infec}

        and variance :math:`\mu_{r,t_k,i} (\nu + 1)`, where :math:`p_i` is the
        age-specific fatality ratio for age group :math:`i`, :math:`f_{k-l}`
        is the probability of demise :math:`k-l` days after infection and
        :math:`\delta_{r,t_l,i}^{infec}` is the number of new infections
        in specified region, for age group :math:`i` on day :math:`t_l`.

        It uses an output of the simulation method for the PheSEIRModel,
        taking all the rest of the parameters necessary for the computation
        from the way its simulation has been fitted.

        Parameters
        ----------
        new_infections
            (numpy.array) Number of new infections from the simulation method
            for the PheSEIRModel.
        fatality_ratio
            List of age-specific fatality ratios.
        time_to_death
            List of probabilities of death of individual d days after
            infection.
        niu
            Dispersion factor for the negative binomial distribution.
        k
            Index of day for which we intend to sample the number of deaths for
            by age group.

        Returns
        -------
        Array of sampled numbers of deaths for each age group in the
        specified region at time :math:`t_k`.

        Notes
        -----
        Always run :meth:`PheSEIRModel.new_infections` and
        :meth:`PheSEIRModel.check_death_format` before running this one.

        """
        self._check_time_step_format(k)

        # Draw negative binomial samples with mean given by mean_deaths
        return nbinom.rvs(
            n=niu *
            self.mean_deaths(fatality_ratio, time_to_death, k, new_infections),
            p=niu / (1 + niu))
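
As a check on the parameterization above: with n = niu * mu and p = niu / (1 + niu), scipy's nbinom has mean mu and variance mu * (1 + niu) / niu, which can be verified directly:

from scipy.stats import nbinom

mu, niu = 50.0, 2.0
print(nbinom.stats(n=niu * mu, p=niu / (1 + niu), moments="mv"))  # (50.0, 75.0)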
Example #18
def draw_nbinom_dataset(draw, n_samples):
    """
    Generate random samples from a negative binomial model. Assumes that
    draw is a sample from the model parameter space (probably posterior
    but could be prior) and that the 1st element of the draw is
    the burst rate and the 2nd element is the mean burst size.
    """
    # neg_binom is assumed to be scipy.stats.nbinom imported under an alias
    pp_samples = neg_binom.rvs(draw[0], (1 + draw[1])**(-1), size=n_samples)
    return np.unique(pp_samples, return_counts=True)
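
A hypothetical call (the draw values are made up): draw[0] is the burst rate and draw[1] the mean burst size, so the success probability is 1 / (1 + draw[1]):

values, counts = draw_nbinom_dataset(draw=[2.0, 5.0], n_samples=1000)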
Example #19
    def simulate_ge(self, negative_binomial):
        # dimension of initial space (i.e number of genes)
        self.W = np.random.normal(loc=0,
                                  scale=0.5,
                                  size=(self.dim, self.latent))
        self.beta = np.random.normal(loc=0, scale=0.5, size=self.dim)

        #self.W = np.random.normal(loc=0, scale=1.0, size=(self.latent, self.dim))
        #self.beta = np.random.normal(loc=0, scale=1.0, size=self.dim)

        self.mu = np.clip(a=np.exp(self.z @ self.W.T + self.beta),
                          a_min=0,
                          a_max=1e5)

        if negative_binomial:
            print('=== Negative Binomial simulations ===')
            #g = gamma.rvs(self.alpha, scale=self.mu / self.alpha)
            #self.X = np.asarray(poisson(g), dtype=np.float64)
            r, p = convert_params_NB(mu=self.mu, alpha=self.alpha)
            self.X = nbinom.rvs(n=r, p=p)
        else:
            self.X = np.asarray(poisson(self.mu), dtype=np.float64)

        if self.vis:
            ## Poissson distribution
            fig, axes = plt.subplots(
                1,
                1,
                figsize=(14, 8),
                sharey=True,
            )

            bins = np.arange(0, 30, 5)

            cm = plt.cm.get_cmap('RdYlBu_r')

            n, binss, patches = axes.hist(
                self.X,
                bins=bins,
                edgecolor='black',
            )
            # set color of patches
            # scale values to interval [0,1]
            bin_centers = 0.5 * (binss[:-1] + binss[1:])
            col = bin_centers - min(bin_centers)
            col /= max(col)

            for c, p in zip(col, patches):
                plt.setp(p, 'facecolor', cm(c))

            axes.set_title('Histogram of simulated gene expression data')
            plt.ylabel('Counts')
            plt.xlabel('Gene Expression value')
            plt.legend(['gene_' + str(i) for i in list(range(self.dim))],
                       loc='best')
            plt.show()
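
convert_params_NB is not shown. A plausible sketch, assuming the same mean/overdispersion convention as the convert helper in Example #32 below (var = mu + alpha * mu**2); this is an assumption, not the original helper:

def convert_params_NB(mu, alpha):
    # r failures with success probability p: mean mu, variance mu + alpha * mu**2
    r = 1.0 / alpha
    p = r / (r + mu)
    return r, p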
Example #20
    def C1(self):
        print('Subsampling C1.')
        snv = nbinom.rvs(
            self.size, self.size /
            (self.size + self.snv.reshape(3, 3, -1, self.p, self.samples)))
        self._C1 = tf.constant(snv, dtype=self.dtype)
        if self.verbose:
            print('C1:', self._C1.shape)

        return self._C1
Example #21
def corner_spread(home_corners, away_corners, corner_mean, niterations):
    random.seed(1234)  # note: seeds the stdlib RNG only; scipy's rvs draws use numpy's RNG
    game_home_mean = [0] * niterations
    game_away_mean = [0] * niterations
    game_corner_mean = [0] * niterations
    over_corner_mean_counter = [0] * niterations
    i = 0
    n_count = 4.0
    while i < niterations:
        game_home_mean[i] = nbinom.rvs(n=n_count, p=n_count / (n_count + home_corners), size=1)[0]
        game_away_mean[i] = nbinom.rvs(n=n_count, p=n_count / (n_count + away_corners), size=1)[0]
        game_corner_mean[i] = nbinom.rvs(n=n_count, p=n_count / (n_count + corner_mean), size=1)[0]
        home_plus_away = game_home_mean[i] + game_away_mean[i]
        if home_plus_away > game_corner_mean[i]:
            over_corner_mean_counter[i] = 1
        if (game_corner_mean[i] > home_plus_away) or (game_corner_mean[i] < home_plus_away):
            i += 1
    n_over_corner_mean_count = sum(over_corner_mean_counter)
    return n_over_corner_mean_count / float(niterations)
Example #22
def sample_usage():
    t_arr = np.ones(100)
    m = 10
    theta = .7
    ps = t_arr / (t_arr + theta)
    n_arr = nbinom.rvs(m, ps)
    NBinom.loglik(n_arr, t_arr, m, theta)
    NBinom.numeric_grad(n_arr, t_arr, m, theta)
    nb2 = NBinom2(n_arr, t_arr)
    params = nb2.gradient_descent(verbose=True)
    return sum(params - np.array([m, theta])) < 1e-1
Example #23
def observationGenerator(states, parameters): 
    #input states is 2D: numStates x dimObs, parameters is 1D
    obs = zeros((states.shape[0], 2))
    for i in range(states.shape[0]):
        #obs[i,:] = transpose(random.multivariate_normal(repeat(states[i,0],1), [[parameters[0] * states[i,0] ** 2]], size = 2))
        p = 1 / (1 + parameters[0] * states[i,0])
        p = minimum(p, 1-1e-7) 
        p = maximum(p, 1e-7)
        n = maximum(1, floor( states[i,0] * p / (1-p) ) ).astype(int32)
        obs[i,:] = nbinom.rvs(n, p, size=2)  # size must be passed by keyword; the third positional arg is loc
    return obs
Example #24
    def C2(self):

        sub_set = np.ones_like(self.other)
        sub_set[np.where(np.isnan(self.other))] = 0
        self.other[np.where(np.isnan(self.other))] = 0
        self.C2_nans = tf.constant(sub_set, dtype=self.dtype)
        print('Subsampling C2')
        other = nbinom.rvs(self.size, self.size / (self.size + self.other))
        self._C2 = tf.constant(other, dtype=self.dtype)
        if self.verbose:
            print('C2:', self._C2.shape)
        return self._C2
Example #25
    def __init__(self):
        # total sessions this user will have:
        self.num_sessions = 1 + int(uniform.rvs() > sessions_zero_inflation
                                    ) * nbinom.rvs(4, beta.rvs(12, 10))
        self.first_session = fuzz_time(local_epoch)

        self.session_starts = [
            fuzz_time(self.first_session) for i in range(self.num_sessions - 1)
        ] + [self.first_session]

        self.next_session = self.first_session
        self.guid = uuid4()

        self._current_cart = 0  # num items currently in cart
Example #26
def gentestcase2(nsg=10):
    '''
  The second testcase, 2 samples, control and treatment
  '''
    vark = 0.01
    # desmat=np.matrix([[0,0],[0,1],[1,0],[1,1]])
    desmat = np.matrix([[1, 0, 0], [0, 1, 0], [0, 1, 1], [1, 1, 1]])
    (nsample, nbeta) = desmat.shape
    # basic parameters
    sks = SimCaseSimple()
    sks.prefix = 'sample2'
    sks.design_mat = desmat
    sks.beta0 = [random.uniform(3, 10)
                 for i in range(nsg)]  # these are the base
    sks.beta1 = [random.random() * 5
                 for i in range(nbeta)]  # treatments;size: nbeta
    print('beta_0:' + '\t'.join([decformat(x) for x in sks.beta0]))
    print('beta_1:' + '\t'.join([decformat(x) for x in sks.beta1]))
    # mean and variance
    mu0 = [math.exp(t) for t in sks.beta0]  # size: nsg
    tprod = desmat * np.matrix(sks.beta1).getT()  # size: nsample*1
    tprodlist = [x[0] for x in tprod.tolist()]  # size: nsample*1
    sks.mu = [mu0]
    for nr in range(nsample):
        sgi = [math.exp(t + tprodlist[nr]) for t in sks.beta0]
        sks.mu += [sgi]
    # sks.var0=[t+vark*(t*t) for t in sks.mu0]
    sks.var = [[t + vark * (t * t) for t in tl] for tl in sks.mu]
    for i in range(nsample + 1):  # including 1 base and n samples
        print('mu_:' + str(i) + '\t'.join([decformat(x) for x in sks.mu[i]]))
        print('var_:' + str(i) + '\t'.join([decformat(x) for x in sks.var[i]]))
    # parameters for generating NB counts
    #sks.nb_p0=[sks.mu0[i]/sks.var0[i] for i in range(nsg)]
    #sks.nb_r0=[sks.mu0[i]*sks.mu0[i]/(sks.var0[i]-sks.mu0[i]) for i in range(nsg)]
    #sks.nb_p1=[[sks.mu1[t][i]/sks.var1[t][i] for i in range(nsg)] for t in range(nsample)]
    #sks.nb_r1=[[sks.mu1[t][i]*sks.mu1[t][i]/(sks.var1[t][i]-sks.mu1[t][i]) for i in range(nsg)] for t in range(nsample)]
    sks.nb_p = [[sks.mu[t][i] / sks.var[t][i] for i in range(nsg)]
                for t in range(nsample + 1)]
    sks.nb_r = [[
        sks.mu[t][i] * sks.mu[t][i] / (sks.var[t][i] - sks.mu[t][i])
        for i in range(nsg)
    ] for t in range(nsample + 1)]
    #
    #sks.nb_count0=[nbinom.rvs(sks.nb_r0[i],sks.nb_p0[i]) for i in range(nsg)]
    #sks.nb_count1=[[nbinom.rvs(sks.nb_r1[t][i],sks.nb_p1[t][i]) for i in range(nsg)] for t in range(nsample)]
    sks.nb_count = [[
        nbinom.rvs(sks.nb_r[t][i], sks.nb_p[t][i]) for i in range(nsg)
    ] for t in range(nsample + 1)]

    return (sks)
def gentestcase3(nsg=10,desmat=None):
  '''
  The third testcase, with sgRNA efficiency
  '''
  vark=0.01
  effiprob=0.5 # the probability that a sgRNA is efficient
  # desmat=np.matrix([[0,0],[0,1],[1,0],[1,1]])
  if desmat is None:  # comparing an array with == None would be elementwise and ambiguous
    # desmat=np.matrix([[1,0,0],[0,1,0],[0,1,1],[1,1,1]])
    desmat=np.matrix([[1,0,0,1],[0,1,1,1],[1,0,1,0]]).getT()
  (nsample,nbeta)=desmat.shape
  # basic parameters
  sks=SimCaseSimple()
  sks.prefix='sample3'
  sks.design_mat=desmat
  #sks.beta0=[random.uniform(3,10) for i in range(nsg)] # these are the base 
  #sks.beta1=[(random.random())*5 for i in range(nbeta)] # treatments;size: nbeta
  sks.beta0=[random.uniform(5,10) for i in range(nsg)] # these are the base 
  sks.beta1=[(random.random()*2-1)*5 for i in range(nbeta)] # treatments;size: nbeta
  print('beta_0:'+'\t'.join([decformat(x) for x in sks.beta0]))
  print('beta_1:'+'\t'.join([decformat(x) for x in sks.beta1]))
  # efficiency
  sks.isefficient=[ (lambda x: 1 if x>=effiprob else 0)(random.random()) for i in range(nsg)]
  # mean and variance 
  mu0=[math.exp(t) for t in sks.beta0] # size: nsg
  tprod=desmat*np.matrix(sks.beta1).getT() # size: nsample*1  
  tprodlist=[x[0] for x in tprod.tolist()] # size: nsample*1  
  sks.mu=[mu0]
  for nr in range(nsample):
    sgi=[math.exp(sks.beta0[ti]+tprodlist[nr]*sks.isefficient[ti]) for ti in range(nsg)]
    sks.mu+=[sgi]
  # sks.var0=[t+vark*(t*t) for t in sks.mu0]
  sks.var=[[t+vark*(t*t) for t in tl] for tl in sks.mu]
  for i in range(nsample+1): # including 1 base and n samples
    print('mu_:'+str(i)+'\t'.join([decformat(x) for x in sks.mu[i]]))
    print('var_:'+str(i)+'\t'.join([decformat(x) for x in sks.var[i]]))
  # parameters for generating NB counts
  #sks.nb_p0=[sks.mu0[i]/sks.var0[i] for i in range(nsg)]
  #sks.nb_r0=[sks.mu0[i]*sks.mu0[i]/(sks.var0[i]-sks.mu0[i]) for i in range(nsg)]
  #sks.nb_p1=[[sks.mu1[t][i]/sks.var1[t][i] for i in range(nsg)] for t in range(nsample)]
  #sks.nb_r1=[[sks.mu1[t][i]*sks.mu1[t][i]/(sks.var1[t][i]-sks.mu1[t][i]) for i in range(nsg)] for t in range(nsample)]
  sks.nb_p=[[sks.mu[t][i]/sks.var[t][i] for i in range(nsg)] for t in range(nsample+1)]
  sks.nb_r=[[sks.mu[t][i]*sks.mu[t][i]/(sks.var[t][i]-sks.mu[t][i]) for i in range(nsg)] for t in range(nsample+1)]
  # 
  #sks.nb_count0=[nbinom.rvs(sks.nb_r0[i],sks.nb_p0[i]) for i in range(nsg)]
  #sks.nb_count1=[[nbinom.rvs(sks.nb_r1[t][i],sks.nb_p1[t][i]) for i in range(nsg)] for t in range(nsample)]
  sks.nb_count=[[nbinom.rvs(sks.nb_r[t][i],sks.nb_p[t][i]) for i in range(nsg)] for t in range(nsample+1)]
  print('efficient: '+' '.join([str(x) for x in sks.isefficient]))
  return (sks)
Example #29
def get_num_random_interactions(age, random_network_params_dict,
                                child_upper_ix, adult_upper_ix):
    if age <= child_upper_ix:
        mean = random_network_params_dict['CHILD']['mu']
        sd = random_network_params_dict['CHILD']['sigma']
    elif age <= adult_upper_ix:
        mean = random_network_params_dict['ADULT']['mu']
        sd = random_network_params_dict['ADULT']['sigma']
    else:
        mean = random_network_params_dict['ELDERLY']['mu']
        sd = random_network_params_dict['ELDERLY']['sigma']
    p = mean / (sd * sd)
    n = mean * mean / (sd * sd - mean)
    num_interactions = nbinom.rvs(n, p)
    return num_interactions
Example #30
def bin_neg_simulation(k, p, n=10000, odd=99):
    """
    Function that returns the value of a negative binomial distribution for the given parameters
    """
    if k <= 0:
        return None
    a = 100 - odd
    b = odd + (100 - odd)
    r = nbinom.rvs(k, p, size=n)
    return int(
        scipy.stats.tmean(
            r,
            (scipy.stats.scoreatpercentile(
                r, a), scipy.stats.scoreatpercentile(r, b)),
        ))
Example #31
def negbinomial_dist(n,
                     mu,
                     max_value=10,
                     min_value=0,
                     num_values=10000,
                     integers=False):
    """ generate a negative binomial distribution """
    p = 1 / ((mu / n) + 1)
    random_list = nbinom.rvs(n=n, p=p,
                             size=num_values)  # Negative binomial function

    return scale_a_distribution(random_list,
                                integers=integers,
                                max_value=max_value,
                                min_value=min_value)
Example #32
def _random_noise(df, noise_factor):
    r"""
    Generates random noise on an observable by a negative binomial :math:`NB`.
    References for the negative binomial can be found `here <https://ncss-wpengine.netdna-ssl.com/wp-content/themes/ncss/pdf/Procedures/NCSS/Negative_Binomial_Regression.pdf>`_.

    .. math::
        O &\sim NB(\mu=datapoint,\alpha)

    We keep the alpha parameter low to obtain a small variance, which should then always be approximately the size of the mean.

    Parameters
    ----------
    df : new_cases , pandas.DataFrame
        Observable on which we want to add the noise

    noise_factor: :math:`\alpha`
        Alpha factor for the random number generation

    Returns
    -------
    array : 1-dim
        observable with added noise
    """
    def convert(mu, alpha):
        r = 1 / alpha
        p = mu / (mu + r)
        return r, 1 - p

    # Apply noise on every column
    for column in df:
        # Get values
        array = df[column].to_numpy()

        for i in range(len(array)):
            if (array[i] == 0) or (np.isnan(array[i])):
                continue
            log.debug(f"Data {array[i]}")
            r, p = convert(array[i], noise_factor)
            log.info(f"n {r}, p {p}")
            mean, var = nbinom.stats(r, p, moments="mv")
            log.debug(f"mean {mean} var {var}")
            array[i] = nbinom.rvs(r, p)
            log.debug(f"Drawn {array[i]}")

        df[column] = array

    return df
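
A minimal usage sketch (it assumes the module's log logger is configured); zeros and NaNs pass through untouched, every other count is replaced by an NB draw with that count as its mean:

import pandas as pd

df = pd.DataFrame({"new_cases": [0, 12, 40, 85]})
noisy = _random_noise(df, noise_factor=0.1)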
Example #33
    def play(self, player1, player2, n_rounds=None):
        if n_rounds is None:
            n_rounds = nbinom.rvs(1, 0.2, 1) + 1  # note: the third positional arg is loc (a +1 shift), not size

        player1.reset()
        player2.reset()

        total_payoffs = np.zeros((1,2))

        p1_payoff = 0
        p1_action = None
        p1_results = {}

        p2_payoff = 0
        p2_action = None
        p2_results = {}

        payoffs = np.zeros((1,2))

        p1_trace = []
        p2_trace = []
        for i in range(0, n_rounds):
            if i == 0:
                p1_action = player1.get_initial_action()
                p2_action = player2.get_initial_action()
            else:
                p1_results['payoff'] = p1_payoff
                p1_results['action'] = p1_action

                p2_results['payoff'] = p2_payoff
                p2_results['action'] = p2_action

                p1_action = player1.get_action([p1_results, p2_results])
                p2_action = player2.get_action([p2_results, p1_results])

            p1_trace.append(p1_action)
            p2_trace.append(p2_action)

            p1_payoff = self.payoff_matrix[p1_action, p2_action]
            p2_payoff = self.payoff_matrix[p2_action, p1_action]

            total_payoffs += [[p1_payoff, p2_payoff]]

        traces = (p1_trace, p2_trace)
        avg_payoffs = total_payoffs / float(n_rounds)

        return avg_payoffs, traces
Example #34
    def inversion(self):

        if (len(self.sequence) <= 1):
            pos = 0
        else:
            pos = random.randint(0, len(self.sequence) - 1)

        cfg = AppSettings()
        p = cfg.genetics.mutation_length / (1 + cfg.genetics.mutation_length)

        length = nbinom.rvs(1, p)

        self.sequence = self.sequence[:pos] + \
                        self.sequence[pos:(pos + length)][::-1] + \
                        self.sequence[(pos + length):]
Example #35
    def update_state(self, state, rolling_time):
        # land, land_item, add_to_cart, enter_checkout, enter_address, enter_ccard, complete
        r = uniform.rvs()
        new_state = ''
        if state == 'land_homepage':
            if r < .8:
                new_state = 'land_item'
                rolling_time += timedelta(minutes=1 + nbinom.rvs(1, .5))
        elif state == 'land_item':
            if r < .3:
                new_state = 'land_item'
                rolling_time += timedelta(minutes=1 + nbinom.rvs(2, .5))
            elif r < .7:
                new_state = 'add_to_cart'
                rolling_time += timedelta(minutes=1 + nbinom.rvs(2, .5))
            elif self._current_cart > 0 and r < .8:
                new_state = 'enter_checkout'
                rolling_time += timedelta(minutes=1 + nbinom.rvs(4, .5))
        elif state == 'add_to_cart':
            if r < .6:
                new_state = 'enter_checkout'
                rolling_time += timedelta(minutes=1 + nbinom.rvs(1, .5))
            elif r < .9:
                new_state = 'land_item'
                rolling_time += timedelta(minutes=1 + nbinom.rvs(4, .5))
        elif state == 'enter_checkout':
            if r < .98:
                new_state = 'enter_address'
                rolling_time += timedelta(minutes=1 + nbinom.rvs(1, .8))
        elif state == 'enter_address':
            if r < .97:
                new_state = "enter_ccard"
                rolling_time += timedelta(minutes=1 + nbinom.rvs(1, .9))
        elif state == 'enter_ccard':
            if r < .9:
                new_state = 'complete'
                rolling_time += timedelta(minutes=1 + nbinom.rvs(1, .8))
        return new_state, rolling_time
Example #36
def gentestcase1(nsg=10):
    '''
  The first testcase, 2 samples, control and treatment
  '''
    vark = 0.01
    # basic parameters
    sks = SimCaseSimple()
    sks.beta0 = [random.uniform(3, 10)
                 for i in range(nsg)]  # these are the base
    sks.beta1 = [random.random() * 5]  # treatment
    print('beta_0:' + '\t'.join([decformat(x) for x in sks.beta0]))
    print('beta_1:' + decformat(sks.beta1[0]))
    # mean and variance
    sks.mu = [[math.exp(t) for t in sks.beta0]]
    # one treated sample: append a single list of treated means
    sks.mu += [[math.exp(t + sks.beta1[0]) for t in sks.beta0]]
    #sks.var0=[t+vark*(t*t) for t in sks.mu0]
    #sks.var1=[[t+vark*(t*t) for t in sks.mu1[0]]]
    sks.var = [[t + vark * (t * t) for t in sks.mu[i]] for i in range(2)]
    #print('mu_0:'+'\t'.join([decformat(x) for x in sks.mu0]))
    #print('var_0:'+'\t'.join([decformat(x) for x in sks.var0]))
    #print('mu_1:'+'\t'.join([decformat(x) for x in sks.mu1[0]]))
    #print('var_1:'+'\t'.join([decformat(x) for x in sks.var1[0]]))
    # parameters for generating NB counts
    #sks.nb_p0=[sks.mu0[i]/sks.var0[i] for i in range(nsg)]
    #sks.nb_p1=[[sks.mu1[0][i]/sks.var1[0][i] for i in range(nsg)]]
    sks.nb_p = [[sks.mu[j][i] / sks.var[j][i] for i in range(nsg)]
                for j in range(2)]
    #sks.nb_r0=[sks.mu0[i]*sks.mu0[i]/(sks.var0[i]-sks.mu0[i]) for i in range(nsg)]
    #sks.nb_r1=[[sks.mu1[0][i]*sks.mu1[0][i]/(sks.var1[0][i]-sks.mu1[0][i]) for i in range(nsg)]]
    sks.nb_r = [[
        sks.mu[j][i] * sks.mu[j][i] / (sks.var[j][i] - sks.mu[j][i])
        for i in range(nsg)
    ] for j in range(2)]
    #
    #sks.nb_count0=[nbinom.rvs(sks.nb_r0[i],sks.nb_p0[i]) for i in range(nsg)]
    #sks.nb_count1=[[nbinom.rvs(sks.nb_r1[0][i],sks.nb_p1[0][i]) for i in range(nsg)]]
    sks.nb_count = [[
        nbinom.rvs(sks.nb_r[j][i], sks.nb_p[j][i]) for i in range(nsg)
    ] for j in range(2)]
    # design matrix
    # sks.design_mat=getsimpledesignmat(nsg)
    sks.design_mat = np.matrix([[1]])

    return (sks)
Example #37
def gentestcase2(nsg=10):
  '''
  The second testcase, 2 samples, control and treatment
  '''
  vark=0.01
  # desmat=np.matrix([[0,0],[0,1],[1,0],[1,1]])
  desmat=np.matrix([[1,0,0],[0,1,0],[0,1,1],[1,1,1]])
  (nsample,nbeta)=desmat.shape
  # basic parameters
  sks=SimCaseSimple()
  sks.prefix='sample2'
  sks.design_mat=desmat
  sks.beta0=[random.uniform(3,10) for i in range(nsg)] # these are the base 
  sks.beta1=[random.random()*5 for i in range(nbeta)] # treatments;size: nbeta
  print('beta_0:'+'\t'.join([decformat(x) for x in sks.beta0]))
  print('beta_1:'+'\t'.join([decformat(x) for x in sks.beta1]))
  # mean and variance 
  mu0=[math.exp(t) for t in sks.beta0] # size: nsg
  tprod=desmat*np.matrix(sks.beta1).getT() # size: nsample*1  
  tprodlist=[x[0] for x in tprod.tolist()] # size: nsample*1  
  sks.mu=[mu0]
  for nr in range(nsample):
    sgi=[math.exp(t+tprodlist[nr]) for t in sks.beta0]
    sks.mu+=[sgi]
  # sks.var0=[t+vark*(t*t) for t in sks.mu0]
  sks.var=[[t+vark*(t*t) for t in tl] for tl in sks.mu]
  for i in range(nsample+1): # including 1 base and n samples
    print('mu_:'+str(i)+'\t'.join([decformat(x) for x in sks.mu[i]]))
    print('var_:'+str(i)+'\t'.join([decformat(x) for x in sks.var[i]]))
  # parameters for generating NB counts
  #sks.nb_p0=[sks.mu0[i]/sks.var0[i] for i in range(nsg)]
  #sks.nb_r0=[sks.mu0[i]*sks.mu0[i]/(sks.var0[i]-sks.mu0[i]) for i in range(nsg)]
  #sks.nb_p1=[[sks.mu1[t][i]/sks.var1[t][i] for i in range(nsg)] for t in range(nsample)]
  #sks.nb_r1=[[sks.mu1[t][i]*sks.mu1[t][i]/(sks.var1[t][i]-sks.mu1[t][i]) for i in range(nsg)] for t in range(nsample)]
  sks.nb_p=[[sks.mu[t][i]/sks.var[t][i] for i in range(nsg)] for t in range(nsample+1)]
  sks.nb_r=[[sks.mu[t][i]*sks.mu[t][i]/(sks.var[t][i]-sks.mu[t][i]) for i in range(nsg)] for t in range(nsample+1)]
  # 
  #sks.nb_count0=[nbinom.rvs(sks.nb_r0[i],sks.nb_p0[i]) for i in range(nsg)]
  #sks.nb_count1=[[nbinom.rvs(sks.nb_r1[t][i],sks.nb_p1[t][i]) for i in range(nsg)] for t in range(nsample)]
  sks.nb_count=[[nbinom.rvs(sks.nb_r[t][i],sks.nb_p[t][i]) for i in range(nsg)] for t in range(nsample+1)]
  
  return (sks)
Example #38
    def nextGenStoch(self, inf=True, BFOD=True):
        # this function is currently deprecated
        # some of these seeds die from BFOD
        if BFOD:
            self.numSeeds = binom.rvs(n=self.numSeeds, p=self.BFODmat)[:]
        # some of remaining seeds get pathogen infected
        germ = []
        if inf:
            infSeeds = binom.rvs(n=self.numSeeds, p=self.pInf)[:]
            unInfSeeds = np.subtract(self.numSeeds, infSeeds)[:]
            # some of seeds germinate
            germ = binom.rvs(p=self.germMatUninf, n=unInfSeeds)[:]
            germ = np.add(germ, binom.rvs(p=self.germMatInf, n=infSeeds)[:])[:]
        else:
            germ = binom.rvs(p=self.germMatUninf, n=self.numSeeds)[:]
        # subtract out germinated seeds from total number of seeds to keep in seed bank
        self.numSeeds = np.subtract(self.numSeeds, germ)[:]
        # incorporate perennials that survived
        self.species = np.add(binom.rvs(p=self.perSurv, n=self.species.astype(int)).astype(int),
                              binom.rvs(p=self.annuals, n=germ.astype(int))).astype(int)[:]
        # competition
        compMat = np.exp(-1. * np.dot(self.compParams, self.species))
        num_seeds_this_gen = np.zeros(self.numSpec)
        for i in range(self.numSpec):
            num_seeds_this_gen[i] += np.sum(nbinom.rvs(n=self.get_r(compMat, i), p=self.negBinP[i], size=self.species[i]))
        # infection
        if inf:
            num_seeds_this_gen = binom.rvs(p=self.infMat, n=num_seeds_this_gen.astype(int))[:]
        # add in new seeds to model
        self.numSeeds = np.add(self.numSeeds, num_seeds_this_gen.astype(int))[:]
        # add seedlings from perennials to seedlings, and ones that survived from last year to adults
        self.seedlings = np.array([germ[0], germ[1], 0, 0, 0])[:]
        self.species = np.add(self.species,
                              binom.rvs(p=self.perTrans[3] / (1. + (self.seedlings[0] + self.seedlings[1]) * self.perTrans[0]
                                        + (self.species[0] + self.species[1]) * self.perTrans[2]
                                        + (self.species[2] + self.species[3] + self.species[4]) * self.perTrans[1]),
                                        n=self.seedlings.astype(int)).astype(int))[:]
        #i = 0
        #for spec in num_seeds_this_gen:
        #    print spec, self.species[i], " ",
        #    i += 1
        #print
        return (self.species, self.numSeeds)
Example #39
def gentestcase1(nsg=10):
  '''
  The first testcase, 2 samples, control and treatment
  '''
  vark=0.01
  # basic parameters
  sks=SimCaseSimple()
  sks.beta0=[random.uniform(3,10) for i in range(nsg)] # these are the base 
  sks.beta1=[random.random()*5] # treatment
  print('beta_0:'+'\t'.join([decformat(x) for x in sks.beta0]))
  print('beta_1:'+decformat(sks.beta1[0]))
  # mean and variance 
  sks.mu=[[math.exp(t) for t in sks.beta0]]
  # one treated sample: append a single list of treated means
  sks.mu+=[[math.exp(t+sks.beta1[0]) for t in sks.beta0]]
  #sks.var0=[t+vark*(t*t) for t in sks.mu0]
  #sks.var1=[[t+vark*(t*t) for t in sks.mu1[0]]]
  sks.var=[[t+vark*(t*t) for t in sks.mu[i]] for i in range(2)]
  #print('mu_0:'+'\t'.join([decformat(x) for x in sks.mu0]))
  #print('var_0:'+'\t'.join([decformat(x) for x in sks.var0]))
  #print('mu_1:'+'\t'.join([decformat(x) for x in sks.mu1[0]]))
  #print('var_1:'+'\t'.join([decformat(x) for x in sks.var1[0]]))
  # parameters for generating NB counts
  #sks.nb_p0=[sks.mu0[i]/sks.var0[i] for i in range(nsg)]
  #sks.nb_p1=[[sks.mu1[0][i]/sks.var1[0][i] for i in range(nsg)]]
  sks.nb_p=[[sks.mu[j][i]/sks.var[j][i] for i in range(nsg)] for j in range(2)]
  #sks.nb_r0=[sks.mu0[i]*sks.mu0[i]/(sks.var0[i]-sks.mu0[i]) for i in range(nsg)]
  #sks.nb_r1=[[sks.mu1[0][i]*sks.mu1[0][i]/(sks.var1[0][i]-sks.mu1[0][i]) for i in range(nsg)]]
  sks.nb_r=[[sks.mu[j][i]*sks.mu[j][i]/(sks.var[j][i]-sks.mu[j][i]) for i in range(nsg)] for j in range(2)]
  # 
  #sks.nb_count0=[nbinom.rvs(sks.nb_r0[i],sks.nb_p0[i]) for i in range(nsg)]
  #sks.nb_count1=[[nbinom.rvs(sks.nb_r1[0][i],sks.nb_p1[0][i]) for i in range(nsg)]]
  sks.nb_count=[[nbinom.rvs(sks.nb_r[j][i],sks.nb_p[j][i]) for i in range(nsg)] for j in range(2)]
  # design matrix
  # sks.design_mat=getsimpledesignmat(nsg)
  sks.design_mat=np.matrix([[1]])
  
  return (sks)
Example #40
def sample_negative_binomial(p, r):
    return int(nbinom.rvs(r, p))
Example #41
def meanVar(_files, _gff_file, _output):

    NFILE = len(_files)
    if NFILE == 1:
        sys.stderr.write("Need at least two samples for each group.\n")
        sys.exit(1)
    #####

    _dict_counts = dict()  ## dictionary of gene counts
    _genes = HTSeq.GenomicArrayOfSets("auto", stranded=False)
    idx = 0
    if MODE == "all-genes":
        for feature in _gff_file:
            if feature.type in GENE:
                _dict_counts[feature.name] = [0] * NFILE
                _genes[feature.iv] += feature.name
            if feature.type in TX:
                if feature.attr["geneID"] not in _dict_counts:
                    _dict_counts[feature.attr["geneID"]] = [0] * NFILE
                    _genes[feature.iv] += feature.attr["geneID"]
    if MODE == "AS-genes":
        ## Bug: does not report the last gene in the gff if it has at least two transcripts
        transcript = set()
        cur_line = None
        last_gene_id = None
        for feature in _gff_file:
            if feature.type in GENE:
                if len(transcript) > 1:
                    _dict_counts[cur_line.name] = [0] * NFILE
                    _genes[cur_line.iv] += cur_line.name
                cur_line = feature
                transcript.clear()
            if feature.type in TX:
                key = None
                if "geneID" in feature.attr:
                    key = "geneID"
                elif "Parent" in feature.attr:
                    key = "Parent"
                else:
                    sys.stderr.write("transcript line does not have Parent or geneID field\n")

                if last_gene_id == feature.attr[key]:
                    transcript.add(feature.attr["ID"])
                else:
                    if len(transcript) > 1:
                        if feature.attr[key] not in _dict_counts:
                            _dict_counts[feature.attr[key]] = [0] * NFILE
                            _genes[feature.iv] += feature.attr[key]
                    transcript.clear()
                    transcript.add(feature.attr["ID"])
                    last_gene_id = feature.attr[key]
            if feature.type in EXON:
                transcript.add(feature.attr["Parent"])
    print "num of genes to simulate: ", len(_dict_counts)
    _file_raw_count = open(_output + '.rawcounts', 'w')
    _file_nb_count = open(_output + '.nbcounts', 'w')
    ## This loop reads through the input list and calls countSam for each input file
    for f in _files:
        sam_file = HTSeq.SAM_Reader(f)
        _dict_counts = countSam(sam_file, _genes, _dict_counts, idx)
        f.close()
        idx += 1
        sys.stderr.write("library %d has been generated.\n" % idx)
    ## Print raw counts in the file specified by <out>
    for key, value in sorted(_dict_counts.iteritems()):
        _file_raw_count.write(key + "\t" + "\t".join(map(str, value)) + "\n")
    _file_raw_count.close()
    ## calculate group mean and variance
    list_mean = list()
    list_var = list()
    for key, value in sorted(_dict_counts.iteritems()):
        list_mean.append(np.mean(np.array(value)))
        list_var.append(np.var(np.array(value)))

    ## compute loess estimates
    ## The following code uses the rpy2 module
    a = robjects.FloatVector(list_mean)
    b = robjects.FloatVector(list_var)
    df = robjects.DataFrame({"mean": a, "var": b})
    non0_df = df.rx(df.rx2("mean").ro > 0, True)  ## subsetting if mean > 0
    loess_fit = r.loess("var ~ mean", data=non0_df, degree=2)
    '''
    #goodness-of-fit test:
    variance = r.predict(loess_fit, 1000)
    print variance[0]
    print (1000*1000)/(variance[0]-1000)
    '''
    var_pred = r.predict(loess_fit, a)
    # This loop overwrites _dict_counts to record the new count data
    count_idx = 0

    for key, value in sorted(_dict_counts.iteritems()):
        n = math.pow(list_mean[count_idx], 2) / (var_pred[count_idx] - list_mean[count_idx])
        n = int(n)  # n: number of failures
        if n <= 0:
            _dict_counts[key] = [0] * NREPS
        else:
            p = n / float(n + list_mean[count_idx])  # p: prob of success
            _dict_counts[key] = nbinom.rvs(n, p, size=NREPS).tolist()
        count_idx += 1
    #var_pred = r.predict(loess_fit, a)
    for key, value in sorted(_dict_counts.iteritems()):
        _file_nb_count.write(key + "\t" + "\t".join(map(str, value)) + "\n")
    _file_nb_count.close()
    _file_raw_count.close()
    return _dict_counts
Example #42
import numpy as np
import statsmodels.api as sm
from scipy.stats import binom, uniform, nbinom

# Data
np.random.seed(141)                 # set seed to replicate example
nobs= 2500                          # number of obs in model 

x1 = binom.rvs(1, 0.6, size=nobs)   # categorical explanatory variable
x2 = uniform.rvs(size=nobs)         # real explanatory variable

theta = 0.303
X = sm.add_constant(np.column_stack((x1, x2)))
beta = [1.0, 2.0, -1.5]
xb = np.dot(X, beta)          # linear predictor

exb = np.exp(xb)
nby = nbinom.rvs(exb, theta)

mydata = {}                                # build data dictionary
mydata['N'] = nobs                         # sample size
mydata['X'] = X                            # predictors         
mydata['Y'] = nby                          # response variable
mydata['K'] = len(beta)
  

# Fit
stan_code = """
data{
    int N;
    int K;
    matrix[N,K] X;
    int Y[N];
Example #43
def draw_from_negative_binomial(mu, phi):
    n = phi
    p = 1/(1+mu/phi)
    return nbinom.rvs(n, p)
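
With n = phi and p = 1 / (1 + mu / phi) = phi / (phi + mu), the draw has mean mu and variance mu + mu**2 / phi; for example:

from scipy.stats import nbinom

print(nbinom.stats(n=5.0, p=1 / (1 + 20.0 / 5.0), moments="mv"))  # (20.0, 100.0)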
Example #44
        print "##INFO=<ID=%s,Number=.,Type=%s,Description=\"%s\">" % (INFO.id, INFO.type, INFO.desc)
        for fmat in FORMAT:
            print "##FORMAT=<ID=%s,Number=.,Type=%s,Description=\"%s\">" % (fmat.id, fmat.type, fmat.desc)
        print '##analysis=simulate_dp.py --lambda %f --epsilon %f --dispersion_mean %f --dispersion_sd %f --seed %s' % (args.lamb, args.epsilon, args.dmean, args.dsd, str(args.seed))
        print line
        continue
    fields = line.split()
    genotypes = fields[9:]
    samples = []
    for gt in genotypes:
        # scipy nbinom takes (n, p) as arguments.
        # Convolution: sum_{i=1}{x}nbinom(n, p) = nbinom(xn, p).
        if gt.count('0')==0:
            dp_ref = 0
        else:
            dp_ref = nbinom.rvs(lamb * gt.count('0') / ((d - 1) * 2), 1 / d)
        if gt.count('1')==0:
            dp_alt = 0
        else:
            dp_alt = nbinom.rvs(lamb * gt.count('1') / ((d - 1) * 2), 1 / d)
        dp = dp_ref + dp_alt
        pl = calculate_pl(dp_ref, dp_alt)
        if pl:
            sample = "%s:%i,%i:%i:%i,%i,%i" % (gt, dp_ref, dp_alt, dp, pl[0], pl[1], pl[2])
            samples.append(sample)
        else:
            samples.append(gt)
    info = '%s;Dispersion=%s' % (fields[7], '{0:.3f}'.format(d))
    output = fields[:7] + [info] + ['GT:AD:DP:PL'] + samples
    print '\t'.join(output)
Example #45
def main():

    ### get command line options
    options = parse_options(sys.argv)

    ### parse parameters from options object
    CFG = settings.parse_args(options, identity='test')
    CFG['use_exon_counts'] = False

    ### generate output directory
    outdir = os.path.join(options.outdir, 'testing')
    if options.timestamp == 'y':
        outdir = '%s_%s' % (outdir, str(datetime.datetime.now()).replace(' ', '_'))
    if CFG['diagnose_plots']:
        CFG['plot_dir'] = os.path.join(options.outdir, 'plots')
        if not os.path.exists(CFG['plot_dir']):
            os.makedirs(CFG['plot_dir'])

    if options.labelA != 'condA' and options.labelB != 'condB':
        outdir = '%s_%s_vs_%s' % (outdir, options.labelA, options.labelB)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    if CFG['debug']:

        print "Generating simulated dataset"

        npr.seed(23)
        CFG['is_matlab'] = False
        #cov = npr.permutation(20000-20).astype('float').reshape(999, 20)
        #cov = sp.r_[cov, sp.c_[sp.ones((1, 10)) *10, sp.ones((1, 10)) * 500000] + npr.normal(10, 1, 20)]
        #sf = sp.ones((cov.shape[1], ), dtype='float')

        setsize = 50
        ### diff event counts
        cov = sp.zeros((500, 2 * setsize), dtype='int')
        for i in range(10):
            cov[i, :setsize] = nbinom.rvs(30, 0.8, size=setsize)
            cov[i, setsize:] = nbinom.rvs(10, 0.8, size=setsize)
        for i in range(10, cov.shape[0]):
            cov[i, :] = nbinom.rvs(30, 0.8, size=2*setsize)

        ### diff gene expression
        cov2 = sp.zeros((500, 2 * setsize), dtype='int')
        for i in range(20):
            cov2[i, :setsize] = nbinom.rvs(2000, 0.2, size=setsize)
            cov2[i, setsize:] = nbinom.rvs(2000, 0.3, size=setsize)
        for i in range(20, cov2.shape[0]):
            cov2[i, :] = nbinom.rvs(2000, 0.3, size=2*setsize)

        cov = sp.c_[cov, cov2] * 10000

        tidx = sp.arange(setsize)

        sf = npr.uniform(0, 5, 2*setsize)
        sf = sp.r_[sf, sf]

        #dmatrix0 = sp.ones((cov.shape[1], 3), dtype='bool')
        dmatrix1 = sp.zeros((cov.shape[1], 4), dtype='float')
        dmatrix1[:, 0] = 1
        dmatrix1[tidx, 1] = 1
        #dmatrix1[tidx, 2] = 1
        dmatrix1[tidx + (2*setsize), 2] = 1
        dmatrix1[(2*setsize):, 3] = 1
        #dmatrix1[:, 4] = sp.log(sf)
        dmatrix0 = dmatrix1[:, [0, 2, 3]]

        cov = cov * sf
        #sf = sp.ones((cov.shape[1], ), dtype='float')

        pvals = run_testing(cov, dmatrix0, dmatrix1, sf, CFG)
        pvals_adj = adj_pval(pvals, CFG) 
        pdb.set_trace()
    else:
        val_tag = ''
        if CFG['validate_splicegraphs']:
            val_tag = '.validated'

        if CFG['is_matlab']:
            CFG['fname_genes'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.mat' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
            CFG['fname_count_in'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.count.mat' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
        else:
            CFG['fname_genes'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.pickle' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
            CFG['fname_count_in'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.count.hdf5' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag))

        condition_strains = None
        CFG['fname_exp_hdf5'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.gene_exp.hdf5' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
        if os.path.exists(CFG['fname_exp_hdf5']):
            if CFG['verbose']:
                print 'Loading expression counts from %s' % CFG['fname_exp_hdf5']
            IN = h5py.File(CFG['fname_exp_hdf5'], 'r')
            gene_counts = IN['raw_count'][:]
            gene_strains = IN['strains'][:]
            gene_ids = IN['genes'][:]
            IN.close()
        else:
            if options.subset_samples == 'y':
                condition_strains = sp.unique(sp.r_[sp.array(CFG['conditionA']), sp.array(CFG['conditionB'])])
                CFG['fname_exp_hdf5'] = os.path.join(CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.gene_exp.%i.hdf5' % (CFG['confidence_level'], CFG['merge_strategy'], val_tag, hash(tuple(sp.unique(condition_strains))) * -1))
            if os.path.exists(CFG['fname_exp_hdf5']):
                if CFG['verbose']:
                    print 'Loading expression counts from %s' % CFG['fname_exp_hdf5']
                IN = h5py.File(CFG['fname_exp_hdf5'], 'r')
                gene_counts = IN['raw_count'][:]
                gene_strains = IN['strains'][:]
                gene_ids = IN['genes'][:]
                IN.close()
            else:
                gene_counts, gene_strains, gene_ids = get_gene_expression(CFG, fn_out=CFG['fname_exp_hdf5'], strain_subset=condition_strains)

        gene_strains = sp.array([x.split(':')[1] if ':' in x else x for x in gene_strains])

        ### estimate size factors for library size normalization
        sf_ge = get_size_factors(gene_counts, CFG)

        ### get index of samples for difftest
        idx1 = sp.where(sp.in1d(gene_strains, CFG['conditionA']))[0]
        idx2 = sp.where(sp.in1d(gene_strains, CFG['conditionB']))[0]

        ### for TESTING
        #setsize = 100
        #idx1 = sp.arange(0, setsize / 2)
        #idx2 = sp.arange(setsize / 2, setsize)

        ### subset expression counts to tested samples
        gene_counts = gene_counts[:, sp.r_[idx1, idx2]]
        sf_ge = sf_ge[sp.r_[idx1, idx2]]
        #sf = sp.r_[sf, sf]

        ### test each event type individually
        for event_type in CFG['event_types']:

            if CFG['verbose']:
                print 'Testing %s events' % event_type

            CFG['fname_events'] = os.path.join(CFG['out_dirname'], 'merge_graphs_%s_C%i.counts.hdf5' % (event_type, CFG['confidence_level']))

            ### quantify events
            (cov, gene_idx, event_idx, event_ids, event_strains) = quantify.quantify_from_counted_events(CFG['fname_events'], sp.r_[idx1, idx2], event_type, CFG)

            ### estimate size factors
            sf_ev = get_size_factors(sp.vstack(cov), CFG)

            sf = sp.r_[sf_ev, sf_ge]

            assert(sp.all(gene_strains == event_strains))

            ### map gene expression to event order
            curr_gene_counts = gene_counts[gene_idx, :]

            ### filter for min expression
            if event_type == 'intron_retention':
                k_idx = sp.where((sp.mean(cov[0] == 0, axis=1) < CFG['max_0_frac']) | \
                                 (sp.mean(cov[1] == 0, axis=1) < CFG['max_0_frac']))[0]
            else:
                k_idx = sp.where(((sp.mean(cov[0] == 0, axis=1) < CFG['max_0_frac']) | \
                                  (sp.mean(cov[1] == 0, axis=1) < CFG['max_0_frac'])) & \
                                 (sp.mean(sp.c_[cov[0][:, :idx1.shape[0]], cov[1][:, :idx1.shape[0]]] == 0, axis=1) < CFG['max_0_frac']) & \
                                 (sp.mean(sp.c_[cov[0][:, idx2.shape[0]:], cov[1][:, idx2.shape[0]:]] == 0, axis=1) < CFG['max_0_frac']))[0]
            if CFG['verbose']:
                print 'Exclude %i of %i %s events (%.2f percent) from testing due to low coverage' % (cov[0].shape[0] - k_idx.shape[0], cov[0].shape[0], event_type, (1 - float(k_idx.shape[0]) / cov[0].shape[0]) * 100)
            if k_idx.shape[0] == 0:
                print 'All events of type %s were filtered out due to low coverage. Please try re-running with less stringent filter criteria' % event_type
                continue
           # k_idx = sp.where((sp.mean(sp.c_[cov[0], cov[1]], axis=1) > 2))[0]
           # k_idx = sp.where((sp.mean(cov[0], axis=1) > 2) & (sp.mean(cov[1], axis=1) > 2))[0]
            cov[0] = cov[0][k_idx, :]
            cov[1] = cov[1][k_idx, :]
            curr_gene_counts = curr_gene_counts[k_idx, :]
            event_idx = event_idx[k_idx]
            gene_idx = gene_idx[k_idx]
            event_ids = [x[k_idx] for x in event_ids]

            cov[0] = sp.around(sp.hstack([cov[0], curr_gene_counts]))
            cov[1] = sp.around(sp.hstack([cov[1], curr_gene_counts]))
            cov = sp.vstack(cov)
            event_ids = sp.hstack(event_ids)

            tidx = sp.arange(idx1.shape[0])

        #if CFG['debug']:
        #    for i in range(cov.shape[0]):
        #        fig = plt.figure(figsize=(8, 6), dpi=100)
        #        ax = fig.add_subplot(111)
        #        ax.hist(cov[i, :] * sf, 50, histtype='bar', rwidth=0.8)
        #        #ax.plot(sp.arange(cov.shape[1]), sorted(cov[i, :]), 'bo')
        #        ax.set_title('Count Distribution - Sample %i' % i )
        #        plt.savefig('count_dist.%i.pdf' % i, format='pdf', bbox_inches='tight')
        #        plt.close(fig)

            ### build design matrix for testing
            dmatrix1 = sp.zeros((cov.shape[1], 4), dtype='bool')
            dmatrix1[:, 0] = 1                      # intercept
            dmatrix1[tidx, 1] = 1                   # delta a
            dmatrix1[tidx, 2] = 1                   # delta g
            dmatrix1[tidx + (idx1.shape[0] + idx2.shape[0]), 2] = 1         # delta g
            dmatrix1[(idx1.shape[0] + idx2.shape[0]):, 3] = 1         # is g
            dmatrix0 = dmatrix1[:, [0, 2, 3]]

            ### make event splice forms unique to prevent unnecessary tests
            event_ids, u_idx, r_idx = sp.unique(event_ids, return_index=True, return_inverse=True)
            if CFG['verbose']:
                print 'Consider %i unique event splice forms for testing' % u_idx.shape[0]

            ### run testing
            #pvals = run_testing(cov[u_idx, :], dmatrix0, dmatrix1, sf, CFG, r_idx)
            pvals = run_testing(cov, dmatrix0, dmatrix1, sf, CFG)
            pvals_adj = adj_pval(pvals, CFG) 

            ### write output
            out_fname = os.path.join(outdir, 'test_results_C%i_%s.tsv' % (options.confidence, event_type))
            if CFG['verbose']:
                print 'Writing test results to %s' % out_fname
            s_idx = sp.argsort(pvals_adj)
            header = sp.array(['event_id', 'gene', 'p_val', 'p_val_adj']) 
            event_ids = sp.array(['%s_%i' % (event_type, i + 1) for i in event_idx], dtype='str')
            if CFG['is_matlab']:
                data_out = sp.c_[event_ids[s_idx], gene_ids[gene_idx[s_idx], 0], pvals[s_idx].astype('str'), pvals_adj[s_idx].astype('str')]
            else:
                data_out = sp.c_[event_ids[s_idx], gene_ids[gene_idx[s_idx]], pvals[s_idx].astype('str'), pvals_adj[s_idx].astype('str')]
            data_out = sp.r_[header[sp.newaxis, :], data_out]
            sp.savetxt(out_fname, data_out, delimiter='\t', fmt='%s')
Beispiel #46
import numpy as np
from scipy.stats import norm, uniform, nbinom

np.random.seed(1656)                 # set seed to replicate example
N = 2000                             # number of obs in model 
NGroups = 10

x1 = uniform.rvs(size=N)
x2 = uniform.rvs(size=N)

Groups = np.array([200 * [i] for i in range(NGroups)]).flatten()
a = norm.rvs(loc=0, scale=0.5, size=NGroups)
eta = 1 + 0.2 * x1 - 0.75 * x2 + a[Groups]
mu = np.exp(eta)

y = nbinom.rvs(mu, 0.5)
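# Note on the parametrization: scipy's nbinom has mean n * (1 - p) / p, so with
# p = 0.5 the first argument doubles as the mean, i.e. E[y] = mu as intended.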


# Code 8.23 Random intercept negative binomial model in Python using Stan
import pystan
import statsmodels.api as sm

X = sm.add_constant(np.column_stack((x1,x2)))
K = X.shape[1]


model_data = {}
model_data['Y'] = y
model_data['X'] = X                              
model_data['K'] = K
model_data['N'] = N
model_data['NGroups'] = NGroups
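
# The example stops short of the model itself. A minimal sketch of how the fit
# might proceed with PyStan 2 follows; the Stan program and parameter names are
# assumptions, not the original author's code. Note that a random-intercept
# model also needs the group index, which the data dict above omits:
model_data['Groups'] = Groups + 1    # Stan indexes groups from 1

stan_code = """
data {
    int<lower=1> N;
    int<lower=1> K;
    int<lower=1> NGroups;
    matrix[N, K] X;
    int<lower=0> Y[N];
    int<lower=1, upper=NGroups> Groups[N];
}
parameters {
    vector[K] beta;
    vector[NGroups] a;
    real<lower=0> sigma_a;
    real<lower=0> phi;
}
model {
    a ~ normal(0, sigma_a);
    Y ~ neg_binomial_2_log(X * beta + a[Groups], phi);
}
"""

# fit = pystan.StanModel(model_code=stan_code).sampling(data=model_data,
#                                                       iter=5000, chains=3)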
import sys
import random

import numpy as np
from scipy.stats import nbinom, norm, gamma


def generate_count(options):

    numGene = options.numEntry
    numSampleConA = options.numSampleConA
    numSampleConB = options.numSampleConB
    nParamNB = options.nParamNB
    pParamNB = options.pParamNB
    beta1 = options.beta1
    beta2 = options.beta2
    output = options.output

    # First generate the mean read count for each gene. Assume this mean value follows NB distribution (Observed from real data).
    mu = nbinom.rvs(nParamNB, pParamNB, loc=0.0, size=numGene)

    # If the mean of certain genes are 0, change them as 1.
    idx = np.nonzero(mu == 0.0)[0]
    mu[idx] = 1.0

    # Generate dispersions for all genes.
    if not options.dispFile:
        # Generate dispersions as a function of mean count for all genes.
        disper = beta1 / mu + beta2
    else:
        # Load the dispersions to generate the count.
        disper = np.loadtxt(options.dispFile, dtype=float, skiprows=0, usecols=(0,))
        if disper.size != numGene:
            sys.stderr.write('\nError: The number of specified dispersions does not match the number of genes!\n\n')
            sys.exit()

    # Add Gaussian distributed noise to log(dispersion).
    if options.addDisperError:
        std = options.addDisperError
        errorNorm = norm.rvs(loc=0.0, scale=std, size=numGene)
        disper = np.exp(np.log(disper) + errorNorm)
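        # This is equivalent to multiplying each dispersion by lognormal noise,
        # disper * exp(errorNorm), with errorNorm ~ Normal(0, std).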

    muA = mu.copy()
    muB = mu.copy()

    # For some genes, generate read count with different mean value in different conditions.
    if options.numDiff or options.diffFile:

        # Fold change genes are randomly selected, or are chosen as indicated by file.
        if not options.diffFile:
            numDiff = options.numDiff

            # The numbers of genes with increased and decreased mean counts
            # are equal (or differ by one when numDiff is odd).
            numDiffUp = numDiff // 2
            numDiffDn = numDiff - numDiffUp

            idx = random.sample(range(numGene), numDiff)
            idxUp = random.sample(idx, numDiffUp)
            idxDn = np.setdiff1d(idx, idxUp)
        else:
            diffInfo = np.loadtxt(options.diffFile, dtype=int, skiprows=0, usecols=(0,))
 
            idxUp = (diffInfo==2).nonzero()[0]
            idxDn = (diffInfo==1).nonzero()[0]

            numDiffUp = idxUp.size
            numDiffDn = idxDn.size

            numDiff = numDiffUp + numDiffDn

        if numDiff > options.numEntry:
            print('numDiff should be smaller than numGene!')
            sys.exit()

        shapeParam = options.shapeGamma
        scaleParam = options.scaleGamma

        # Assume fold changes of mean count of different genes follow gamma distribution. If fold change value of increased gene set is x, the decreased set is 1/x.
        foldDiffUp = gamma.rvs(a=shapeParam, scale=scaleParam, loc=1.0, size=numDiffUp)
        foldDiffDn = 1.0 / gamma.rvs(a=shapeParam, scale=scaleParam, loc=1.0, size=numDiffDn)
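        # loc=1.0 shifts the gamma support so every drawn fold change exceeds 1;
        # taking reciprocals for the down-regulated set mirrors it below 1.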

        # Change the mean count of condition A and condition B without changing the overall mean count across the two conditions.
        # (MeanCountA + MeanCountB) / 2 = MeanCountOrigin & MeanCountA * FoldChange = MeanCountB
        # Assume there is a negative correlation between mean count and fold change.
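        # Solving the two constraints gives MeanCountA = 2 * MeanCountOrigin / (FoldChange + 1)
        # and MeanCountB = FoldChange * MeanCountA; pairing sorted means with
        # reverse-sorted fold changes below realizes the negative correlation.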
        idxUpMem = np.searchsorted(np.sort(mu[idxUp]), mu[idxUp])
        idxDnMem = np.searchsorted(np.sort(mu[idxDn]), mu[idxDn])

        muAnewUp = 2 * np.sort(mu[idxUp]) / (np.sort(foldDiffUp)[::-1] + 1)
        muBnewUp = muAnewUp * np.sort(foldDiffUp)[::-1]
        muA[idxUp] = muAnewUp[idxUpMem]
        muB[idxUp] = muBnewUp[idxUpMem]

        muAnewDn = 2 * np.sort(mu[idxDn]) / (np.sort(foldDiffDn) + 1)
        muBnewDn = muAnewDn * np.sort(foldDiffDn)
        muA[idxDn] = muAnewDn[idxDnMem]
        muB[idxDn] = muBnewDn[idxDnMem]

    n = 1.0 / disper
    pA = n / (n + muA)
    pB = n / (n + muB)

    numDigits = len(str(numGene))
    with open(output, 'w') as FileOut:
        FileOut.write('Entry\t' + 'conditionA\t'*numSampleConA + 'conditionB\t'*numSampleConB + 'Dispersion\t' + 'MeanCondA\t' + 'MeanCondB\t' + 'MeanFoldChange\t' + 'SetAsDiff\n')
        for i in range(numGene):
            z = numDigits - len(str(i+1))
            name = 'G' + '0'*z + str(i+1)

            # The dispersion parameter (1/n) is the same for both conditions. The probability parameters are different if there is fold change in mean count for different conditions.
            countListA = nbinom.rvs(n[i], pA[i], size=numSampleConA).tolist()
            countListB = nbinom.rvs(n[i], pB[i], size=numSampleConB).tolist()
            countList  = countListA + countListB

            countString = '\t'.join(str(element) for element in countList)

            if not (options.numDiff or options.diffFile):
                setAsDiff = '-1'
            elif i in idxUp:
                setAsDiff = '1'
            elif i in idxDn:
                setAsDiff = '2'
            else:
                setAsDiff = '0'

            FileOut.write(name + '\t' + countString + '\t' + str(disper[i]) + '\t' + str(np.mean(countListA)) + '\t' + str(np.mean(countListB)) + '\t' + str((np.mean(countListB)+1e-5)/(np.mean(countListA)+1e-5)) + '\t' + setAsDiff + '\n')
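
# A quick standalone sanity check of the NB parametrization used in
# generate_count (a sketch with made-up numbers, not part of the original
# script): with n = 1 / disper and p = n / (n + mu), scipy's nbinom has
# mean mu and variance mu + disper * mu**2.
if __name__ == '__main__':
    mu_check, disper_check = 100.0, 0.1
    n_check = 1.0 / disper_check
    p_check = n_check / (n_check + mu_check)
    draws = nbinom.rvs(n_check, p_check, size=200000)
    print(draws.mean(), draws.var())    # expect roughly 100 and 1100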
import numpy as np
from scipy.stats import nbinom

import games    # project-specific module providing the Player base class and games


def evaluate(pop, intel_penalty=0.01, game=None):
    n = len(pop)
    total_payoffs = np.zeros((1, n))
    rate_of_coop = np.zeros((1,n))

    class NNPlayer(games.Player):
        def __init__(self, nnet):
            self.nnet = nnet

        def reset(self):
            self.nnet.reset()

        def get_initial_action(self):
            return self.nnet.initial_move

        def get_action(self, prev_results):
            prev_payoffs = [r['payoff'] for r in prev_results]
            output = self.nnet.activate(prev_payoffs)
            if isinstance(output, np.ndarray):
                output = output[0, 0]

            if np.random.rand() < output:
                return 1
            else:
                return 0

    for i in range(n):
        for j in range(i+1, n):
            # nbinom.rvs(1, 0.2) is a geometric draw (failures before the first
            # success); the +1 guarantees at least one round per pairing.
            n_rounds = nbinom.rvs(1, 0.2) + 1
            payoffs, traces = game.play(NNPlayer(pop[i]), NNPlayer(pop[j]), n_rounds)
            total_payoffs[0,i] += payoffs[0,0]
            total_payoffs[0,j] += payoffs[0,1]

            rate_of_coop[0, i] += np.mean(traces[0])
            rate_of_coop[0, j] += np.mean(traces[1])

    total_payoffs /= float(n-1)
    rate_of_coop /= float(n-1)

    for i in range(n):
        #pop[i].fitness.values = [total_payoffs[0,i] - intel_penalty*pop[i].get_intelligence()] 
        pop[i].fitness.values = [total_payoffs[0,i], pop[i].get_intelligence()] 
        pop[i].rate_of_coop = rate_of_coop[0, i]


    # Examine the strategies in the population.
    class AlwaysCooperatePlayer(games.Player):
        def get_initial_action(self):
            return 1
        def get_action(self, prev_results):
            return 1

    class AlwaysDefectPlayer(games.Player):
        def get_initial_action(self):
            return 0
        def get_action(self, prev_results):
            return 0

    class TitForTatPlayer(games.Player):
        def get_initial_action(self):
            return 1
        def get_action(self, prev_results):
            # Default cooperate, but defect if opponent defected.
            if prev_results[1]['action'] == 0:
                return 0
            else:
                return 1

    class TitForTwoTatsPlayer(games.Player):
        def __init__(self):
            self.opponent_defected = False
        def reset(self):
            self.opponent_defected = False
        def get_initial_action(self):
            return 1
        def get_action(self, prev_results):
            # Default cooperate, but defect if opponent defected twice
            # in a row.
            if prev_results[1]['action'] == 0 and self.opponent_defected:
                return 0
            elif prev_results[1]['action'] == 0:
                self.opponent_defected = True
                return 1
            else:
                self.opponent_defected = False
                return 1

    class PavlovPlayer(games.Player):
        def get_initial_action(self):
            return 1
        def get_action(self, prev_results):
            return prev_results[1]['action']

    class ProbabilisticPlayer(games.Player):
        def __init__(self, prob):
            # prob -> probability of cooperating
            self.prob = prob
        def get_initial_action(self):
            return self.get_action(None)
        def get_action(self, prev_results):
            if np.random.rand() <= self.prob:
                return 1
            else:
                return 0

    probs = [0.0, 0.25, 0.5, 0.75, 1.0]
    n_games = 5
    n_rounds = 20
    n_total = len(probs)*n_games*n_rounds

    test_players = [AlwaysCooperatePlayer(),
                    AlwaysDefectPlayer(),
                    TitForTatPlayer(),
                    TitForTwoTatsPlayer(),
                    PavlovPlayer()]
    test_player_moves = np.zeros((len(test_players), n_total))
    pop_moves = np.zeros((len(pop), n_total))
    opp_moves = np.zeros(len(probs)*n_games*n_rounds)

    start_idx = 0
    for p in probs:
        for i in range(n_games):
            stop_idx = start_idx + n_rounds

            random_trace = [np.random.rand() < p for i in range(n_rounds)]
            random_trace = np.array(random_trace, dtype='float')

            opp_moves[start_idx:stop_idx] = random_trace

            for (j, player) in enumerate(test_players):
                _, traces = game.play_against_trace(player, random_trace)
                test_player_moves[j, start_idx:stop_idx] = traces[0]

            for (j, indiv) in enumerate(pop):
                _, traces = game.play_against_trace(NNPlayer(indiv), random_trace)
                pop_moves[j, start_idx:stop_idx] = traces[0]

            start_idx += n_rounds

    # (X - Y)^2 = X^2 - 2XY + Y^2
    pop_squared = np.square(pop_moves).sum(axis=1)
    test_squared = np.square(test_player_moves).sum(axis=1)
    pop_test = np.dot(pop_moves, test_player_moves.T)
    sq_dists = pop_squared[:,np.newaxis] - 2 * pop_test + test_squared[np.newaxis,:]
    sq_dists /= n_total

    closest_strat = sq_dists.argmin(axis=1)
    for (i, indiv) in enumerate(pop):
        indiv.closest_strategy = closest_strat[i]
        indiv.strategy_dists = sq_dists[i,:]

    return pop
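
# Sanity check of the (X - Y)^2 expansion used above for the pairwise mean
# squared distances (a sketch with made-up shapes, not part of the original
# script):
if __name__ == '__main__':
    A = np.random.rand(4, 10)    # stands in for pop_moves
    B = np.random.rand(3, 10)    # stands in for test_player_moves
    fast = (np.square(A).sum(axis=1)[:, np.newaxis]
            - 2 * np.dot(A, B.T)
            + np.square(B).sum(axis=1)[np.newaxis, :]) / A.shape[1]
    slow = np.array([[np.mean((a - b) ** 2) for b in B] for a in A])
    assert np.allclose(fast, slow)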