def generate_unconditioned_example():
    """Pair two independently generated samples element-wise.

    Each column comes from ``R.multinomial(num_data, create_multinomial())``
    passed through ``convert_multinomial_sample``; the second column is
    additionally passed through ``shuffle`` before the two are zipped into
    a list of pairs.
    """
    first_counts = R.multinomial(num_data, create_multinomial())
    first_column = convert_multinomial_sample(first_counts)
    second_counts = R.multinomial(num_data, create_multinomial())
    second_column = shuffle(convert_multinomial_sample(second_counts))
    return list(zip(first_column, second_column))
Exemple #2
0
def sample_random_read(gene, true_psi, read_len, overhang_len):
    """
    Sample one random read from a gene given the true Psi values.

    An isoform is picked with probability proportional to its Psi value
    times the number of positions at which a read of length ``read_len``
    can start; the start position inside that isoform is then picked
    uniformly.  Overhang is not taken into account while sampling;
    ``overhang_len`` is only forwarded to ``gene.align_read``.

    Returns ``(alignment, [read_start, read_end], category, chosen_iso)``:
    the alignment (as a tuple) and category reported by ``gene.align_read``
    for the genomic coordinates of the read, the isoform-relative read
    coordinates, and the index of the isoform that was sampled.

    Note (carried over from the original author): for a gene with only two
    isoforms the gene's alignment routine may report a read summary of the
    form (NI, NE, NB) rather than a per-isoform alignment pair.
    """
    start_counts = array([(iso.len - read_len + 1) for iso in gene.isoforms])
    # Uniform probability of each start position within an isoform (for now).
    per_position_prob = [1/float(count) for count in start_counts]
    # Weight every isoform's Psi by its number of possible read starts.
    normalizer = sum(start_counts * array(true_psi))
    isoform_weights = []
    for count, psi in zip(start_counts, true_psi):
        isoform_weights.append((count * psi)/normalizer)
    # Choose the isoform the read comes from.
    chosen_iso = int(multinomial(1, isoform_weights).argmax())
    # Choose the start position inside that isoform.
    start_dist = ones(start_counts[chosen_iso]) * per_position_prob[chosen_iso]
    read_start = int(multinomial(1, start_dist).argmax())
    read_end = read_start + read_len - 1
    # Convert isoform coordinates to genomic ones before aligning.
    source_isoform = gene.isoforms[chosen_iso]
    genomic_start, genomic_end = source_isoform.isoform_coords_to_genomic(
        read_start, read_end)
    alignment, category = gene.align_read(genomic_start, genomic_end,
                                          overhang=overhang_len)
    return (tuple(alignment), [read_start, read_end], category, chosen_iso)
Exemple #3
0
 def sample(self, T, s_init=None,x_init=None,y_init=None):
   """
   Simulate T steps of the switching linear dynamical system.

   Inputs:
     T: number of time steps to simulate
     s_init: initial switch state; when None it is drawn from the
       stationary distribution of the transition matrix self.Z
     x_init: initial continuous state; when None it is drawn from
       N(self.mus[s], self.Sigmas[s]) for the initial switch state s
     y_init: accepted for interface compatibility, not used
   Outputs:
     xs: Hidden continuous states, array of shape (T, x_dim)
     Ss: Hidden switch states, array of length T
   """
   x_dim = self.x_dim
   # Allocate Memory
   xs = zeros((T, x_dim))
   Ss = zeros(T)
   # Compute Invariant: the stationary distribution is the left eigenvector
   # of Z for the eigenvalue closest to 1, rescaled so that its entries are
   # non-negative and sum to one (eig returns unit-norm vectors of
   # arbitrary sign, in no particular eigenvalue order).
   eigvals, vl = linalg.eig(self.Z, left=True, right=False)
   pi = abs(vl[:, abs(eigvals - 1).argmin()].real)
   pi = pi / pi.sum()
   # Sample Start conditions (the draw is made even when s_init is given,
   # so the random stream does not depend on the arguments)
   draw = multinomial(1, pi, size=1)[0]
   if s_init is None:
     Ss[0] = nonzero(draw)[0][0]
   else:
     Ss[0] = s_init
   if x_init is None:
     s0 = int(Ss[0])
     xs[0] = multivariate_normal(self.mus[s0], self.Sigmas[s0])
   else:
     xs[0] = x_init
   # Perform time updates
   for t in range(0,T-1):
     # Ss is a float array; indices must be integers
     s = int(Ss[t])
     A = self.As[s]
     b = self.bs[s]
     Q = self.Qs[s]
     xs[t+1] = multivariate_normal(dot(A,xs[t]) + b, Q)
     draw = multinomial(1,self.Z[s],size=1)[0]
     Ss[t+1] = nonzero(draw)[0][0]
   return (xs, Ss)
def generate_conditioned_example():
    """Build a list of ``(x, y)`` pairs where ``y`` is generated per ``x``.

    ``R.multinomial(num_data, create_multinomial())`` yields one count per
    value of ``x``; for every ``x`` a second multinomial sample of that
    size is drawn from a freshly created distribution and converted with
    ``convert_multinomial_sample`` to obtain the ``y`` values.
    """
    pairs = []
    group_sizes = R.multinomial(num_data, create_multinomial())
    for x, num in enumerate(group_sizes):
        ys = convert_multinomial_sample(R.multinomial(num, create_multinomial()))
        for y in ys:
            pairs.append((x, y))
    return pairs
Exemple #5
0
def generate_corpus_vocab(args):
    """Generate a synthetic bag-of-words corpus and its vocabulary.

    The vocabulary holds ``args.num_words`` words named ``word_1`` ...
    ``word_N``.  By index it is split into a background span followed by a
    "larger" and a "smaller" non-background span.  Every document draws a
    fixed number of background words plus words from both non-background
    spans: the first half of the documents draws ``larger_num`` words from
    the larger span and ``smaller_num`` from the smaller one, the second
    half the other way round.

    Reads from ``args``: num_words, background_vocab_prop,
    generated_background_prop, words_per_doc, gen_bias, num_docs.

    Returns ``(corpus, vocab)``; ``corpus`` is a list of dicts with the keys
    'id', 'label' (always None), 'words' (vocabulary indices) and 'counts'.
    """
    vocab = Vocab()
    for i in range(1, args.num_words + 1):
        vocab.add_word('word_' + str(i))

    ## proportion of the vocabulary reserved for background words
    background_v_p = args.background_vocab_prop
    ## lengths of the background vs. non-background portions of the vocab
    background_vocab = int(len(vocab) * background_v_p)
    non_back_vocab = len(vocab) - background_vocab
    ## non-background span lengths; the background offset is added when
    ## the sampled indices are recorded below
    larger_v_span = int(non_back_vocab * .5)
    smaller_v_span = non_back_vocab - larger_v_span

    ## how many words per document to draw from each part of the vocab
    background_prop = args.generated_background_prop
    back_words_per_doc = int(args.words_per_doc * background_prop)
    nb_words_per_doc = args.words_per_doc - back_words_per_doc
    larger_num = int(nb_words_per_doc * args.gen_bias)
    smaller_num = nb_words_per_doc - larger_num

    ## uniform probabilities within each span
    if background_vocab > 0:
        background_p = background_vocab * [1.0 / float(background_vocab)]
    else:
        background_p = background_vocab * [0.0]
    larger_p = larger_v_span * [1.0 / float(larger_v_span)]
    smaller_p = smaller_v_span * [1.0 / float(smaller_v_span)]

    left_doc_draw = larger_num
    right_doc_draw = smaller_num
    corpus = []
    for di in range(args.num_docs):
        # swap the draw sizes at the halfway point
        if di == int(args.num_docs/2):
            left_doc_draw = smaller_num
            right_doc_draw = larger_num
        # (number of draws, span probabilities, index offset of the span)
        spans = (
            (back_words_per_doc, background_p, 0),
            (left_doc_draw, larger_p, background_vocab),
            (right_doc_draw, smaller_p, background_vocab + larger_v_span),
        )
        counter = defaultdict(int)
        for num_draws, span_p, offset in spans:
            for (index, count) in enumerate(multinomial(num_draws, span_p)):
                if count > 0:
                    counter[index + offset] = count
        doc = {'id' : 'doc_'+str(di),
               'label' : None,
               'words' : [], 'counts' : []}
        for (word, count) in counter.items():
            doc['words'].append(word)
            doc['counts'].append(count)
        corpus.append(doc)
    return (corpus, vocab)
 def RandIntVec(self,ListSize, ListSumValue, Distribution='Normal'):
     """
     Draw a vector of random non-negative integers with a fixed sum.

     Inputs:
     ListSize = the size of the list to return
     ListSumValue = The sum of list values
     Distribution = 'uniform' for equal bin probabilities, 'normal'
       (default) for bin probabilities taken from N(0,1) over a +/- 3 sigma
       range, or a list of 'ListSize' or 'ListSize - 1' probabilities for an
       empirical (arbitrary) distribution.  The list is passed to the
       multinomial sampler unchanged, so the result has as many entries as
       the list, and the last entry is assumed to account for the remaining
       probability as long as sum(pvals[:-1]) <= 1.
     Output:
     An array of random integers whose sum is 'ListSumValue'.
     Raises:
     ValueError if the distribution name is unknown or the probability list
     has an unsupported length.
     """
     if isinstance(Distribution, list):
         DistributionSize = len(Distribution)
         if ListSize != DistributionSize and (ListSize-1) != DistributionSize:
             # Previously this fell through to an unbound local variable.
             raise ValueError ('Cannot create desired vector')
         Probabilities = Distribution
     elif Distribution.lower() == 'uniform':
         # 1.0/ListSize: plain 1/ListSize is integer division on Python 2
         # and would put all the probability mass on the last bin.
         Probabilities = [1.0/ListSize for i in range(ListSize)]
     elif Distribution.lower() == 'normal':
         # Bin edges are spread evenly over a +-3 sigma range of N(0,1).
         # Change LowSigma / HighSigma to explore a different range.
         LowSigma    = -3#-3 sigma
         HighSigma   = 3#+3 sigma
         if (float(ListSize) - 1) == 0:
             StepSize    = 0
         else:
             StepSize    = 1/(float(ListSize) - 1)
         ZValues     = [(LowSigma * (1-i*StepSize) +(i*StepSize)*HighSigma) for i in range(int(ListSize))]
         # Standard normal CDF at every bin edge
         CdfValues   = [0.5 * math.erfc(-z/math.sqrt(2)) for z in ZValues]
         # First bin: lower tail.  Middle bins: CDF differences.  Last bin:
         # mirrors the first one; the sampler lets the last entry absorb
         # whatever probability remains.
         Probabilities = [CdfValues[0]]
         for i in range(1, len(CdfValues) - 1):
             Probabilities.append(CdfValues[i] - CdfValues[i-1])
         if len(CdfValues) > 1:
             Probabilities.append(Probabilities[0])
     else:
         raise ValueError ('Cannot create desired vector')
     Values = multinomial(ListSumValue,Probabilities,size=1)
     return Values[0]
Exemple #7
0
	def getColor(self):
		"""Return the colour stored on this instance (``self.color``)."""
		return self.color
Exemple #8
0
def categorical(p, size=None):
    """ Cat K = Multi 1 K

    Draw category indices from the categorical distribution ``p`` by taking
    one-trial multinomial samples and returning the position of the 1.

    p    : sequence of category probabilities (must sum to ~1)
    size : None for a single index, an int for a 1-d array of that many
           indices, or a shape tuple of any length for an array of that
           shape (filled in row-major order).
    """
    assert 0.99<sum(p)<1.01

    def draw():
        return list(sample.multinomial(1, p)).index(1)

    if size is None:
        return draw()
    if isinstance(size, int):
        return array([draw() for _ in range(size)])
    # Any shape: draw the flat number of samples, then reshape.  For a
    # 2-tuple this matches the former nested row-by-row construction.
    shape = tuple(size)
    total = 1
    for dim in shape:
        total *= dim
    return array([draw() for _ in range(total)]).reshape(shape)
Exemple #9
0
    def generator(self):
        """
        Simulate data from the sticky HDP-HMM.

        Draws ``self.N`` initial states from a Dirichlet(``self.beta``)
        sample, advances every sequence for ``self.T`` time steps using the
        rows of ``self.PI`` as transition distributions, and emits one
        normal observation per state from ``self.clusterPars[state]``
        (used as the two positional arguments of ``normal``).  The results
        are stored as arrays in ``self.state`` and ``self.data``;
        observations are appended to the existing ``self.data`` first.
        """
        initial_draws = multinomial(1, dirichlet(self.beta), self.N)
        # Column index of the single 1 in every row = the sampled state.
        self.state = [list(np.where(initial_draws)[1])]
        for _ in range(1, self.T):
            previous_states = self.state[-1]
            next_states = []
            for prev in previous_states:
                one_hot = multinomial(1, self.PI[prev, :])
                next_states.append(np.where(one_hot)[0][0])
            self.state.append(next_states)

        for t in range(self.T):
            observations = []
            for j in self.state[t]:
                pars = self.clusterPars[j]
                observations.append(normal(pars[0], pars[1]))
            self.data.append(observations)

        self.state = np.array(self.state)
        self.data = np.array(self.data)
    def simulate_combat(self, allowed_time,
                        ant_0_scoring = ConservativeScore,
                        ant_1_scoring = ConservativeScore,
                        log = None):
        """Run randomised combat rollouts for a time budget and pick moves.

        For up to ``allowed_time`` seconds the method repeatedly restores
        the initial ant positions, samples one action per ant from a
        Dirichlet/multinomial draw over ``self.policy[ant]``, advances the
        turn with ``self.step_turn()`` and adds the owner's score to the
        policy weight of the action each ant took.  Afterwards the initial
        state is restored and one final action per ant is sampled.

        allowed_time  : wall-clock budget in seconds
        ant_0_scoring : factory called as ``f(self, 0)``; the result is
                        called as ``score(self)`` for ants whose owner is 0
        ant_1_scoring : same, called as ``f(self, 1)``, for all other ants
        log           : optional logger; when None the step count is printed

        Returns a dict mapping each ant in ``self.policy`` to the sampled
        entry of ``self.actions``.
        """
        start = time.time()
        score_0 = ant_0_scoring(self, 0)
        score_1 = ant_1_scoring(self, 1)

        self.allowed_policies()
        init_poses = dict( (a, a.pos) for a in self.ants)

        killed = []
        steps = 0
        while (time.time() - start) < allowed_time:
            steps += 1
            action = {}
            # Undo the previous rollout: bring back killed ants and reset
            # every ant to its starting position.
            for k in killed:
                self.add_ant(k)
            for a,p in init_poses.items():
                a.pos = p

            for ant in self.ants:
                ps = dirichlet(self.policy[ant])
                i = multinomial(1, ps).nonzero()[0][0]
                if not (self.move_direction(ant, self.actions[i])):
                    print("CAZZZ")
                action[ant] = i

            killed = self.step_turn()
            # Reinforce the action each ant took with its owner's score.
            for a, p in self.policy.items():
                if a.owner == 0:
                    p[action[a]] += score_0(self)
                else:
                    p[action[a]] += score_1(self)

        # Restore the real game state after the last rollout.
        for k in killed:
            self.add_ant(k)
        for a,p in init_poses.items():
            a.pos = p

        retpolicy = {}
        for a,p in self.policy.items():
            ps = dirichlet(p)
            i = multinomial(1, ps).nonzero()[0][0]
            retpolicy[a] = self.actions[i]
        if log is not None:
            log.info("Number of steps: %d", steps)
        else:
            # Two spaces on purpose: identical to the former
            # `print "Number of steps: ", steps` output.
            print("Number of steps:  %s" % (steps,))
        return retpolicy
Exemple #11
0
def sample_rho(v0_range, v1_range, v0_num_grid, v1_num_grid, K, num_1_vec,
               num_0_vec, p):
    """Sample (rho0, rho1) from a grid approximation over (v0, v1).

    Every grid point is mapped to (rho0, rho1) with ``transform_var_poly``
    and scored with ``compute_rho_posterior``; the scores are exponentiated
    after subtracting their maximum (so they are presumably log-posterior
    values) and normalised to sum to one.  One grid cell is then drawn from
    that distribution.

    Returns ``(rho0, rho1, posterior_grid)`` where ``posterior_grid`` is
    the normalised (v0_num_grid, v1_num_grid) weight array.
    """
    v0_grid = np.linspace(v0_range[0], v0_range[1], v0_num_grid)
    v1_grid = np.linspace(v1_range[0], v1_range[1], v1_num_grid)

    scores = np.zeros((v0_num_grid, v1_num_grid))
    for row, v0 in enumerate(v0_grid):
        for col, v1 in enumerate(v1_grid):
            rho0, rho1 = transform_var_poly(v0, v1, p)
            scores[row, col] = compute_rho_posterior(rho0, rho1, K,
                                                     num_1_vec, num_0_vec)

    # Subtract the maximum before exponentiating for numerical stability.
    posterior_grid = np.exp(scores - scores.max())
    posterior_grid /= (posterior_grid.sum())

    # Draw one cell of the flattened grid and map it back to (row, column).
    one_hot = multinomial(1, posterior_grid.reshape(-1))
    flat_index = np.where(one_hot)[0][0]
    chosen_v0 = v0_grid[int(flat_index // v1_num_grid)]
    chosen_v1 = v1_grid[int(flat_index % v1_num_grid)]

    rho0, rho1 = transform_var_poly(chosen_v0, chosen_v1, p)
    return rho0, rho1, posterior_grid
def SamplesFromGaussianMixture(Probs, Means, CovarianceMatrices, SampleCount, TrivialCovariances=False, Precision=np.float64, ChoicesPrecision=np.int_):
  """Draw samples from a Gaussian mixture.

  Probs              : (K,) mixture weights
  Means              : (K, d) component means
  CovarianceMatrices : (K, d, d) component covariances; not used when
                       TrivialCovariances is true
  SampleCount        : total number of samples N
  TrivialCovariances : when true the standard-normal draws are only
                       shifted by the mean (no Cholesky factor applied)
  Precision          : dtype of the returned samples (np.float64; the
                       former default alias np.float_ no longer exists in
                       NumPy 2.0)
  ChoicesPrecision   : dtype of the returned component indices

  Returns (ResultSet, Choices): a (d, N) array of samples and a length-N
  array with the component index of every sample.  Samples are grouped by
  component in increasing component order, not shuffled.
  """
  MixtureCount = Probs.shape[0]
  Dimension    = Means.shape[1]
  CholeskyMatrices = CovarianceMatrices
  if not TrivialCovariances:
    CholeskyMatrices = np.linalg.cholesky(CovarianceMatrices) # K x d x d

  ResultSet = np.empty(shape=(Dimension, SampleCount), dtype=Precision) # d x N
  Choices = np.zeros(SampleCount, dtype=ChoicesPrecision)

  # Number of samples contributed by every component
  MixturesToSample = multinomial(SampleCount, Probs)
  GeneratedSamples = 0
  for MixtureInd in range(MixtureCount):
    Count = MixturesToSample[MixtureInd]
    ZMatrix = normal(size=(Dimension, Count))
    if not TrivialCovariances:
      ZMatrix = np.dot(CholeskyMatrices[MixtureInd], ZMatrix)
    Stop = GeneratedSamples + Count
    # Add the mean as a column vector so it broadcasts over the samples
    ResultSet[:, GeneratedSamples:Stop] = ZMatrix + Means[MixtureInd][:, np.newaxis]
    Choices[GeneratedSamples:Stop] = MixtureInd
    GeneratedSamples = Stop

  return ResultSet, Choices
Exemple #13
0
 def get_next_sample(self, mask, test_mask=None):
     """Return the index of the best-scoring entry that is not masked out.

     A component index is drawn with a one-trial multinomial over
     ``self.p_weights``; its score array is rebuilt with
     ``self._reconstruct``.  Entries where ``mask`` (restricted to the first
     ``self.n_pure_relations`` items) or ``test_mask`` equals 1 are set to
     ``MIN_VAL`` so they cannot win, and the multi-dimensional index of the
     maximum is returned.
     """
     component = multinomial(1, self.p_weights).argmax()
     scores = self._reconstruct(self.E[component], self.R[component], False)
     already_used = mask[:self.n_pure_relations] == 1
     scores[already_used] = MIN_VAL
     if test_mask is not None:
         held_out = test_mask == 1
         scores[held_out] = MIN_VAL
     best_flat_index = scores.argmax()
     return np.unravel_index(best_flat_index, scores.shape)
Exemple #14
0
def sample_last(zt, wt, yt, n_mat, ysum, ycnt, beta_vec, beta_new, kappa_vec,
                kappa_new, alpha0, gamma0, sigma0, mu0, sigma0_pri, rho0, rho1,
                K):
    """Resample the state pair (zt[T-1], wt[T-1]) at the last time point.

    The current assignment of the last observation is removed from the
    count and sufficient-statistic arrays, a new (z, w) pair is drawn from
    the normalised posterior over the cases built below, a new state is
    instantiated when index K is drawn, and the statistics are updated
    with the new assignment.

    zt, wt, n_mat, ysum and ycnt are modified in place; when a new state is
    created, n_mat, ysum, ycnt, beta_vec and kappa_vec are replaced by
    enlarged copies, so callers must use the returned values.

    NOTE(review): the previous state is read as zt[T-2]; for T == 1 this
    wraps around to zt[-1] -- confirm callers always pass T >= 2.

    Returns (zt, wt, n_mat, ysum, ycnt, beta_vec, kappa_vec, beta_new,
    kappa_new, K).
    """
    T = len(zt)

    ########### last time point ##########
    t = T - 1
    # state at the preceding time point
    j = zt[t - 1]
    # remove the current transition j -> zt[t] from the counts; it is only
    # counted when wt[t] == 0
    if wt[t] == 0:
        n_mat[j, zt[t]] -= 1

    # remove the current observation from its state's sufficient statistics
    ysum[zt[t]] -= yt[t]
    ycnt[zt[t]] -= 1

    ## compute posterior distributions
    # NOTE(review): tmp_vec is not used below
    tmp_vec = np.arange(K)
    zt_dist = (alpha0 * beta_vec + n_mat[j]) / (alpha0 + n_mat[j].sum())
    knew_dist = alpha0 * beta_new / (alpha0 + n_mat[j].sum())

    ## compute y marginal likelihood
    varn = 1 / (1 / (sigma0_pri**2) + ycnt / (sigma0**2))
    mun = ((mu0 / (sigma0_pri**2)) + (ysum / (sigma0**2))) * varn

    yt_dist = ss.norm.pdf(yt[t], mun, np.sqrt((sigma0**2) + varn))
    yt_knew_dist = ss.norm.pdf(yt[t], mu0,
                               np.sqrt((sigma0**2) + (sigma0_pri**2)))

    ## construct z,w's posterior by cases
    # entry 0: w = 1 and z stays at j; the following entries: w = 0 and z is
    # one of the existing states; last entry: w = 0 and z is a new state
    post_cases = np.hstack(
        (kappa_vec[j] * yt_dist[j], (1 - kappa_vec[j]) * zt_dist * yt_dist,
         (1 - kappa_vec[j]) * knew_dist * yt_knew_dist))

    ## sample zt, wt
    post_cases = post_cases / (post_cases.sum())
    sample_rlt = np.where(multinomial(1, post_cases))[0][0]
    if sample_rlt < 1:
        zt[t], wt[t] = [j, 1]
    else:
        # shift by one to skip the w = 1 entry
        zt[t], wt[t] = [sample_rlt - 1, 0]

    ## update beta_vec, kappa_vec, n_mat when having a new state
    if zt[t] == K:
        # split a Beta(1, gamma0) fraction off the remaining mass beta_new
        b = beta(1, gamma0, size=1)
        beta_vec = np.hstack((beta_vec, b * beta_new))
        kappa_vec = np.hstack((kappa_vec, kappa_new))
        beta_new = (1 - b) * beta_new
        kappa_new = beta(rho0, rho1, size=1)
        # grow the count matrix by one column and one row of zeros
        n_mat = np.hstack((n_mat, np.zeros((K, 1))))
        n_mat = np.vstack((n_mat, np.zeros((1, K + 1))))
        ysum = np.hstack((ysum, 0))
        ycnt = np.hstack((ycnt, 0))
        K += 1

    ## update n_mat
    if wt[t] == 0:
        n_mat[j, zt[t]] += 1
    ysum[zt[t]] += yt[t]
    ycnt[zt[t]] += 1

    return zt, wt, n_mat, ysum, ycnt, beta_vec, kappa_vec, beta_new, kappa_new, K
Exemple #15
0
def get_sampled_dict_counts(ssize_list, dcounts):
    """
    Often, we use a dict to keep counts of categories, classes, traits.  Given a dict where
    objects to count are keys, and counts are values, take samples from the dict with sizes
    given in the ssize_list, and return a new dict with the requested ssize as key, and a dict with
    object:count_in_sample as value.

    Sampling is multinomial with probabilities proportional to the counts; objects whose
    count is not positive are left out of the sampled dicts.

    :param ssize_list: iterable of sample sizes
    :param dcounts: dict of { object: count }
    :return: dict with { ssize: { object: count }} for all ssize in ssize_list
    :raises ValueError: if a requested sample size exceeds the total count
    """
    result = dict()
    total = sum(dcounts.values())

    # The trait list and its probabilities do not depend on the sample size,
    # so build them once instead of once per requested size.
    traits = [trait for trait, count in dcounts.items() if count > 0]
    prob = [float(dcounts[trait]) / float(total) for trait in traits]

    for ssize in ssize_list:
        if ssize > total:
            raise ValueError("sample size requested: %s is larger than the population: %s" % (ssize, total))

        sampled_counts = npr.multinomial(ssize,prob,size=1)
        count_list = sampled_counts.tolist()
        result[ssize] = dict(zip(traits, count_list[0]))

    return result
    def grow(self):
        """Grow the population to carrying capacity

        The final population size is determined based on the proportion of
        producers present: it is interpolated between ``capacity_min`` and
        ``capacity_max`` and truncated to a whole number of individuals.
        The new abundances are drawn from a multinomial with the
        probability of each genotype proportional to its abundance times
        its fitness.

        Nothing happens when the population is empty, has not been diluted,
        or when all growth probabilities are zero.
        """

        if self.is_empty():
            return

        if not self.diluted:
            return

        landscape = self.metapopulation.fitness_landscape

        # The interpolation yields a float; the multinomial trial count must
        # be an integer, so truncate explicitly.
        final_size = int(self.capacity_min +
                         (self.capacity_max - self.capacity_min) *
                         self.prop_producers())

        grow_probs = self.abundances * (landscape/nsum(landscape))

        if nsum(grow_probs) > 0:
            norm_grow_probs = grow_probs/nsum(grow_probs)
            self.abundances = multinomial(final_size, norm_grow_probs, 1)[0]
Exemple #17
0
    def sample_topic_freqs(word_topic_rates, mask):
        """Sample how many times each topic generated a word.

        Given observed_word_freqs, a mask saying for each word whether it
        was held out (0) or not (1), and Poisson rates
        word_topic_rates[w, i] with which word w is produced by topic i,
        return a sample from the posterior distribution on the number of
        times a word was generated from each topic.
        """
        # Held-out words (mask == 0): every such word is Poisson, and a sum
        # of Poissons is Poisson, so one Poisson draw per topic suffices.
        held_out_rates = (1 - mask)[:, np.newaxis] * word_topic_rates
        topic_freqs = poisson(np.sum(held_out_rates, 0))

        # Observed words (mask == 1): split each word's observed count over
        # the topics with a multinomial whose probabilities are proportional
        # to that word's topic rates.  Words that did not occur at all are
        # skipped for efficiency.
        needs_sample = np.array(mask * observed_word_freqs, dtype = bool)
        for word in np.arange(vocab_size)[needs_sample]:
            rates = word_topic_rates[word, :]
            probabilities = rates / sum(rates)
            topic_freqs += multinomial(observed_word_freqs[word],
                                       probabilities)
        return topic_freqs
def sample_assignments(W, B, grid_counts, z_curr, is_log=False):
  """Resample how each player's tile counts are split over the bases.

      W is a K by Nplayer matrix of weights (per player)
      B is the K by Vtiles matrix of (positive) bases
      grid_counts is the Nplayer by V matrix of counts
      z_curr is the Nplayer by V tile by K 3D array of current assignments
      is_log says that W and B hold log-values (more numerically stable);
        they are then added and exponentiated instead of multiplied

      z_curr is overwritten in place and also returned.
  """
  num_bases = W.shape[0]    # number of basis surfaces
  num_players = W.shape[1]  # number of players or time steps
  num_tiles = B.shape[1]    # number of spatial tiles

  # Each mode keeps the sampler it has always used.
  draw = np.random.multinomial if is_log else npr.multinomial

  for n in range(num_players):
    for v in range(num_tiles):
      count = grid_counts[n, v]
      if not count > 0:
        # nothing observed on this tile: no assignments
        z_curr[n, v, :] = np.zeros(num_bases)
        continue
      if is_log:
        rates = np.exp(W[:, n] + B[:, v])
      else:
        rates = W[:, n] * B[:, v]
      rates = rates / rates.sum()
      z_curr[n, v, :] = draw(count, rates, size=1)[0]

  return z_curr
Exemple #19
0
def make_schedule(n_cat, n_total, max_repeat):
    """Generate an event schedule subject to a repeat constraint.

    Categories are drawn uniformly at random, except that once the same
    category has occurred ``max_repeat`` times in a row the next event is
    drawn uniformly from the other categories.

    n_cat      : number of categories (events are labelled 0 .. n_cat - 1)
    n_total    : number of events in the schedule
    max_repeat : maximum number of consecutive repeats of one category

    Returns a list of ``n_total`` ints.
    """
    # Uniform distribution over all categories
    ideal_tmat = [1.0 / n_cat] * n_cat
    # One distribution per category that excludes exactly that category,
    # used when the repeat limit has been reached
    const_mat_list = []
    for i in range(n_cat):
        const_mat_list.append([1.0 / (n_cat - 1)] * n_cat)
        const_mat_list[-1][i] = 0

    def draw(dist):
        # position of the single 1 in a one-trial multinomial sample
        return int(multinomial(1, dist).argmax())

    # Generate the schedule
    schedule = []
    while len(schedule) < n_total:
        trailing = schedule[-max_repeat:]
        # At the limit only when a full window of max_repeat events exists
        # and all of them are the same category; a shorter run at the start
        # of the schedule must not trigger the constraint.
        if len(trailing) == max_repeat and len(set(trailing)) == 1:
            tdist = const_mat_list[trailing[-1]]
        else:
            tdist = ideal_tmat
        # Assign this iteration's state
        schedule.append(draw(tdist))

    return schedule
def barabasiAlbert(n, d):
    """Generates an undirected Barabasi-Albert random graph.

    A Barabasi-Albert (a.k.a. preferential attachment) random graph
    starts with a clique of d nodes, then adds nodes sequentially.
    Each new node is connected to d existing nodes, chosen with
    probability proportional to the existing node's degree.

    en.wikipedia.org/wiki/Barab%C3%A1si%E2%80%93Albert_model

    n: number of nodes
    d: degree of each new node
    """
    nodes = range(n)
    edges = set()
    degrees = np.zeros(n)
    for node in nodes:
        # Give the node one unit of weight so that it can be sampled as a
        # neighbour by later nodes.
        degrees[node] += 1
        added = set()
        # Keep attaching until the node has d neighbours, or is connected
        # to every earlier node when fewer than d of them exist.
        while degrees[node] <= min(d, node):
            attachment_probs = degrees / degrees.sum()
            neighbor = list(multinomial(1, attachment_probs)).index(1)
            candidate = (node, neighbor)
            if neighbor != node and candidate not in added:
                added.add(candidate)
                degrees[neighbor] += 1
                degrees[node] += 1
        edges |= added
    return Graphs.UndirectedGraph(nodes, edges)
def total_zeros_multinomial_test(hit_table, ws, num_bootstraps=1000):
    """Bootstrap test on the summed zeros-multinomial distance of all rows.

    The statistic is the sum over the rows of ``hit_table`` of
    ``calculate_zeros_multinomial_distance(row, ws)``.  Its null
    distribution is estimated by resampling every row from a multinomial
    with the row's total count and probabilities ``ws / ws.sum()``.

    hit_table      : 2-d count array, one row per sample
    ws             : weight vector, one entry per column
    num_bootstraps : number of bootstrap replicates

    Returns ``(observed, bootstrap_mean, 2 * bootstrap_std, pvalue)`` where
    the p-value is the add-one-smoothed fraction of replicates whose
    statistic is at least the observed one.
    """
    ps = ws * 1.0 / ws.sum()
    ntots = hit_table.sum(axis=1)

    observed_prob = 0
    for i in range(len(ntots)):
        observed_prob += calculate_zeros_multinomial_distance(hit_table[i], ws)

    bootstrapped_probs = []
    for bootstrap_idx in range(num_bootstraps):
        bootstrapped_prob = 0
        for ntot in ntots:
            ns = multinomial(ntot, ps)
            bootstrapped_prob += calculate_zeros_multinomial_distance(ns, ws)
        bootstrapped_probs.append(bootstrapped_prob)

    bootstrapped_probs = numpy.array(bootstrapped_probs)

    # Add-one smoothing keeps the p-value strictly positive.
    pvalue = ((bootstrapped_probs >= observed_prob).sum() +
              1.0) / (len(bootstrapped_probs) + 1.0)

    return (observed_prob, bootstrapped_probs.mean(),
            bootstrapped_probs.std() * 2, pvalue)
    def resampleOptionally(self):
        """Resample the tau particles when the weights have degenerated.

        The effective sample size is 1 / sum(w_i^2).  When it is larger
        than ``self.resample_threshold * N`` nothing happens.  Otherwise
        particles are chosen with a multinomial draw over the weights and
        every chosen particle is replaced by normal draws in log-space
        around ``a * log(tau_i) + (1 - a) * mu`` with standard deviation
        ``h * sqrt(Xi)``, where ``(mu, Xi)`` comes from
        ``self.ensembleLogMeanVar()``.  ``self.taus`` is replaced by the
        sorted, exponentiated draws and ``self.weights`` becomes uniform.
        """
        N = len(self.taus)
        effective_dimension = 1.0 / dot(self.weights, self.weights)

        if effective_dimension > self.resample_threshold*N:
            return
        print('RESAMPLING')

        # How many offspring every old particle gets
        old_taus_chosen = multinomial(N, self.weights)

        mu, Xi = self.ensembleLogMeanVar()
        Xi = self._h * sqrt( Xi )

        # MAIN LOOP: remember we are working with log-taus.  The draws are
        # collected and joined once at the end instead of growing an array
        # by concatenation on every iteration.
        draws = [array([])]
        for idx, Nchosen in enumerate(old_taus_chosen):
            if Nchosen == 0:
                continue
            log_tau_i = log(self.taus[idx])
            # shrink the particle's log-value towards the ensemble mean
            mu_i = self._a*log_tau_i + (1-self._a)*mu
            draws.append(normal(loc=mu_i, scale=Xi, size=Nchosen))
        new_taus = r_[tuple(draws)]

        # remember to exponentiate the draws:
        self.taus = sort( exp(new_taus))
        self.weights = ones_like(self.taus)/self.Ntaus()
Exemple #23
0
def sendInfectedPass(self, infgroup, pdests):
    '''
    If there are infected people, distribute them over their destinations.

    infgroup = [Ip0pos,Ip2pos,Ip10pos,Ip15pos,Ip20pos,Ip40pos,
                Is0pos,Is2pos,Is10pos,Is15pos,Is20pos,Is40pos]
    pdests is indexed by destination group (0-5); every entry maps a
    destination object to its probability.  For each position n of
    infgroup, the infected are split multinomially over the destinations
    of the matching group and added to
    ``destination.dest.infectedvisiting[n]``.

    Always returns 1.
    '''
    # Destination group used by each position of infgroup
    a = [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5]
    if (pdests):
        for n, i in enumerate(infgroup):
            destlist = pdests[a[n]]
            # Logical "and": the former bitwise "&" skipped valid cases such
            # as 2 infected with a single destination (2 & 1 == 0).
            if int(i) and len(destlist):
                # How many of the infected go to each destination
                choosedest = multinomial(int(i), list(destlist.values()))
                # Send the infected to their destinations
                for j, arrivals in zip(destlist, choosedest):
                    j.dest.infectedvisiting[n] += arrivals

    return 1
def SimulateCreditNetwork(CN, price, events, DP, TR, BV, SC):
	"""
	Simulate defaults and a sequence of transactions on a credit network.

	CN - credit network
	DP - default probability array
	TR - transaction rate matrix
	BV - buy value matrix
	SC - sell cost matrix
	price - function to determine a price from value and cost
	events - number of transactions to simulate

	Every node defaults independently with probability DP[node]; its
	neighbours lose the credit they extended to it and the node is removed
	from CN.  Then `events` (buyer, seller) pairs are drawn with
	probabilities given by TR and processed in random order; pairs that
	involve a removed node or whose payment cannot be routed are skipped.

	Returns a dict mapping every surviving node to its payoff.
	"""
	payoffs = {n: 0. for n in CN.nodes}

	# Build the full list of defaulters before removing anything, so the
	# node collection is not modified while it is being iterated.
	defaulters = [n for n in CN.nodes if R.binomial(1, DP[n])]
	for d in defaulters:
		for n in CN.nodes:
			if CN.adjacent(n, d):
				payoffs[n] -= CN.weights[(n, d)]
		CN.removeNode(d)
		del payoffs[d]

	# m[i] is the number of transactions for the i-th (buyer, seller) pair
	# of the flattened rate matrix.
	m = R.multinomial(events, array(TR.flat))
	l = TR.shape[0]
	transactors = []
	for i in range(l**2):
		# divmod gives integer (row, column) indices on every Python version
		transactors.extend([divmod(i, l)] * int(m[i]))
	R.shuffle(transactors)
	for b,s in transactors:
		# Explicit check instead of an assert, which is stripped under -O
		if b not in CN.nodes or s not in CN.nodes:
			continue
		try:
			CN.routePayment(b, s, price(BV[b,s], SC[b,s]))
		except (AssertionError, CreditError):
			continue
		payoffs[b] += BV[b,s]
		payoffs[s] -= SC[b,s]
	return payoffs
def population_coincidence_test(hit_table, ws, num_bootstraps=1000):
    """Bootstrap test on the fraction of rows that contain a coincidence.

    A row has a coincidence when at least one of its entries is 2 or more.
    The null distribution of the fraction of such rows is estimated by
    resampling every row from a multinomial with the row's total count and
    probabilities ``ws / ws.sum()``.

    hit_table      : 2-d count array, one row per sample
    ws             : weight vector, one entry per column
    num_bootstraps : number of bootstrap replicates

    Prints the observed fraction, the bootstrap mean, twice the bootstrap
    standard deviation and the p-value, and returns
    ``(observed_fraction, pvalue)``.  The p-value is the add-one-smoothed
    fraction of replicates at least as large as the observed fraction.
    """
    ps = ws * 1.0 / ws.sum()
    ntots = hit_table.sum(axis=1)

    # Rows with at least one entry >= 2 (thresholds at x.5 avoid ties)
    coincidences = (hit_table > 1.5).sum(axis=1)
    num_coincidences = (coincidences > 0.5).sum() * 1.0
    num_total = len(coincidences) * 1.0

    observed_fraction = num_coincidences * 1.0 / num_total

    bootstrapped_fractions = []
    for bootstrap_idx in range(num_bootstraps):
        bootstrapped_num_coincidences = 0
        bootstrapped_num_total = 0
        for ntot in ntots:
            ns = multinomial(ntot, ps)
            bootstrapped_num_total += 1
            if (ns > 1.5).sum():
                bootstrapped_num_coincidences += 1
        bootstrapped_fractions.append(bootstrapped_num_coincidences * 1.0 /
                                      bootstrapped_num_total)

    bootstrapped_fractions = numpy.array(bootstrapped_fractions)
    pvalue = ((bootstrapped_fractions >= observed_fraction).sum() +
              1.0) / (len(bootstrapped_fractions) + 1.0)
    # Single formatted string: same output as the former print statement,
    # valid on Python 2 and 3.
    print("%s %s %s %s" % (observed_fraction, bootstrapped_fractions.mean(),
                           bootstrapped_fractions.std() * 2, pvalue))
    return observed_fraction, pvalue
Exemple #26
0
def mean(data, num_resamples, percentiles=None):
    """Compute bootstrap bounds for the mean of a data set

    A typical use is bounding the social welfare of a mixture when every
    sample is an iid draw of welfare from that mixture.

    Parameters
    ----------
    data : [float] or ndarray
        The data whose mean is bootstrapped; it is flattened first.
    num_resamples : int
        The number of bootstrap samples. More samples take longer but give
        better accuracy.
    percentiles : int or [int]
        The percentiles in [0, 100] to compute on the bootstrap means.
        Standard choices are 95, or [2.5, 97.5]. When omitted, the sorted
        array of all bootstrap means is returned instead.
    """
    values = np.asarray(data, float).ravel()
    num_points = values.size
    # A bootstrap resample is fully described by how often each data point
    # was picked, i.e. a multinomial draw with uniform probabilities.
    uniform = np.ones(num_points) / num_points
    pick_counts = rand.multinomial(num_points, uniform, num_resamples)
    bootstrap_means = pick_counts.dot(values) / num_points
    if percentiles is not None:
        return np.percentile(bootstrap_means, percentiles)
    bootstrap_means.sort()
    return bootstrap_means
def clade_grouping_test(hit_table, num_bootstraps=1000):
    """Bootstrap test on the clade-grouping log-likelihood of a hit table.

    The statistic is ``calculate_clade_grouping_loglikelihood(hit_table)``.
    Its null distribution is estimated by resampling every row from a
    multinomial with the row's total count and the pooled column
    frequencies of ``hit_table``.

    hit_table      : 2-d count array
    num_bootstraps : number of bootstrap replicates

    Prints the observed statistic, the bootstrap mean, twice the bootstrap
    standard deviation and the p-value, and returns
    ``(observed_loglikelihood, pvalue)``.  The p-value is the
    add-one-smoothed fraction of replicates at least as large as the
    observed statistic.
    """
    observed_loglikelihood = calculate_clade_grouping_loglikelihood(hit_table)

    # Pooled column frequencies and per-row totals
    ps = hit_table.sum(axis=0) * 1.0 / hit_table.sum()
    ntots = hit_table.sum(axis=1)

    bootstrapped_loglikelihoods = []
    for bootstrap_idx in range(num_bootstraps):
        bootstrapped_hit_table = numpy.array(
            [multinomial(ntot, ps) for ntot in ntots])
        bootstrapped_loglikelihoods.append(
            calculate_clade_grouping_loglikelihood(bootstrapped_hit_table))

    bootstrapped_loglikelihoods = numpy.array(bootstrapped_loglikelihoods)

    pvalue = ((bootstrapped_loglikelihoods >= observed_loglikelihood).sum() +
              1.0) / (len(bootstrapped_loglikelihoods) + 1.0)

    # Single formatted string: same output as the former print statement,
    # valid on Python 2 and 3.
    print("%s %s %s %s" % (observed_loglikelihood,
                           bootstrapped_loglikelihoods.mean(),
                           bootstrapped_loglikelihoods.std() * 2, pvalue))
    return observed_loglikelihood, pvalue
Exemple #28
0
def chooseRand(dist):
	"""Return a randomly chosen index of a discrete probability distribution.

	dist is a vector of non-negative probabilities that sums to 1 (up to
	floating-point rounding).  Index i is returned with probability dist[i].

	Raises ValueError if dist does not sum to 1 or has a negative entry.
	"""
	# Compare with a tolerance: e.g. sum([0.1] * 10) is not exactly 1.
	if abs(sum(dist) - 1) > 1e-8: raise ValueError('Distribution does not sum to 1')
	# Check every entry; the sign of the product hides pairs of negatives.
	if any(p < 0 for p in dist): raise ValueError('Distribution has a negative probability')
	randstate = random.multinomial(1, dist)
	# The sample is one-hot, so the position of its maximum is the choice.
	return int(randstate.argmax())
    def mutate(self):
        """Mutate a Population

        Each genotype mutates to another with probability inversely proportional
        to the Hamming distance (# different bits in binary representation)
        between them. The distances between all pairs of genotypes is
        pre-calculated at the beginning of a run and stored in
        metapopulation.mutation_probs.

        Nothing happens when the population is empty or has not been
        diluted.  Otherwise ``self.abundances`` is replaced by a uint32
        array holding the abundances after mutation.
        """

        if self.is_empty():
            return

        if not self.diluted:
            return

        mutated_population = zeros(self.abundances.size, dtype=np.uint32)

        for i in np.nonzero(self.abundances)[0]:
            # Distribute the individuals of genotype i over all genotypes
            offspring = multinomial(self.abundances[i],
                                    self.metapopulation.mutation_probs[i],
                                    size=1)[0]
            # The sampler returns signed integers; NumPy refuses to add them
            # in place to an unsigned array, so cast explicitly.
            mutated_population += offspring.astype(mutated_population.dtype)

        self.abundances = mutated_population
def genCityCC(cityStateMap, cc2PopMap, city):
    """Pick a county code for ``city`` at random, weighted by population.

    Every key of ``cityStateMap`` whose text before the first comma equals
    ``city`` contributes its county code; each code is weighted by its entry
    in ``cc2PopMap``.

    Returns the sampled county code, or ``""`` when no key matches or the
    matching populations sum to zero.
    """
    codes = [
        cityStateMap[entry]
        for entry in cityStateMap
        if entry.split(',')[0] == city
    ]
    populations = [cc2PopMap[code] for code in codes]

    total = sum(populations)
    if total == 0:
        return ""

    # Turn populations into selection probabilities and draw one code.
    weights = [population / total for population in populations]
    sample = random.multinomial(1, weights, size=1)[0]
    return codes[np.nonzero(sample)[0][0]]
def initialize_discrete(X, R, depth, K, leaf_path, random=False):
    """Initialise the discrete leaf assignment for every time step.

    X = sequence of continuous latent state arrays; column ``t`` of each
        array is the state at time step ``t``
    R = hyperplanes, passed through to ``utils.compute_leaf_log_prob``
    depth = depth of tree
    K = number of leaf nodes
    leaf_path = array whose column ``k`` is the path associated with leaf ``k``
    random = if true, sample each leaf from its probability vector;
        otherwise take the most probable leaf

    Returns ``(Z, Path)``: for every array in ``X``, a vector of chosen leaf
    indices and a ``(depth, T)`` array holding the chosen leaves' paths.
    """

    Z = []
    Path = []
    for idx in range(len(X)):
        num_steps = X[idx][0, :].size
        z = np.zeros(num_steps)
        path = np.zeros((depth, num_steps))
        for t in range(num_steps):
            log_prob = utils.compute_leaf_log_prob(R, X[idx][:, t], K, depth, leaf_path)
            # Shift by the max before exponentiating to avoid overflow, then
            # normalise by the SUM so that p is a valid probability vector
            # (dividing by the max would leave entries that sum to > 1).
            p = np.exp(log_prob - np.max(log_prob))
            p = p / np.sum(p)

            if random:  # If true then sample from pmf defined by p
                draw = npr.multinomial(1, p, size=1)
                choice = np.where(draw[0, :] == 1)[0][0]
            else:  # Initialize with bayes classifier
                choice = np.argmax(p)

            path[:, t] = leaf_path[:, choice].ravel()
            z[t] = choice

        Z.append(z)
        Path.append(path)
    return Z, Path
Exemple #32
0
def generate_sbm_data(N, K, alpha, a, b, m=None):
    """
    Generate block assignments and an adjacency matrix.

    N is the number of nodes
    K is the number of blocks
    alpha is the concentration parameter
    a and b are the shape parameters
    m is the base measure (uniform over the K blocks when None)

    Returns (Z, Y): Z is an N x K one-hot matrix of block assignments and
    Y is an N x N 0/1 integer matrix of sampled edges.
    """

    # `m == None` would compare an array element-wise; test identity instead.
    if m is None:
        m = ones(K) / K # uniform base measure

    # sample (global) distribution over blocks

    [theta] = dirichlet(alpha * m, 1)

    # sample between- and within-block edge probabilities

    phi = beta(a, b, (K, K))

    # sample block assignments (one one-hot row per node)

    Z = multinomial(1, theta, size=N).astype(float)

    # sample edges: Z phi Z^T is the edge probability for each node pair

    Y = (uniform(size=(N, N)) <= dot(dot(Z, phi), Z.T)).astype(int)

    return Z, Y
Exemple #33
0
    def __add_customer(self, context, restaurant, word):
        """Seat one customer for ``word`` in ``restaurant``.

        The candidate tables come from ``restaurant.w_tables_info(word)``;
        each table's mass is discounted by 0.2 (floored at 0) and one extra
        "new table" option with mass ``0.2 + 0.2 * restaurant.total_t`` is
        appended before a single multinomial draw.

        Returns True when the customer joined an existing table and False
        when a new table was opened. ``context`` is not used here.
        """
        seating = restaurant.w_tables_info(word)
        if len(seating) != 0:
            tables_index, prob_mass = zip(*seating.items())
        else:
            tables_index, prob_mass = ((), ())

        weights = [max(0, mass - 0.2) for mass in prob_mass]
        weights.append(0.2 + 0.2 * restaurant.total_t)
        nmlz = 1 / sum(weights)
        probs = [weight * nmlz for weight in weights]

        drawn = multinomial(1, probs).tolist().index(1)
        if drawn < len(tables_index):
            # Existing table: just add the customer.
            restaurant.tables[tables_index[drawn]]["customer"] += 1
            restaurant.total_c += 1
            return True

        # The last slot was drawn: open a fresh table serving this word.
        new_index = restaurant.table_next_index
        restaurant.tables[new_index] = {"dish": word, "customer": 1}
        restaurant.table_next_index += 1
        restaurant.total_t += 1
        restaurant.total_c += 1
        return False
Exemple #34
0
 def act_(self, state):
     """Choose an action based on current state.

     The strategy depends on ``state`` and ``self.EXPLORE``:

     * ``state is None``: pick an index with ``randint``.
     * ``'epsilon'``: with probability ``self.EPSILON`` pick an index with
       ``randint``; otherwise pick among the actions whose looked-up
       q-value equals the maximum, breaking ties with ``randint``.
     * ``'soft_probability'``: draw an index from a multinomial whose
       weights are ``exp(q) / sum(exp(q))``.

     When ``self.verbose > 0`` a short trace is printed.

     Returns the chosen entry of ``self.ACTIONS``.
     Raises ValueError for any other ``self.EXPLORE`` value.
     """
     # NOTE(review): the index arithmetic below assumes `randint` excludes its
     # upper bound (as numpy.random.randint does) -- confirm against imports.
     if state is None:
         idx_action = randint(0, len(self.ACTIONS))  # if state cannot be internalized as state, random act
         if self.verbose > 0:
             print "  QAgent: ",
             print "randomly choose action {} (None state).".format(self.ACTIONS[idx_action])
     elif self.EXPLORE == 'epsilon':
         if rand() < self.EPSILON:  # random exploration with "epsilon" prob.
             idx_action = randint(0, len(self.ACTIONS))
             if self.verbose > 0:
                 print "  QAgent: ",
                 print "randomly choose action (Epsilon)."
         else:  # select the best action with "1-epsilon" prob., break tie randomly
             q_vals = self.lookup_table_(state)
             max_qval = max(q_vals)
             # every index whose q-value ties with the maximum
             idx_best_actions = [i for i in range(len(q_vals)) if q_vals[i] == max_qval]
             idx_action = idx_best_actions[randint(0, len(idx_best_actions))]
             if self.verbose > 0:
                 print "  QAgent: ",
                 print "choose best q among {} (Epsilon).".format(
                     {self.ACTIONS[i]: q_vals[i] for i in range(len(self.ACTIONS))}
                 )
     elif self.EXPLORE == 'soft_probability':
             q_vals = self.lookup_table_(state)  # state = internal_state
             # NOTE(review): exp() is applied to the raw q-values without
             # subtracting their maximum, so large q-values may overflow.
             exp_q_vals = exp(q_vals)
             # one multinomial trial is one-hot; nonzero() recovers the drawn index
             idx_action = multinomial(1, exp_q_vals/sum(exp_q_vals)).nonzero()[0][0]
             if self.verbose > 0:
                 print "  QAgent: ",
                 print "choose best q among {} (SoftProb).".format(dict(zip(self.ACTIONS, q_vals)))
     else:
         raise ValueError('Unknown keyword for exploration strategy!')
     return self.ACTIONS[idx_action]
Exemple #35
0
Fichier : ml.py Projet : qdbp/qqq
def gen_random_labels(X: Union[np.ndarray, int], n_classes: int,
                      pvec=None) -> np.ndarray:
    """
    Returns a random labelling of dataset X.

    Labels are one-hot encoded, with `n_classes` dimensions.

    Accepts an optional vector of probabilities with which to generate each
    class.

    Args:
        X: number of labels to generate, integer or array-like.
            If array-like, `X.shape[0]` labels will be generated.
        n_classes: number of output classes
        pvec: array-like, gives probabilities of each class. If `None`, all
            classes are equiprobable.

    Returns:
        ndarray[N, n_classes], the random, one-hot encoded labels.

    Raises:
        ValueError: if `pvec` is given but does not have exactly
            `n_classes` entries.
    """

    # NumPy integer scalars are counts too, not arrays with a shape.
    if isinstance(X, (int, np.integer)):
        num = X
    else:
        num = X.shape[0]

    # Only fall back to the uniform distribution when no pvec was supplied.
    if pvec is None:
        pvec = np.ones((n_classes, )) / n_classes
    else:
        pvec = np.asarray(pvec, dtype=float)
        if pvec.shape != (n_classes, ):
            raise ValueError(
                'pvec must have exactly n_classes entries '
                '(expected {}, got shape {})'.format(n_classes, pvec.shape))

    return npr.multinomial(1, pvec, size=num)
        def sample_topic_assignments(self):
            """Resample the topic assignments stored in ``self.z_U`` and ``self.z_V``.

            For every (i, j) pair visited, an unnormalised weight vector is
            built as ``theta * exp(-residual**2 / (2 * self.sigmaSqd))``,
            normalised by its sum, and the result of a single multinomial
            trial is written back to ``z_U[i, j]`` / ``z_V[i, j]``.

            NOTE(review): ``I_U`` and ``X`` are read as bare names while the
            item loop uses ``self.I_V`` -- confirm they are meant to be
            module-level/enclosing-scope objects rather than attributes.
            NOTE(review): ``multinomial(1, ...)`` returns a one-hot vector, yet
            it is stored into a single cell that is later used as an index
            (``self.z_V[i,j]``, ``self.z_U[i,j]``) -- confirm intended shapes.
            """
            # user topics
            for i in xrange(self.N):
                for j in I_U[i]:
                    theta_U_star = np.zeros(self.K_U)
                    # NOTE(review): only k_idx is used; the loop length comes from
                    # row i of z_U rather than from self.K_U -- confirm.
                    for k_idx, k in enumerate(self.z_U[i,:]):
                        theta_U_star[k_idx] = self.theta_U[i,j]*exp(-(X[(i,j)] - self.chi_0 - self.c_[i, self.z_V[i,j]] - self.d[k_idx, j] - np.dot(self.U_[i,:], self.V_[:,j]))**2/(2*self.sigmaSqd))
                    self.z_U[i,j] = multinomial(1, theta_U_star/sum(theta_U_star))

            # item topics
            for j in xrange(self.M):
                for i in self.I_V[j]:
                    theta_V_star = np.zeros(self.K_V)
                    # NOTE(review): only k_idx is used; the loop length comes from
                    # column j of z_V rather than from self.K_V -- confirm.
                    for k_idx, k in enumerate(self.z_V[:,j]):
                        theta_V_star[k_idx] = self.theta_V[i,j]*exp(-(X[(i,j)] - self.chi_0 - self.c_[i, k_idx] - self.d[self.z_U[i,j], j] - np.dot(self.U_[i,:], self.V_[:,j]))**2/(2*self.sigmaSqd))
                    self.z_V[i,j] = multinomial(1, theta_V_star/sum(theta_V_star))
Exemple #37
0
def main():
    """Write randomly sampled role assignments as JSON lines.

    Reads a configuration and a mixture (both JSON) from the parsed
    command-line arguments, then repeatedly draws, for every role, a
    multinomial split of that role's player count over its strategies.
    Each draw is stored under ``conf['assignment']`` and dumped to
    ``args.output`` followed by a newline. Runs ``args.num_samples`` times,
    or forever when that is falsy; a broken output pipe ends it quietly.
    """
    args = create_parser().parse_args()
    conf = json.load(args.configuration)
    roles = conf.pop('roles')
    mix = json.load(args.mixture)

    # One (role, player count, strategy names, probabilities) tuple per role.
    role_info = []
    for role, strat_probs in mix.items():
        role_info.append((role, roles[role]) + tuple(zip(*strat_probs.items())))

    if args.num_samples:
        num = itertools.islice(itertools.count(), args.num_samples)
    else:
        num = itertools.count()

    def draw_assignment():
        # Strategies that received no players are left out of the profile.
        return {
            role: {
                strat: int(count)
                for strat, count in zip(strats, rand.multinomial(players, probs))
                if count > 0
            }
            for role, players, strats, probs in role_info
        }

    try:
        for _ in num:
            conf['assignment'] = draw_assignment()
            json.dump(conf, args.output)
            args.output.write('\n')

    except BrokenPipeError:
        pass
    def makeRandomSpectrum(self,
                           mols,
                           quants,
                           sigma,
                           jP=None,
                           prec_digits=None):
        """Simulate a mixture of isotopic envelopes.

        Parameters
        ----------
        mols : list
            A list of molecular species: tuples containing (id, chemical formula string, something, charge, quenched charge)
        quants : list
            A list of total intensities of each molecular species.
        sigma : float
            The standard deviation of the masses of isotopologues - theoretical equivalent of the mass resolution.
            When it is not positive, no mass noise is added.
        jP : float
            The joint probability of the theoretical isotopic envelope.
            Falls back to ``self.jP`` when falsy.
        prec_digits : int
            The number of digits after which the simulated masses get rounded.
            Falls back to ``self.prec_digits`` when falsy.

        Returns
        -------
        spectrum : tuple
            A tuple containing the simulated spectrum: mass over charge values and intensities.
        """
        x0 = sum(quants)
        if not prec_digits:
            prec_digits = self.prec_digits
        if not jP:
            jP = self.jP

        def get_intensity_measure(mols, quants):
            # Yield each species' envelope, scaled by its total intensity.
            for mol, quant in zip(mols, quants):
                _, atomCnt_str, _, q, g = mol
                ave_mz, ave_intensity = self.isoEnvelope(
                    atomCnt_str=atomCnt_str, jP=jP, q=q, g=g, prec_digits=2)
                ave_intensity = quant * ave_intensity
                yield ave_mz, ave_intensity

        mz_average, intensity = reduce(merge_runs,
                                       get_intensity_measure(mols, quants))
        # Distribute the x0 total counts over the merged peaks.
        probs = intensity / sum(intensity)
        counts = np.array(multinomial(x0, probs), dtype='int')

        if sigma > 0.0:
            spectrum = Counter()
            for m_average, cnt in zip(mz_average, counts):
                if cnt > 0:
                    # Each count gets its own normally perturbed, rounded mass.
                    m_over_z = np.round(
                        normal(loc=m_average, scale=sigma, size=cnt),
                        prec_digits)
                    spectrum.update(m_over_z)

            # Materialise the keys once: np.array() over a Python 3 dict view
            # would give a 0-d object array, and a single list keeps masses
            # and intensities aligned.
            masses = list(spectrum)
            spectrum = np.array(masses), np.array(
                [float(spectrum[m]) for m in masses])
        else:
            spectrum = (mz_average, counts)
        return spectrum
Exemple #39
0
def test_JSD():
    """Check ``ndd.estimators.JSDivergence`` against a stored reference value.

    The input is four multinomial count vectors drawn from one seeded
    Dirichlet sample.
    """
    alpha, n_draws, n_bins = 1.0, 100, 20
    random.seed(SEED)
    pk = random.dirichlet([alpha] * n_bins)
    counts = random.multinomial(n_draws, pk, size=4)
    expected = -0.01804523405829217
    estimate = ndd.estimators.JSDivergence()(counts)
    assert numpy.isclose(estimate, expected)
Exemple #40
0
def test_KLD():
    """Check ``ndd.kullback_leibler_divergence`` against a stored reference value.

    ``qk`` is a seeded Dirichlet sample and ``pk`` a multinomial count
    vector drawn from it.
    """
    alpha, n_draws, n_bins = 1.0, 100, 20
    random.seed(SEED)
    qk = random.dirichlet([alpha] * n_bins)
    pk = random.multinomial(n_draws, qk)
    expected = -0.04299973796573253
    estimate = ndd.kullback_leibler_divergence(pk, qk)
    assert numpy.isclose(estimate, expected)
def define_living_place(province):
    """Randomly pick a living place for someone from ``province``.

    The weight of each name in ``province_names`` is its entry in
    ``province.living_places`` divided by the ``'Total'`` entry; a single
    multinomial trial selects the position, and the matching element of
    ``provinces`` is returned.
    """
    distribution = province.living_places
    total = distribution['Total']
    probabilities = [distribution[name] / total for name in province_names]
    selection = random.multinomial(1, probabilities)
    # The draw is one-hot: return the province at the first non-zero slot.
    for position, chosen in enumerate(selection):
        if chosen:
            return provinces[position]
 def sample_newx(row):
     """
     Draw the next tag given the current one.

     :param row: index of the current POS; row ``row`` of ``t`` supplies the
                 probabilities of each candidate next tag (one per column)
     :return: column index of the sampled tag
     """
     draw = multinomial(1, t[row, :])
     # The single trial is one-hot; locate the entry equal to 1.
     return np.where(draw == 1)[0][0]
Exemple #43
0
    def rvs(self, x=None, size=[], return_xy=False):
        """Draw multinomial outputs for the given (or freshly sampled) inputs.

        When ``x`` is None, ``size`` must be an int and ``x`` is drawn as a
        standard-normal array of shape ``(size, self.D_in)``; otherwise ``x``
        must be 2-D with ``self.D_in`` columns. ``self.pi(x)`` supplies the
        probabilities: one multinomial draw of ``self.N`` trials for a 1-D
        result, or one draw per row for a 2-D result.

        Returns ``(x, y)`` when ``return_xy`` is true, else just ``y``.
        Raises NotImplementedError for any other dimensionality of ``pi``.
        """
        if x is not None:
            assert x.ndim == 2 and x.shape[1] == self.D_in
        else:
            assert isinstance(size, int)
            x = npr.randn(size, self.D_in)

        pi = self.pi(x)
        ndim = pi.ndim
        if ndim == 1:
            y = npr.multinomial(self.N, pi)
        elif ndim == 2:
            draws = []
            for row_probs in pi:
                draws.append(npr.multinomial(self.N, row_probs))
            y = np.array(draws)
        else:
            raise NotImplementedError

        if return_xy:
            return (x, y)
        return y
Exemple #44
0
def error(alpha, n):
    """Return the relative entropy-estimation error and relative uncertainty.

    Probabilities are drawn from ``dirichlet(alpha)`` and ``n`` counts from
    them; ``ndd.entropy`` estimates the entropy from the counts. Both the
    error (estimate minus ``sp_entropy`` of the drawn probabilities) and the
    reported standard deviation are divided by that reference entropy.
    """
    n_bins = len(alpha)
    pvals = dirichlet(alpha)
    counts = multinomial(n, pvals)
    reference = sp_entropy(pvals)
    estimate, std = ndd.entropy(counts, k=n_bins, return_std=True)
    relative_error = (estimate - reference) / reference
    relative_std = std / reference
    return relative_error, relative_std
Exemple #45
0
def test_JSD():
    """Check ``ndd.divergence.JSDivergence`` against a stored reference value.

    The input is four multinomial count vectors drawn from one seeded
    Dirichlet sample.
    """
    alpha, n_draws, n_bins = 1.0, 100, 20
    random.seed(SEED)
    pk = random.dirichlet([alpha] * n_bins)
    counts = random.multinomial(n_draws, pk, size=4)
    expected = -0.017281201076104313
    estimate = ndd.divergence.JSDivergence()(counts)
    assert numpy.isclose(estimate, expected)
Exemple #46
0
def smooth_comb(nsamp):
    """Split ``nsamp`` samples over six components and pass them to ``_generate``.

    Component ``k`` (k = 0..5) gets the parameter pair
    ``((65-96)*2**-k/21, 32/63*2**(-2*k))`` and weight ``2**(5-k)/63``.
    NOTE(review): the fractions presumably rely on true division being in
    effect for this module -- confirm.
    """
    inputs = [((65-96)*2**-k/21, 32/63*2**(-2*k)) for k in xrange(6)]
    rates = [2**(5-k)/63 for k in xrange(6)]
    # Number of samples assigned to each component.
    counts = rand.multinomial(nsamp, rates, size=1)[0]
    return _generate(inputs, counts)
Exemple #47
0
def discrete_comb(nsamp):
    """Split ``nsamp`` samples over six components and pass them to ``_generate``.

    Three components use the pair ``((2*k-15)/7, 2/7)`` for k = 0..2 with
    weight 2/7 each; three use ``(2*k/7, 1/21)`` for k = 8..10 with weight
    1/21 each.
    NOTE(review): the fractions presumably rely on true division being in
    effect for this module -- confirm.
    """
    inputs = [((2*k-15)/7, 2/7) for k in xrange(3)]
    inputs += [(2*k/7, 1/21) for k in xrange(8, 11)]
    rates = [2/7]*3+[1/21]*3
    # Number of samples assigned to each component.
    counts = rand.multinomial(nsamp, rates, size=1)[0]
    return _generate(inputs, counts)
Exemple #48
0
def asym_claw(nsamp):
    """Split ``nsamp`` samples over six components and pass them to ``_generate``.

    The first component is ``(0, 1)`` with weight 1/2; for k = -2..2 a
    component ``(k+1/2, 2**(-k)/10)`` with weight ``2**(1-k)/31`` follows.
    NOTE(review): the fractions presumably rely on true division being in
    effect for this module -- confirm.
    """
    inputs = [(0, 1)] + [(k+1/2, 2**(-k)/10) for k in xrange(-2, 3)]
    rates = [1/2] + [2**(1-k)/31 for k in xrange(-2, 3)]
    # Number of samples assigned to each component.
    counts = rand.multinomial(nsamp, rates, size=1)[0]
    return _generate(inputs, counts)
Exemple #49
0
def generate_spiral2d(
        nspiral=1000,
        ntotal=500,
        nsample=100,
        start=0.,
        stop=1,  # approximately equal to 6pi
        noise_std=.1,
        a=0.,
        b=1.,
        savefig=True):
    """Generate noisy sub-trajectories of two 2-D spirals.

    Two reference spirals (one labelled clockwise, one counter-clockwise)
    are evaluated at ``ntotal`` evenly spaced timestamps in
    ``[start, stop]``. For each of the ``nspiral`` samples a rotation is
    picked with probability 1/2, a start index is drawn uniformly from
    ``[nsample, ntotal - nsample)``, and ``nsample`` consecutive points are
    copied with Gaussian noise of scale ``noise_std`` added.

    When ``savefig`` is true the two reference spirals are plotted and saved
    to ``./ground_truth.png``.

    Returns ``(orig_trajs, samp_trajs, orig_ts, samp_ts)``: the stacked
    reference trajectory chosen for each sample, the stacked noisy samples,
    all timestamps, and the first ``nsample`` timestamps.
    """
    orig_ts = np.linspace(start, stop, num=ntotal)
    samp_ts = orig_ts[:nsample]

    # Clockwise spiral: the angle runs backwards in time; the extra 1 keeps
    # the radius term 50 / angle away from a division by zero.
    angle_cw = stop + 1. - orig_ts
    radius_cw = a + b * 50. / angle_cw
    orig_traj_cw = np.stack(
        (radius_cw * np.cos(angle_cw) - 5., radius_cw * np.sin(angle_cw)),
        axis=1)

    # Counter-clockwise spiral: the angle is the timestamp itself.
    angle_cc = orig_ts
    radius_cc = a + b * angle_cc
    orig_traj_cc = np.stack(
        (radius_cc * np.cos(angle_cc) + 5., radius_cc * np.sin(angle_cc)),
        axis=1)

    if savefig:
        plt.figure()
        plt.plot(orig_traj_cw[:, 0], orig_traj_cw[:, 1], label='clock')
        plt.plot(orig_traj_cc[:, 0], orig_traj_cc[:, 1], label='counter clock')
        plt.legend()
        plt.savefig('./ground_truth.png', dpi=500)
        print('Saved ground truth spiral at {}'.format('./ground_truth.png'))

    orig_trajs = []
    samp_trajs = []
    for _ in range(nspiral):
        # Uniform one-hot draw over the admissible start offsets; shifting by
        # nsample keeps t0 away from both ends of the trajectory.
        start_draw = npr.multinomial(1, [1. / (ntotal - 2. * nsample)] *
                                     (ntotal - int(2 * nsample)))
        t0_idx = np.argmax(start_draw) + nsample

        # uniformly select rotation
        cc = bool(npr.rand() > .5)
        orig_traj = orig_traj_cc if cc else orig_traj_cw
        orig_trajs.append(orig_traj)

        window = orig_traj[t0_idx:t0_idx + nsample, :].copy()
        window += npr.randn(*window.shape) * noise_std
        samp_trajs.append(window)

    # Stack along a new leading axis so both outputs are indexed by sample.
    orig_trajs = np.stack(orig_trajs, axis=0)
    samp_trajs = np.stack(samp_trajs, axis=0)

    return orig_trajs, samp_trajs, orig_ts, samp_ts
Exemple #50
0
def _get_standard_negative_triplets(graph_dataset, nodes, positives,
                                    num_negatives, neighbors_to_distances,
                                    distances_to_neighbors, max_neighbors,
                                    mode, is_val):
    """
    Get negatives for each (node, positive_node) pair by randomly and uniformly sampling nodes which are farther away
    from a given node than the corresponding positive node.

    For every pair, the candidates are the entries of ``distances_to_neighbors[idx]`` whose radius is strictly greater
    than the positive's distance. If there are none, ``_get_random_negative_nodes`` supplies the negatives; in
    validation mode (``is_val``) the first ``num_negatives`` candidates are taken without sampling; otherwise the
    number of negatives taken from each radius is drawn from a multinomial weighted by the radius' candidate count.
    ``max_neighbors`` is not used here.

    :return: (List[int], List[int], List[int]) representing nodes, positives and negatives respectively.
    """
    filtered_nodes, filtered_positives, filtered_negatives = [], [], []
    for idx, (node, pos_candidate) in enumerate(zip(nodes, positives)):
        pos_distance = neighbors_to_distances[idx][pos_candidate]
        negative_rs_to_neighbors = {
            r: elems
            for r, elems in distances_to_neighbors[idx].items()
            if r > pos_distance
        }
        # Sort once so counts, sampled counts and candidates stay aligned.
        sorted_negative_rings = sorted(negative_rs_to_neighbors.items())
        negative_candidate_counts = [
            len(radius_candidates)
            for _, radius_candidates in sorted_negative_rings
        ]
        total_candidates = sum(negative_candidate_counts)
        if total_candidates == 0:
            LOG.debug(
                f'Sampling random nodes as negative candidates for {node}, {pos_candidate}.'
            )
            negative_candidates = _get_random_negative_nodes(
                graph_dataset, node, num_negatives,
                get_mask_from_mode(graph_dataset, mode),
                neighbors_to_distances[idx], pos_distance)
        elif is_val:
            negative_candidates = list(
                islice(chain.from_iterable(negative_rs_to_neighbors.values()),
                       num_negatives))
        else:
            normalized_candidate_counts = [
                count / float(total_candidates)
                for count in negative_candidate_counts
            ]
            sampled_counts = multinomial(min(num_negatives, total_candidates),
                                         normalized_candidate_counts)
            negative_candidates = []
            # Pair each sampled count with the radius it was drawn for.
            # Reconstructing radii as pos_distance + 1, + 2, ... is only
            # correct when the radii present are consecutive integers.
            for (_, radius_candidates), elems_count in zip(
                    sorted_negative_rings, sampled_counts):
                negative_candidates.extend(
                    SAMPLE(
                        radius_candidates,
                        min(int(elems_count), len(radius_candidates))))

        for neg in negative_candidates:
            filtered_nodes.append(node)
            filtered_positives.append(pos_candidate)
            filtered_negatives.append(neg)

    return filtered_nodes, filtered_positives, filtered_negatives
Exemple #51
0
    def sample(self, params, xelems):
        """Draw one output index tuple for the given inputs.

        ``self.probs(params, xelems)`` is flattened and used as the
        probabilities of a single multinomial trial; the drawn flat index is
        converted back to a multi-index over ``self.ydims``.

        Raises ValueError when ``xelems`` does not contain ``self.xrank``
        items.
        """
        given = len(xelems)
        if given != self.xrank:
            raise ValueError(f'{self.xrank} inputs should be given;  '
                             f'{given} given instead!')

        flat_probs = self.probs(params, xelems).ravel()
        one_hot = rnd.multinomial(1, flat_probs)
        return np.unravel_index(one_hot.argmax(), self.ydims)
Exemple #52
0
    def sample(self, *xs):
        """Draw one tuple of output elements for the input indices ``xs``.

        ``self.probs[xs]`` is flattened and used as the probabilities of a
        single multinomial trial; the drawn flat index is converted to a
        multi-index over ``self.ydims`` and each component is mapped through
        the ``elem`` method of the matching entry of ``self.yspaces``.
        """
        self.logger.debug(f'sample() \t; x={xs}')
        assert len(xs) == self.nx

        flat_probs = self.probs[xs].ravel()
        one_hot = rnd.multinomial(1, flat_probs)
        yidxs = np.unravel_index(one_hot.argmax(), self.ydims)
        return tuple(
            space.elem(index) for space, index in zip(self.yspaces, yidxs))
Exemple #53
0
 def _sample(p):
     '''
     Draw one index from the multinomial distribution defined by p
     :param p: list
         List of probabilities representing probability vector for the multinomial distribution
     :return: int
         index of randomly selected output
     '''
     # A single trial is one-hot, so the first non-zero entry is the draw.
     for index, entry in enumerate(multinomial(1, p)):
         if entry != 0:
             return index
     raise IndexError('list index out of range')
Exemple #54
0
def get_prob_user():
    """Pick a username at random, favouring users with fewer ``work_times``.

    The ``work_times`` column of the frame returned by
    ``show_info_of_user()`` is converted to int (zeros are treated as 1);
    each user's weight is the normalised inverse of their share of the
    total. One multinomial trial selects the row.

    Returns the selected row's ``username`` as a ``str``.
    """
    df = show_info_of_user()
    s1 = df['work_times'].astype(int)
    s1[s1 == 0] = 1  # avoid a zero share, which would divide by zero below
    s2 = 1 / (s1 / s1.sum())
    prob = s2 / s2.sum()
    # Re-derive the last entry from the total minus the other entries.
    prob.iloc[-1] = prob.sum() - prob.iloc[:-1].sum()
    index = np.where(multinomial(1, prob) == 1)[0][0]
    # `index` is a position in the probability vector, so look the row up by
    # position; label-based .loc only matches for a default 0..n-1 index.
    return str(df['username'].iloc[index])
Exemple #55
0
def main():
    """Simulate thirty 20d6 roll totals and print their ``normalsort`` output."""
    # 20d6, sampled 30 times: each row counts how often each face came up
    dice_roll_info = multinomial(20, [1 / 6] * 6, 30)
    # Total of a roll = sum over faces of (face value * times it came up).
    rolls = [
        sum(face * hits for face, hits in enumerate(row, start=1))
        for row in dice_roll_info
    ]
    normalsorted = normalsort(rolls, bin_count=10, verbose=True)
    print(f'normalsort output:\n\t{normalsorted}')
def resample_population_matrix(population_matrix):
    """Resample every row of a count matrix from the pooled column frequencies.

    Each row keeps its original total but its counts are redrawn from a
    multinomial whose probabilities are the column sums of the whole matrix
    divided by the grand total.

    Returns a new array with one resampled row per input row.
    """
    # Pooled column frequencies, as floats so the division is not integral.
    column_probs = population_matrix.sum(axis=0) * 1.0
    column_probs /= column_probs.sum()

    row_totals = population_matrix.sum(axis=1)

    resampled_rows = []
    for total in row_totals:
        resampled_rows.append(multinomial(total, column_probs))
    return numpy.array(resampled_rows)
Exemple #57
0
def multinomial_sample(distribution):
    """Sample a random index according to a multinomial distribution.

    @param distribution: probability distribution
    @type distribution: array of log probabilities
    @return: index of the sampled entry, from 0 up to (but excluding) the
        length of distribution
    @rtype: integer
    """
    # Back from log space to plain probabilities.
    probabilities = exp(distribution)
    # One trial gives a one-hot vector; argmax is the position of the 1.
    one_hot = multinomial(1, probabilities)
    return one_hot.argmax()
    def update_CR_dist(self):
        """Draw one crossover index per chain, uniformly over ``self.nCR`` values.

        As visible here the drawn values are only kept in locals and nothing
        is stored or returned.
        NOTE(review): this looks like the beginning of a longer routine --
        confirm against the full source.
        """
        t = 1
        Lm = 0
        pm = 1. / self.nCR

        for _ in range(self.nchains):
            # One-hot uniform draw; +1 turns the position into m in 1..nCR.
            draw = multinomial(1, [pm] * self.nCR)
            m = draw.nonzero()[0][0] + 1
            CR = float(m) / self.nCR
            Lm += 1
def sample_random_read_pair(gene, true_psi, read_len, overhang_len, insert_len, mean_frag_len):
    """
    Sample a random paired-end read (not taking into account overhang) from
    a gene, given the true Psi value, read length, overhang length and the
    (fixed) insert length.

    A paired-end read is defined as (genomic_left_read_start, genomic_left_read_end,
                                     genomic_right_read_start, genomic_right_read_end).

    The isoform is chosen with probability proportional to its Psi value
    times its number of positions (isoform length - mean_frag_len + 1); the
    left read start is then drawn from the distribution returned by
    compute_read_pair_position_prob, and both mates are converted to genomic
    coordinates before being aligned with gene.align_read_pair.

    Returns (alignment, frag_lens, pe_read).
    """
    iso_lens = [isoform.len for isoform in gene.isoforms]
    num_positions = array([(length - mean_frag_len + 1) for length in iso_lens])
    # probability of sampling a particular position from an isoform -- assume uniform for now
    # (computed as in the original; not used further below)
    iso_probs = [1/float(n) for n in num_positions]
    psi_frag_denom = sum(num_positions * array(true_psi))
    psi_frags = [(num_pos * curr_psi)/psi_frag_denom
                 for num_pos, curr_psi in zip(num_positions, true_psi)]

    # Choose isoform to sample read from
    chosen_iso = list(multinomial(1, psi_frags)).index(1)
    iso_len = gene.isoforms[chosen_iso].len
    frag_len = insert_len + 2*read_len
    isoform_position_probs = compute_read_pair_position_prob(iso_len, read_len, frag_len)

    # Left mate: start drawn from the position distribution
    left_read_start = list(multinomial(1, isoform_position_probs)).index(1)
    left_read_end = left_read_start + read_len - 1
    # Right mate: starts after the left read and the insert length
    right_read_start = left_read_start + read_len + insert_len
    right_read_end = left_read_start + (2*read_len) + insert_len - 1

    # convert read coordinates from coordinates of isoform that generated it to genomic coordinates
    left_genomic = gene.isoforms[chosen_iso].isoform_coords_to_genomic(
        left_read_start, left_read_end)
    genomic_left_read_start, genomic_left_read_end = left_genomic
    right_genomic = gene.isoforms[chosen_iso].isoform_coords_to_genomic(
        right_read_start, right_read_end)
    genomic_right_read_start, genomic_right_read_end = right_genomic

    pe_read = (genomic_left_read_start, genomic_left_read_end,
               genomic_right_read_start, genomic_right_read_end)
    alignment, frag_lens = gene.align_read_pair(*pe_read, overhang=overhang_len)
    return (alignment, frag_lens, pe_read)
Exemple #60
0
def asym_double_claw(nsamp):
    """Split ``nsamp`` samples over eight components and pass them to ``_generate``.

    Two components ``(2*k-1, 2/3)`` for k = 0..1 carry weight 46/100 each,
    three components ``(-k/2, 1/100)`` for k = 1..3 carry weight 1/300 each,
    and three components ``(k/2, 7/100)`` for k = 1..3 carry weight 7/300
    each.
    NOTE(review): the fractions presumably rely on true division being in
    effect for this module -- confirm.
    """
    inputs = [(2*k-1, 2/3) for k in xrange(2)]
    inputs.extend([(-k/2, 1/100) for k in xrange(1, 4)])
    inputs.extend([(k/2, 7/100) for k in xrange(1, 4)])
    rates = [46/100]*2+[1/300]*3+[7/300]*3
    # Number of samples assigned to each component.
    counts = rand.multinomial(nsamp, rates, size=1)[0]
    return _generate(inputs, counts)