def UpdateModelParamsFromPrior(state):
    alpha = state.hypers['alpha']
    alpha_token = state.hypers['alpha_token']
    state.topic_sub_weights = (
        cDists.SampleDirichletArray(alpha * np.ones((state.n_topics, state.n_subs))))
    state.topic_rel_weights = (
        cDists.SampleDirichletArray(alpha * np.ones((state.n_topics, state.n_rels))))
    state.type_sub_weights = (
        cDists.SampleDirichletArray(alpha * np.ones((state.n_types, state.n_subs))))
    try:
        state.sub_token_weights = (
            cDists.SampleDirichletArray(alpha_token * np.ones((state.n_subs, state.n_tokens))))
        state.rel_token_weights = (
            cDists.SampleDirichletArray(alpha_token * np.ones((state.n_rels, state.n_tokens))))
    except ZeroDivisionError:
        # If alpha_token is extremely small, approximate the Dirichlet draw
        # with a (near-)delta function on a randomly chosen token.
        logging.warning('Token alpha %r very small. Approximating the Dirichlet draw '
                        'with a delta function.', alpha_token)
        eps = 1e-4  # Small constant to prevent numeric instability.
        sub_assign = random.randint(state.n_tokens, size=state.n_subs)
        sub_token_weights = np.zeros((state.n_subs, state.n_tokens)) + eps
        sub_token_weights[np.arange(state.n_subs), sub_assign] = 1. - eps
        state.sub_token_weights = sub_token_weights
        # The assignments index into the token dimension (n_tokens), not the
        # relation dimension.
        rel_assign = random.randint(state.n_tokens, size=state.n_rels)
        rel_token_weights = np.zeros((state.n_rels, state.n_tokens)) + eps
        rel_token_weights[np.arange(state.n_rels), rel_assign] = 1. - eps
        state.rel_token_weights = rel_token_weights
    state.type_names = pandas.Series({0: 'ref', 1: 'real'})
    state.topic_weights = random.dirichlet(np.repeat(alpha, state.n_topics))
    state.type_weights = random.dirichlet(np.repeat(alpha, state.n_types))
def sample_topic_parameters(self):
    for i in xrange(self.N):
        z_sum = 0
        for j in self.I_U[i]:
            z_sum += self.z_U[i, j]
        self.theta_U[i] = dirichlet(self.alpha / self.K_U + z_sum)
    for j in xrange(self.M):
        z_sum = 0
        for i in self.I_V[j]:
            z_sum += self.z_V[i, j]
        self.theta_V[j] = dirichlet(self.alpha / self.K_V + z_sum)
def compute_logp_independent_block_mc(N, alpha_row=None, alpha_col=None, iterations=1e5):
    """Compute the Monte Carlo log-likelihood of a matrix under the
    assumption of independence.
    """
    if N.size == 1:
        return 0
    if alpha_row is None:
        alpha_row = np.ones(N.shape[1])
    if alpha_col is None:
        alpha_col = np.ones(N.shape[0])
    theta_row = dirichlet(alpha_row, size=int(iterations)).T
    theta_col = dirichlet(alpha_col, size=int(iterations)).T
    Theta = theta_row[:, None, :] * theta_col
    logp_ibs = (gammaln(N.sum() + 1) - gammaln(N + 1).sum()
                + (np.log(Theta) * N[:, :, None]).sum(0).sum(0))
    return logmean(logp_ibs)
def _initialize_gibbs_sampler(self):
    """
    Initialize the Gibbs sampler

    This sets the initial values of the C{labels} and C{thetas} parameters.
    """
    pi = log(dirichlet(self.hyp_pi, 1)[0])
    categories = self._categories()
    documents = self._documents()
    self.thetas = empty(self.hyp_thetas.shape)
    for category_index in xrange(categories):
        self.thetas[category_index] = log(dirichlet(self.hyp_thetas[category_index], 1)[0])
    self.labels = array([multinomial_sample(pi) for _ in xrange(documents)])
def sample_topic_parameters(self):
    # user topic assignment
    for i in xrange(self.N):
        z_sum = 0
        for j in self.I_U[i]:
            z_sum += self.z_U[i, j]
        self.theta_U[i] = dirichlet(self.alpha / self.K_U + z_sum)
    # item topic assignment
    for j in xrange(self.M):
        z_sum = 0
        for i in self.I_V[j]:
            z_sum += self.z_V[i, j]
        self.theta_V[j] = dirichlet(self.alpha / self.K_V + z_sum)
def simulate_combat(self, allowed_time, ant_0_scoring=ConservativeScore,
                    ant_1_scoring=ConservativeScore, log=None):
    start = time.time()
    score_0 = ant_0_scoring(self, 0)
    score_1 = ant_1_scoring(self, 1)
    self.allowed_policies()
    init_poses = dict((a, a.pos) for a in self.ants)
    killed = []
    steps = 0
    while (time.time() - start) < allowed_time:
        steps += 1
        action = {}
        # Restore any ants killed in the previous rollout and reset positions.
        for k in killed:
            self.add_ant(k)
        for a, p in init_poses.iteritems():
            a.pos = p
        # Sample one action per ant from its current policy weights.
        for ant in self.ants:
            ps = dirichlet(self.policy[ant])
            i = multinomial(1, ps).nonzero()[0][0]
            if not self.move_direction(ant, self.actions[i]):
                print "CAZZZ"
            action[ant] = i
        killed = self.step_turn()
        # Reinforce the chosen actions by each player's score.
        for a, p in self.policy.iteritems():
            if a.owner == 0:
                p[action[a]] += score_0(self)
            else:
                p[action[a]] += score_1(self)
    for k in killed:
        self.add_ant(k)
    for a, p in init_poses.iteritems():
        a.pos = p
    retpolicy = {}
    for a, p in self.policy.iteritems():
        ps = dirichlet(p)
        i = multinomial(1, ps).nonzero()[0][0]
        retpolicy[a] = self.actions[i]
    if log is not None:
        log.info("Number of steps: %d", steps)
    else:
        print "Number of steps: ", steps
    return retpolicy
def _sampleFromModel(self, D=200, T=100, K=10, avgWordsPerDoc=500):
    '''
    Create a test dataset according to the model

    Params:
        D              - Sample documents (each with associated features)
        T              - Vocabulary size, the number of "terms". Must be a square number
        K              - Observed topics
        avgWordsPerDoc - average number of words per document generated (Poisson)

    Returns:
        tpcs    - the D x K matrix of per-document topic distributions
        vocab   - the K x T matrix of per-topic word distributions
        docLens - the vector of document lengths
        W       - the D x T word-count matrix
    '''
    # Generate vocab
    beta = 0.1
    betaVec = np.ndarray((T,))
    betaVec.fill(beta)
    vocab = rd.dirichlet(betaVec, size=K)

    # Generate the shared covariance matrix
    # ...no real structure in this.
    sigT = rd.random((K, K))
    sigT = sigT.dot(sigT)

    # Generate topic mean
    alpha = 1
    alphaVec = np.ndarray((K,))
    alphaVec.fill(alpha)
    topicMean = rd.dirichlet(alphaVec)

    # Generate the actual topics.
    tpcs = rd.multivariate_normal(topicMean, sigT, size=D)
    tpcs = rowwise_softmax(tpcs)

    # Generate the corpus
    docLens = rd.poisson(avgWordsPerDoc, (D,)).astype(np.float32)
    W = tpcs.dot(vocab)
    W *= docLens[:, np.newaxis]
    W = np.array(W, dtype=np.int32)  # truncate word counts to integers
    W = ssp.csr_matrix(W)

    # Return the true parameter values and the generated observations
    return tpcs, vocab, docLens, W
def generate_corpus():
    beta = np.zeros((NUM_TOPICS, VOCAB_SIZE))
    corpus = np.zeros((NUM_DOCS, VOCAB_SIZE), dtype='int64')
    # Draw per-topic word distributions
    for k in range(NUM_TOPICS):
        beta[k, :] = nprand.dirichlet(eta)
    for m in range(NUM_DOCS):
        # Draw per-document topic distribution
        theta = nprand.dirichlet(alpha)
        for i in range(DOC_LENGTH):
            topic = lda.sample(theta)
            word = lda.sample(beta[topic, :])
            corpus[m, word] += 1
    return (corpus, beta)
def sample_pi(alpha, zz):
    r"""
    Sample mixing weights from the posterior distribution p(\pi|Z).

    Inputs
    ------
    alpha: array [nr_clusters]
        The parameters of the prior of the mixing weights Dirichlet(\pi|alpha).
    zz: array [nr_points]
        The observed cluster assignments of each of the N points.

    Output
    ------
    pi: array [nr_clusters]
        Sample for the mixing weights.
    """
    K = len(alpha)
    # Count how many times each state appears.
    counts = np.zeros(K)
    for state in zz:
        counts[state] += 1
    alpha_new = np.array(alpha) + counts
    pi = dirichlet(alpha_new)
    return pi
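# Hypothetical usage sketch (not part of the original source): a quick call to
# sample_pi above with made-up toy data, assuming numpy is imported as np and
# numpy.random.dirichlet as dirichlet, as that snippet expects.
toy_alpha = np.ones(3)                     # symmetric Dirichlet prior over 3 clusters
toy_zz = np.array([0, 0, 1, 2, 2, 2])      # toy cluster assignments for 6 points
toy_pi = sample_pi(toy_alpha, toy_zz)      # one posterior draw; toy_pi sums to 1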
def _init_latent_params(self):
    V = self.n_actors
    A = self.n_actions
    T = self.n_timesteps
    R = self.n_regimes
    C = self.n_communities
    K = self.n_topics

    if self.gam is None:
        self.gam = (0.1 ** (1. / 4)) * (R + K + C + C)
        print 'Setting gam to: %f' % self.gam

    self.zeta = 1.
    self.delta = 1.
    self.rho_R = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=R)
    self.nu_K = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=K)
    self.eta_d_C = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=C)
    self.eta_a_C = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=C)

    self.d = 1.
    shp_RKCC = np.ones((R, K, C, C))
    shp_RKCC[:] = np.outer(self.eta_d_C, self.eta_d_C)
    shp_RKCC[:, :, np.identity(C).astype(bool)] = self.eta_a_C * self.eta_d_C
    shp_RKCC *= self.nu_K[None, :, None, None]
    shp_RKCC *= self.rho_R[:, None, None, None]
    self.Lambda_RKCC = sample_gamma(shp_RKCC, 1. / self.d)

    self.Psi_TR = sample_gamma(self.e, 1. / self.f, size=(T, R))

    self.Phi_AK = np.ones((A, K))
    self.Phi_AK[:, :] = rn.dirichlet(self.e * np.ones(A), size=K).T

    self.alpha_V = np.ones(V) * self.e
    self.beta = 1.
    self.Theta_VC = np.ones((V, C))
def bayesian_bootstrap(self, par=1):
    weight = npr.dirichlet([par] * self.n, (self.times, self.num_samples))
    # Loop over `times` first so the flat list matches the reshape below.
    samples = [npr.choice(self.data[j], size=self.n, p=weight[j][i])
               for j in xrange(self.times)
               for i in xrange(self.num_samples)]
    samples = np.array(samples).reshape(self.times, self.num_samples, self.n)
    return samples
def sample(self, observations_by_state):
    """
    Sample a new set of distribution parameters given a sample of observations
    from the given state. The internal parameters are updated.

    Parameters
    ----------
    observations_by_state : [ numpy.array with shape (N_k,) ] with nstates elements
        observations_by_state[k] are all observations associated with hidden state k

    Examples
    --------

    initialize output model

    >>> B = np.array([[0.5, 0.5], [0.1, 0.9]])
    >>> output_model = DiscreteOutputModel(B)

    sample given observation

    >>> obs = [[0, 0, 0, 1, 1, 1], [1, 1, 1, 1, 1, 1]]
    >>> output_model.sample(obs)

    """
    from numpy.random import dirichlet
    N, M = self._output_probabilities.shape  # nstates, nsymbols
    for i in range(len(observations_by_state)):
        # count symbols found in data
        count = np.bincount(observations_by_state[i], minlength=M).astype(float)
        # if there are no counts at all we cannot sample, so leave the
        # output probabilities for this state as they are
        if count.sum() > 0:
            # sample from the Dirichlet posterior: prior pseudo-counts + observed counts
            self._output_probabilities[i, :] = dirichlet(self.prior[i] + count)
def newQueryState(data, modelState):
    '''
    Creates a new LDA QueryState object. This contains all parameters and
    random variables tied to individual datapoints.

    Param:
        data       - the dataset, of which only the DxT document-term matrix
                     data.words is used for training or querying
        modelState - the model state object

    Return:
        A QueryState object
    '''
    docLens = np.squeeze(np.asarray(data.words.sum(axis=1)))

    # Initialise the per-token assignments at random according to the dirichlet hyper
    # This is super-slow
    dist = modelState.topicPrior.copy()
    dist /= dist.sum()

    topicDists = rd.dirichlet(dist, size=data.doc_count).astype(modelState.dtype)
    topicDists *= docLens[:, np.newaxis]
    topicDists += modelState.topicPrior[np.newaxis, :]

    return QueryState(docLens, topicDists, False)
def test_case_generator():
    sys.path.append('solutions/hw2')
    sys.path.append('suppl/hw2')
    from assignment_two_adaboost import weak_learner as wl
    sys.path.pop()
    sys.path.pop()
    from kernels import rbf

    seed(1)
    instances = normal(size=(50, 5))
    labels = binomial(1, 0.5, 50)
    dist = dirichlet(uniform(size=50))
    ker = rbf(1)
    mat = uniform(size=(5, 5))
    mat = (mat / np.sum(mat, axis=1)).T
    test_cases = {'assignment_two_adaboost': {
                      'compute_error': [lambda x: x[3] < 0.2, instances, labels, dist],
                      'run_adaboost': [instances, labels, wl],
                      'update_dist': [lambda x: x[2] > -0.2, instances, labels, dist, normal()],
                      'weak_learner': [instances, labels, dist]},
                  'assignment_two_pagerank': {
                      'compute_pageranks': [mat],
                      'main': []},
                  'assignment_two_svm': {
                      'evaluate_classifier': [lambda x: norm(x) > 5, instances, labels],
                      'svm_train': [instances, labels, ker]}}
    return test_cases
def newModelAtRandom(data, K, topicPrior=None, vocabPrior=None, dtype=DTYPE):
    '''
    Creates a new LDA ModelState for the given training set and the given number
    of topics. Everything is instantiated purely at random. This contains all
    parameters independent of the dataset (e.g. learnt priors)

    Param:
        data       - the dataset of words, features and links of which only words
                     are used in this model
        K          - the number of topics
        topicPrior - the prior over topics, either a scalar or a K-dimensional vector
        vocabPrior - the prior over vocabs, either a scalar or a T-dimensional vector
        dtype      - the datatype to be used throughout.

    Return:
        A ModelState object
    '''
    assert K > 1, "There must be at least two topics"

    T = data.words.shape[1]

    if topicPrior is None:
        topicPrior = constantArray((K,), 50.0 / K, dtype)  # From Griffiths and Steyvers 2004
    if vocabPrior is None:
        vocabPrior = 1.1  # Also from G&S

    vocabPriorVec = constantArray((T,), vocabPrior, dtype)
    wordDists = rd.dirichlet(vocabPriorVec, size=K).astype(dtype)

    # Perturb to avoid zero probabilities
    wordDists += 1. / T
    wordDists /= (wordDists.sum(axis=1))[:, np.newaxis]

    return ModelState(K, topicPrior, vocabPrior, wordDists, dtype, MODEL_NAME)
def sample(self):
    '''
    Draw a single sample from the Dirichlet distribution.

    alpha : array
        Parameter of the distribution (k dimension for a sample of dimension k).
    '''
    return npr.dirichlet(self.alpha, size=None)
def sample(self, eta, size=1):
    """
    @param eta: the natural parameters
    @param size: the size of the sample
    @return: A sample of sufficient statistics
    """
    from numpy.random import dirichlet
    return self.T(dirichlet(self.theta(eta), size=size))
def compute_logp_independent_block_mc(X, alpha=None, iterations=1e5):
    """Compute the Monte Carlo log-likelihood of a matrix under the
    assumption of independence.
    """
    if alpha is None:
        alpha = np.ones(X.shape[1])
    Theta = dirichlet(alpha, size=int(iterations)).T
    # log(\prod(one Multinomial pdf for each row))
    logp_ibs = (gammaln(X.sum(1) + 1).sum() - gammaln(X + 1).sum(1).sum()
                + (np.log(Theta[:, None, :]) * X[:, :, None]).sum(1).sum(0))
    return logmean(logp_ibs)
def _random_population(self):
    """
    Generate a random population on the unit simplex of appropriate
    dimensionality.
    """
    rand.seed()
    return rand.dirichlet([1] * len(self.types))
def node_prob(tran_prior, node_tran_sample, num_node):
    node_prob_mat = tran_prior + node_tran_sample
    # Draw each row of the transition matrix from its Dirichlet posterior.
    for i in range(num_node):
        node_prob_mat[i, :] = npr.dirichlet(node_prob_mat[i, :])
    node_prob_arr = np.reshape(node_prob_mat, num_node * num_node)
    # print(node_prob_arr, 'node_prob_arr')
    # print(np.sum(node_prob_arr[0:9]), 'np.sum(node_prob_arr[0:9])')
    return node_prob_arr
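# Hypothetical usage sketch (not part of the original source): calls node_prob
# above with made-up prior pseudo-counts and observed transition counts, assuming
# numpy is imported as np and numpy.random as npr, as that snippet expects.
toy_prior = np.ones((3, 3))                        # flat Dirichlet prior per row
toy_counts = np.array([[4., 1., 0.],
                       [0., 5., 2.],
                       [1., 1., 1.]])              # toy observed transition counts
toy_probs = node_prob(toy_prior, toy_counts, 3)    # flat vector; each row block sums to 1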
def apply(self, agents):
    alpha = dirichlet(ones(len(agents)))
    values = {}
    for param in self.params:
        aux = [agent.__dict__[param] for agent in agents]
        values[param] = real_crossover.recombinate(alpha, aux)
    _cls = agents[0].__class__
    return [_cls(**values)]
def initialize_uniformly(data, D, K):
    mus = npr.uniform(low=0.7, high=1.3, size=[K, D])
    sigmas = np.zeros((K, D, D))
    for k in range(K):
        sigmas[k, :, :] = np.eye(D, D)
    pis = npr.dirichlet(np.ones(K))  # mixture coefficients
    return mus, sigmas, pis
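# Hypothetical usage sketch (not part of the original source): initializes a toy
# mixture with initialize_uniformly above; the data array is made up and, as in
# that snippet, is not actually inspected during initialization.
toy_data = npr.normal(size=(100, 2))              # 100 points in 2 dimensions
mus0, sigmas0, pis0 = initialize_uniformly(toy_data, D=2, K=3)
assert np.isclose(pis0.sum(), 1.0)                # the mixture weights lie on the simplex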
def _randomise(self):
    """
    Randomise the variational parameters.
    """
    from numpy.random import dirichlet
    self.phi = dirichlet(ones(self.K), size=self.N)
    self.tau = outer(ones(self.K), self._lambda)
    self.gamma[:, 0] = 1.
    self.gamma[:, 1] = self.alpha
def posterior_samples(Nlx, Nly, nsamps=100, verbose=False, clipthresh=1e-6):
    estimates = []
    if verbose:
        misc.pnn("\n %d samples to produce:" % nsamps)
    for i in range(nsamps):
        if verbose:
            misc.pnn(i)
        p = np.array([npr.dirichlet(x + 1) for x in Nlx])
        htilde = np.array([npr.dirichlet(x + 1) for x in Nly])
        qtilde = estimation.train_mlm_fixedp(htilde, p)
        # get the central q, for uniqueness
        q = polytopes.find_central(p, qtilde, clipthresh=clipthresh)
        estimates.append((p, q))
    return estimates
def gibbssample1(bn, likeexp):
    bn1 = copybn(bn)
    for node in bn1.keys():
        for parstates in bn1[node]['cp']:
            p1 = tuple(likeexp[node]['cp'][parstates].values())
            newparams = random.dirichlet(p1).tolist()
            skeys = bn[node]['cp'][parstates].keys()
            bn1[node]['cp'][parstates] = dict(zip(skeys, newparams))
    return bn1
def gen_synthetic_graph(N, nc):
    graph = nx.Graph(name='synthesized author graph')
    cluster_sizes = [int(round(cs)) for cs in dirichlet([7] * nc) * N]
    ph_s = dirichlet([1] * nc)
    pr_s = dirichlet([1] * nc)
    pv_s = dirichlet([1] * nc)
    SIGMA = 0.6
    TAU = 0.9
    AVG_PER_CLASS_PROD = 5
    mus = normal(loc=5.5, scale=3, size=nc)
    all_products = range(nc * AVG_PER_CLASS_PROD)
    pi_s = []
    for ci in range(nc):
        pi_s.append(dirichlet([0.5] * len(all_products)))
    author_prod_map = {}
    # generate nodes
    for ci in range(nc):
        for ni in range(cluster_sizes[ci]):
            graph.add_node(len(graph), acluster=ci,
                           revLen=normal(loc=mus[ci], scale=SIGMA),
                           isRealName=binomial(1, pr_s[ci]) == 1,
                           hlpful_fav_unfav=binomial(1, ph_s[ci]) == 1,
                           vrf_prchs_fav_unfav=binomial(1, pv_s[ci]) == 1)
    # generate edges
    for a, b in itertools.combinations(graph.nodes(), 2):
        if not binomial(1, min(15.0 / len(graph), 1.0)):
            continue
        if graph.node[a]['acluster'] == graph.node[b]['acluster']:
            if binomial(1, TAU):
                graph.add_edge(a, b, weight=np.clip(normal(1, scale=0.25), 0, 1), denom=5)
        else:
            if binomial(1, 1 - TAU):
                graph.add_edge(a, b, weight=np.clip(normal(0.5, scale=0.25), 0, 1), denom=5)
    # keep only the largest component
    # components = nx.connected_components(graph)
    # largest_component_i = np.argmax([len(c) for c in components])
    # largest_component = set(components[largest_component_i])
    # graph.remove_nodes_from([n for n in graph if n not in largest_component])
    # generate author_prod_map
    for n in graph:
        ci = graph.node[n]['acluster']
        nprods = randint(1, len(all_products) / 2)
        author_prod_map[n] = list(np.nonzero(multinomial(nprods, pi_s[ci]))[0])
    return graph, author_prod_map, cluster_sizes
def jitter(self, concentration=100):
    pi = self.params[0]
    # Propose new mixture weights from a Dirichlet centred on the current ones.
    new_pi = npr.dirichlet(concentration * pi) + 1e-8
    new_pi /= new_pi.sum()
    # Forward and reverse proposal log-densities; this `dirichlet` must provide
    # logpdf (e.g. scipy.stats.dirichlet), unlike npr.dirichlet above.
    fwd_lp = dirichlet(concentration * pi).logpdf(new_pi)
    rev_lp = dirichlet(concentration * new_pi).logpdf(pi)
    new_cluster = copy.deepcopy(self)
    new_cluster._params = (new_pi,)
    return new_cluster, fwd_lp, rev_lp
def test_multiplicative_replacement(self):
    x1 = dirichlet(self.a)
    y1 = insert(x1, 3, 0)
    u = multiplicative_replacement(y1)
    assert allclose(y1, u, atol=1e-2), \
        "Multiplicative replacement perturbation is too large."
    assert isclose(sum(u), 1), \
        "Multiplicative replacement does not yield a composition."
def random_initialize_t(self):
    """
    Initialize the translations by drawing a Categorical distribution
    (t(f_1|e),...,t(f_{V_f_size}|e)) for each e from a Dirichlet distribution:

        (t(f_1|e),...,t(f_{V_f_size}|e)) ~ Dir(0.1,...,0.1).
    """
    print("Initializing t randomly")
    self.t = dirichlet((0.1, ) * self.V_f_size, size=self.V_e_size).T
def main_plot_multi_bar():
    from numpy import ones
    from numpy.random import dirichlet
    n_states = 8
    n_cat = 3
    pi = dirichlet(ones(n_states), size=n_cat).T
    print(pi)
    pm = PlotModels(1, 1, 1)
    pm.multi_bar((0, 0), pi)
    embed(header='main_plot_multi_bar')
def log_multivariate_polya_mc(X, alpha, iterations=1e5):
    """Monte Carlo estimation of the log-likelihood of the Dirichlet compound
    multinomial (DCM) distribution, a.k.a. the multivariate Polya distribution.
    """
    Theta = dirichlet(alpha, size=int(iterations))
    logp_Hs = gammaln(X.sum() + 1) - gammaln(X + 1).sum()
    logp_Hs += (X * np.log(Theta)).sum(1)
    return logmean(logp_Hs)
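# Hypothetical usage sketch (not part of the original source): estimates the DCM
# log-likelihood of a made-up count vector with log_multivariate_polya_mc above,
# assuming numpy, scipy.special.gammaln, numpy.random.dirichlet and logmean are
# imported as that snippet expects.
X_toy = np.array([3, 1, 6])        # toy counts over 3 categories
alpha_toy = np.ones(3)             # symmetric Dirichlet parameter
print(log_multivariate_polya_mc(X_toy, alpha_toy, iterations=1e4))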
def parameterSample(self, allData):
    '''
    Given the document labels, resample the parameters for each topic.
    '''
    allData.sort(key=lambda a: a[0])
    grouped = [(k, [thing[1] for thing in g])
               for k, g in groupby(allData, lambda s: s[0])]
    group_counts = [(group[0],
                     reduce(lambda a, b: [a[i] + b[i] for i in range(len(a))], group[1]))
                    for group in grouped]
    group_probs = [(k, r.dirichlet(c).tolist()) for k, c in group_counts]
    return group_probs
def generate_corpus(categories, vocabulary, documents, hyp_pi=None, hyp_thetas=None):
    """Create true parameters and sample data for a corpus of labeled documents.

    @param categories: number of categories
    @type categories: integer
    @param vocabulary: vocabulary size
    @type vocabulary: integer
    @param documents: number of documents in the corpus
    @type documents: integer
    @param hyp_pi: optional category hyperparameter, default uninformative
    @type hyp_pi: list or None
    @param hyp_thetas: optional word count hyperparameter, default uninformative
    @type hyp_thetas: list or None
    @return: word distributions per category, documents, document labels
    @rtype: tuple
    """
    # Set up the hyperparameters.
    if hyp_pi is None:
        hyp_pi = [1] * categories
    if len(hyp_pi) != categories:
        raise Exception()
    if hyp_thetas is None:
        hyp_thetas = [1] * vocabulary
    if len(hyp_thetas) != vocabulary:
        raise Exception()
    # Note: the hard-coded values below override the arguments validated above.
    categories = 2        # number of categories L
    vocabulary = 499      # number of words V
    documents = 10        # number of documents W
    hyp_pi = ones(categories, int)
    hyp_thetas = ones(vocabulary, int)  # 1-D so it can be passed to dirichlet below
    pi = log(dirichlet(hyp_pi, 1)[0])
    thetas = dirichlet(hyp_thetas, categories)
    corpus = empty((documents, vocabulary), int)
    labels = empty(documents, int)
    for document_index in range(documents):
        category = multinomial_sample(pi)
        labels[document_index] = category
        corpus[document_index] = multinomial(vocabulary * 100, thetas[category])
    return log(thetas), corpus, labels
def init_expt(self, data_len):
    '''
    qz.init_expt(data_len)

    @argvs
    data_len: int

    @self
    expt: (n_states, data_len)
    '''
    alpha_pi = ones(self.n_cat)
    expt = dirichlet(alpha_pi, size=data_len).T
    self.set_expt(expt)
def gibbssample2(bn, prior, likeexp):
    bn1 = copybn(bn)
    for node in bn1.keys():
        for parstates in bn1[node]['cp']:
            p0 = prior[node]['cp'][parstates].values()
            p1 = likeexp[node]['cp'][parstates].values()
            p2 = tuple([x + y for x, y in zip(p0, p1)])
            newparams = random.dirichlet(p2).tolist()
            skeys = bn[node]['cp'][parstates].keys()
            bn1[node]['cp'][parstates] = dict(zip(skeys, newparams))
    return bn1
def time_to_mutation_rate(tree):
    if not hasattr(GC, "NUMPY_SEEDED"):
        from numpy.random import seed as numpy_seed
        numpy_seed(seed=GC.random_number_seed)
        GC.random_number_seed += 1
        GC.NUMPY_SEEDED = True
    t = read_tree_newick(tree)
    for node in t.traverse_preorder():
        if node.edge_length is not None:
            node.edge_length *= dirichlet(alpha=GC.tree_rate_alpha)
    return str(t)
def gen_data(means, precis, n):
    weight = dirichlet(np.ones(means.shape[0]))
    count = multinomial(n, weight)
    data = np.zeros((n, means.shape[1]))
    start = 0
    for i in range(len(count)):
        data[start:start + count[i], :] = normal(
            means[i], np.diag(precis[i] * np.ones(means.shape[1])), count[i])
        start = start + count[i]
    s = np.arange(n)
    np.random.shuffle(s)
    return data[s]
def _process_cfp(self, cfp: "CFP"):
    if not cfp.is_buy:
        return
    if self.awi.is_bankrupt(cfp.publisher) or not self.can_expect_agreement(cfp=cfp, margin=0):
        return
    profile = self.profiles.get(cfp.product, None)
    if profile is None:
        return
    if profile.cv == 0:
        alpha_u, alpha_q, alpha_t = (
            profile.alpha_u,
            profile.alpha_q,
            profile.alpha_t,
        )
    else:
        alpha_u, alpha_q, alpha_t = tuple(
            dirichlet((profile.alpha_u, profile.alpha_q, profile.alpha_t), size=1)[0])
    beta_u = pos_gauss(profile.beta_u, profile.cv)
    beta_t = pos_gauss(profile.beta_t, profile.cv)
    beta_q = pos_gauss(profile.beta_q, profile.cv)
    tau_u = pos_gauss(profile.tau_u, profile.cv)
    tau_t = pos_gauss(profile.tau_t, profile.cv)
    tau_q = pos_gauss(profile.tau_q, profile.cv)
    ufun = LinearUtilityAggregationFunction(
        issue_utilities={
            "time": lambda x: x ** tau_t / beta_t,
            "quantity": lambda x: x ** tau_q / beta_q,
            "unit_price": lambda x: x ** tau_u / beta_u,
        },
        weights={"time": alpha_t, "quantity": alpha_q, "unit_price": alpha_u},
    )
    ufun.reserved_value = ufun({
        "time": cfp.max_time,
        "quantity": cfp.max_quantity,
        "unit_price": cfp.money_resolution if cfp.money_resolution is not None else 0.0,
    })
    # ufun = normalize(, outcomes=cfp.outcomes, infeasible_cutoff=-1)
    negotiator = self.negotiator_type(name=self.name + "*" + cfp.publisher[:4], ufun=ufun)
    self.n_neg_trials[cfp.id] += 1
    self.request_negotiation(cfp=cfp, negotiator=negotiator)
def generate_corpus(categories, vocabulary, documents, hyp_pi=None, hyp_thetas=None):
    """Create true parameters and sample data for a corpus of labeled documents.

    @param categories: number of categories
    @type categories: integer
    @param vocabulary: vocabulary size
    @type vocabulary: integer
    @param documents: number of documents in the corpus
    @type documents: integer
    @param hyp_pi: optional category hyperparameter, default uninformative
    @type hyp_pi: list or None
    @param hyp_thetas: optional word count hyperparameter, default uninformative
    @type hyp_thetas: list or None
    @return: word distributions per category, documents, document labels
    @rtype: tuple
    """
    # Set up the hyperparameters.
    if hyp_pi is None:
        hyp_pi = [1] * categories
    if len(hyp_pi) != categories:
        raise Exception()
    if hyp_thetas is None:
        hyp_thetas = [1] * vocabulary
    if len(hyp_thetas) != vocabulary:
        raise Exception()
    # Generate the true model parameters.
    pi = log(dirichlet(hyp_pi, 1)[0])
    thetas = dirichlet(hyp_thetas, categories)
    # Generate the corpus and the true labels.
    corpus = empty((documents, vocabulary), int)
    labels = empty(documents, int)
    for document_index in range(documents):
        category = multinomial_sample(pi)
        labels[document_index] = category
        corpus[document_index] = multinomial(vocabulary * 100, thetas[category])
    return log(thetas), corpus, labels
def _initialize_sub_word_parameter(self):
    sub_word_parameter = np.zeros((self.n_sub_topics, self.V))
    self.V_grid = np.array(range(self.V)).reshape(self.V_sqrt, self.V_sqrt)
    dim, index = None, None
    sampled = {(dim, index)}
    for sub_topic in range(self.n_sub_topics):
        # Pick a previously unused row (dim == 0) or column (dim == 1) of the grid.
        while (dim, index) in sampled:
            dim, index = int(self.rng.rand() + 0.5), self.rng.randint(0, self.V_sqrt)
        sampled.add((dim, index))
        words = self.V_grid[index, :] if dim < 0.5 else self.V_grid[:, index]
        sub_word_parameter[sub_topic, words] = dirichlet(self.sub_word_dirichlet_parameter)
    return sub_word_parameter
def init_emission(self, Obs):
    exists = [False] * self.num_symbols
    for obs in Obs:
        for o in obs:
            exists[o] = True
    no = exists.count(True)
    # Draw emission probabilities only over the symbols that actually occur.
    p = dirichlet([1.0] * no, self.num_states)
    i = 0
    for idx, e in enumerate(exists):
        if e:
            self.B[idx, :] = p[:, i]
            i += 1
def generate_class_prob(classes: int) -> List[float]:
    """
    Generate a random (non-uniform) class distribution for an instance in the classifier.

    :param classes: The number of classes in the dataset
    :return: The randomly generated class probabilities for an instance in the data table
    """
    # Call on the dirichlet randomiser to generate probabilities, then convert
    # the numpy array into a regular non-nested list.
    ls: List[float] = ((random.dirichlet(np.ones(classes), size=1)).tolist())[0]
    return ls
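# Hypothetical usage sketch (not part of the original source): draws one class
# distribution with generate_class_prob above, assuming numpy is imported as np,
# numpy.random as random, and List comes from typing, as that snippet expects.
probs = generate_class_prob(4)           # e.g. [0.31, 0.05, 0.42, 0.22]
assert abs(sum(probs) - 1.0) < 1e-9      # the probabilities form a distribution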
def __init__(self, data_dim=1, pi=None, concentration=1):
    self.data_dim = data_dim
    self.concentration = concentration
    assert pi is not None or concentration is not None, \
        "Either pi or concentration must be specified."
    if pi is not None:
        assert pi.ndim == 1 and pi.size == data_dim and np.all(pi >= 0) \
            and np.allclose(pi.sum(), 1)
    else:
        # Draw pi from a symmetric Dirichlet when it is not given explicitly.
        pi = npr.dirichlet(concentration * np.ones(data_dim))
    self.pi = pi
def simulate_pf(mean_ret, cov):
    perf, weights = [], []
    for i in range(N_PORTFOLIOS):
        if i % 50000 == 0:
            print(i)
        weights = dirichlet([.08] * n_assets)
        weights /= np.sum(weights)
        r, sd = pf_performance(weights, mean_ret, cov)
        perf.append([r, sd, (r - RF_RATE) / sd])
    perf_df = pd.DataFrame(perf, columns=['ret', 'vol', 'sharpe'])
    # Note: only the weights of the final simulated portfolio are returned.
    return perf_df, weights
def init_gibbs(rho0, alpha0, gamma0, lam_a_pri, lam_b_pri, T, yt):
    K = 1
    zt = np.zeros(T, dtype='int')
    beta_vec = dirichlet(np.array([1, gamma0]), size=1)[0]
    beta_new = beta_vec[-1]
    beta_vec = beta_vec[:-1]
    n_mat = np.array([[0]])

    # t = 0 count as wt=0, don't need to infer wt
    ysum = np.array([yt[0]])
    ycnt = np.array([np.ones(len(yt[0]))])

    zt, n_mat, ysum, ycnt, beta_vec, beta_new, K = sample_one_step_ahead(
        zt, yt, n_mat, ysum, ycnt, beta_vec, beta_new,
        alpha0, gamma0, lam_a_pri, lam_b_pri, rho0, K)

    return (rho0, alpha0, gamma0, lam_a_pri, lam_b_pri, K, zt,
            beta_vec, beta_new, n_mat, ysum, ycnt)
def sample(self, data_len=1):
    ndim = self.alpha.ndim
    if ndim == 1:
        dst = dirichlet(self.alpha, size=data_len).T
        if data_len == 1:
            dst = dst[:, 0]
    elif ndim == 2:
        dst = zeros((self.n_states, self.len_2d, data_len))
        if self.alpha.shape[0] == self.alpha.shape[1]:
            for k in range(self.alpha.shape[0]):
                dst[:, k] = dirichlet(self.alpha[:, k], size=data_len).T
        else:
            for k in range(self.len_2d):
                dst[:, k, :] = dirichlet(self.alpha[:, k], size=data_len).T
        if data_len == 1:
            dst = dst[:, :, 0]
    else:
        logger.error('data dim %d is not supported' % ndim)
        dst = None
    return dst
def generate_corpus():
    corpus = np.zeros((NUM_DOCS, vocab_size), dtype='int64')
    beta = generate_beta()
    for m in range(NUM_DOCS):
        # Get topic distribution for current document
        theta = nprand.dirichlet(alpha)
        for i in range(DOC_LENGTH):
            # Sample topic
            zi = lda.sample(theta)
            w = lda.sample(beta[zi, :])
            corpus[m, w] += 1
    return corpus
def test_sample_dirichlet(self):
    N = 100
    for K in [2, 5, 10]:
        for alpha in [.1, .5, 1., 4., 50.]:
            samples0 = dirichlet(zeros(K) + alpha, size=N).T
            samples1 = sample_dirichlet(K, N, alpha)
            p = ks_2samp(samples0.ravel(), samples1.ravel())[1]
            self.assertGreater(p, 1e-6)
            self.assertLess(max(abs(1. - samples1.sum(0))), 1e-6)
def generateExpressionLevels(transcripts, distribution):
    expressionLevels = {}
    if distribution == UNIFORM_ARG:
        for transcript in transcripts.values():
            expressionLevels[transcript.name] = 1.0 / len(transcripts)
    elif distribution == DIRICHLET_ARG:
        ones = numpy.ones(len(transcripts))
        probabilities = dirichlet(ones, 1)[0]
        for i, transcript in enumerate(transcripts.values()):
            expressionLevels[transcript.name] = probabilities[i]
    print "Source expression: " + str(expressionLevels)
    return expressionLevels
def __init__(self, data):
    # observation parameters
    mu = np.mean(data)
    sigma = np.var(data) ** 0.5
    self.obs_params = np.array([normal(loc=mu, scale=sigma, size=5),
                                normal(loc=mu, scale=sigma, size=5),
                                normal(loc=mu, scale=sigma, size=5),
                                normal(loc=mu, scale=sigma, size=5),
                                normal(loc=mu, scale=sigma, size=5)])
    # pi[0] is initial state distribution
    self.pi = np.array([dirichlet([1, 1, 1, 1, 1]),
                        dirichlet([1, 1, 1, 1, 1]),
                        dirichlet([1, 1, 1, 1, 1]),
                        dirichlet([1, 1, 1, 1, 1]),
                        dirichlet([1, 1, 1, 1, 1]),
                        dirichlet([1, 1, 1, 1, 1])])
    # pi[1:] becomes transition matrix
    self.A = self.pi[1:]
    # state sequence
    self.x = np.zeros((data.size))
    # observation sequence
    self.y_t = np.zeros((data.size))
    # likelihood potentials
    self.L = np.zeros((5, data.size))
    # standard message passing
    self.forward_messages = hf.compute_forward_messages(data.size, self.pi[0], self.A, self.L)
    self.backward_messages = hf.compute_backward_messages(data.size, self.A, self.L)
def simulate_portfolios(mean_ret, cov, rf_rate=rf_rate, short=True):
    alpha = np.full(shape=n_assets, fill_value=.05)
    weights = dirichlet(alpha=alpha, size=NUM_PF)
    if short:
        weights *= choice([-1, 1], size=weights.shape)

    returns = weights @ mean_ret.values + 1
    returns = returns ** periods_per_year - 1
    std = (weights @ weekly_returns.T).std(1)
    std *= np.sqrt(periods_per_year)
    sharpe = (returns - rf_rate) / std

    return pd.DataFrame({'Annualized std': std,
                         'Annualized Returns': returns,
                         'Sharpe Ratio': sharpe}), weights
def generate_artificial_data(self, D, N, noise_threshold=0.0):
    N = int(N)
    self.super_Theta = dirichlet(self.super_dirichlet_parameter, size=D)
    self.sub_Theta = dirichlet(self.super_sub_dirichlet_parameter,
                               size=(D, self.n_super_topics))
    if noise_threshold > 0.0:
        sub_Theta_noise = dirichlet(self.super_sub_noise_dirichlet_parameter,
                                    size=(D, self.n_super_topics))
        self.sub_Theta = np.concatenate((self.sub_Theta * (1 - noise_threshold),
                                         sub_Theta_noise * noise_threshold), axis=2)
    self.doc_super = np.array([np.random.multinomial(N, self.super_Theta[i])
                               for i in range(D)])
    self.doc_sub = np.zeros((D, self.sub_Theta.shape[2]))
    for d, super_counts in enumerate(self.doc_super):
        for super_topic, num in enumerate(super_counts):
            if num > 0:
                self.doc_sub[d] += np.random.multinomial(num, self.sub_Theta[d][super_topic])
    X = np.zeros((D, self.V), dtype=int)
    for d, sub_counts in enumerate(self.doc_sub):
        for sub_topic, num in enumerate(sub_counts):
            if num > 0:
                # doc_sub holds float counts, so cast before drawing.
                X[d] += np.random.multinomial(int(num), self.sub_word_parameter[sub_topic])
    return X
def sample_node_prob():
    """Step 8 of update_network_model: sample new node probability beta."""
    ro = []
    for node in node_ids:
        if node == -1:
            continue
        ro.append(sample_outlink_size(node) + sample_inlink_size(node))
    print(ro)
    ro.append(fixed['gamma_H'])
    beta = dirichlet(alpha=ro)
    for i, node in enumerate(node_ids):
        state['beta'][node] = beta[i]
def __init__(self, returns, method, swarm=30):
    self.returns = returns
    self.swarm = swarm
    self.corr = returns.corr()
    self.n = len(self.returns.columns)
    self.method = method
    self.pbest_portfolios = []
    self.xbest_portfolios = []
    for i in range(self.swarm):
        weights = nrand.dirichlet(numpy.ones(self.n), 1)[0]
        self.xbest_portfolios.append(Portfolio(self.returns, weights))
        self.pbest_portfolios.append(Portfolio(self.returns, weights))
def sample_dirichlet(W, beta, memberships, out=None):
    K, T = memberships.shape[1], W.shape[1]
    prior = np.ndarray((T,), dtype=np.float64)
    if out is None:
        out = np.ndarray((K, T), dtype=np.float64)
    for k in range(K):
        # Posterior pseudo-counts: weighted word counts for component k plus the prior beta.
        prior[:] = W.T.dot(memberships[:, k])
        prior += beta
        out[k, :] = rd.dirichlet(prior)
    return out
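# Hypothetical usage sketch (not part of the original source): draws topic-word
# distributions with sample_dirichlet above from made-up counts, assuming numpy
# is imported as np and numpy.random as rd, as that snippet expects.
W_toy = np.array([[2., 0., 1.],
                  [0., 3., 1.]])               # 2 documents x 3 terms
memberships_toy = np.array([[0.9, 0.1],
                            [0.2, 0.8]])       # 2 documents x 2 components
word_dists = sample_dirichlet(W_toy, beta=0.1, memberships=memberships_toy)
print(word_dists.shape)                        # (2, 3); each row sums to 1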