Example #1
def UpdateModelParamsFromPrior(state):
  alpha = state.hypers['alpha']
  alpha_token = state.hypers['alpha_token']
  state.topic_sub_weights = (
      cDists.SampleDirichletArray(alpha * np.ones((state.n_topics, state.n_subs))))
  state.topic_rel_weights = (
      cDists.SampleDirichletArray(alpha * np.ones((state.n_topics, state.n_rels))))
  state.type_sub_weights = (
      cDists.SampleDirichletArray(alpha * np.ones((state.n_types, state.n_subs))))
  try:
    state.sub_token_weights = (
        cDists.SampleDirichletArray(alpha_token * np.ones((state.n_subs, state.n_tokens))))
    state.rel_token_weights = (
        cDists.SampleDirichletArray(alpha_token * np.ones((state.n_rels, state.n_tokens))))
  except ZeroDivisionError:
    # If alpha is extremely small, approximate the Dirichlet draw
    # with a delta function
    logging.warning('Token alpha %r very small. Approximating Dirichlet draw '
                    'with a delta function.', alpha_token)
    eps = 1e-4  # Small constant to prevent numeric instability
    sub_assign = random.randint(state.n_tokens, size=state.n_subs)
    ent_token_weights = np.zeros((state.n_subs, state.n_tokens)) + eps
    ent_token_weights[np.arange(state.n_subs), sub_assign] = 1. - eps
    state.sub_token_weights = ent_token_weights
    rel_assign = random.randint(state.n_tokens, size=state.n_rels)
    rel_token_weights = np.zeros((state.n_rels, state.n_tokens)) + eps
    rel_token_weights[np.arange(state.n_rels), rel_assign] = 1. - eps
    state.rel_token_weights = rel_token_weights
  state.type_names = pandas.Series({0: 'ref', 1: 'real'})
  state.topic_weights = random.dirichlet(np.repeat(alpha, state.n_topics))
  state.type_weights = random.dirichlet(np.repeat(alpha, state.n_types))
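
For context, a minimal standalone sketch of the delta-function fallback used above: as the Dirichlet concentration goes to zero a draw puts almost all of its mass on a single component, which can be imitated with a near-one-hot matrix. The helper below is hypothetical and only illustrates the idea.

import numpy as np


def near_delta_dirichlet(n_rows, n_cols, eps=1e-4, rng=None):
    # Approximate rows drawn from Dirichlet(alpha -> 0) over n_cols items:
    # each row puts (almost) all of its mass on one uniformly chosen column.
    rng = np.random.default_rng() if rng is None else rng
    assign = rng.integers(n_cols, size=n_rows)
    weights = np.full((n_rows, n_cols), eps)
    weights[np.arange(n_rows), assign] = 1. - eps
    return weights / weights.sum(axis=1, keepdims=True)  # renormalise rows


rows = near_delta_dirichlet(5, 10)
assert np.allclose(rows.sum(axis=1), 1.0)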
        def sample_topic_parameters(self):
            for i in xrange(self.N):
                z_sum = 0
                for j in self.I_U[i]:
                    z_sum += self.z_U[i,j]
                self.theta_U[i] = dirichlet(self.alpha/self.K_U + z_sum)

            for j in xrange(self.M):
                z_sum = 0
                for i in self.I_V[j]:
                    z_sum += self.z_V[i,j]
                self.theta_V[j] = dirichlet(self.alpha/self.K_V + z_sum)
def compute_logp_independent_block_mc(N, alpha_row=None, alpha_col=None, iterations=1e5):
    """Compute the montecarlo log likelihood of a matrix under the
    assumption of independence.
    """
    if N.size == 1 : return 0
    if alpha_row is None: alpha_row = np.ones(N.shape[1])
    if alpha_col is None: alpha_col = np.ones(N.shape[0])
    theta_row = dirichlet(alpha_row, size=int(iterations)).T
    theta_col = dirichlet(alpha_col, size=int(iterations)).T
    Theta = theta_row[:,None,:] * theta_col
    logp_ibs = gammaln(N.sum()+1) - gammaln(N+1).sum() + (np.log(Theta)*N[:,:,None]).sum(0).sum(0)
    return logmean(logp_ibs)
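
The function above relies on a logmean helper that is not shown. A plausible definition, averaging the per-sample likelihoods in log space, plus a small call (the imports the snippet itself needs are assumed) might look like this sketch:

import numpy as np
from numpy.random import dirichlet
from scipy.special import gammaln, logsumexp


def logmean(logvalues):
    # log of the arithmetic mean of values that are supplied in log space
    return logsumexp(logvalues) - np.log(len(logvalues))


N = np.array([[10, 2],
              [3, 8]])
logp = compute_logp_independent_block_mc(N, iterations=1e4)
print(logp)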
    def _initialize_gibbs_sampler(self):
        """
		Initialize the Gibbs sampler

		This sets the initial values of the C{labels} and C{thetas} parameters.
		"""
        pi = log(dirichlet(self.hyp_pi, 1)[0])
        categories = self._categories()
        documents = self._documents()
        self.thetas = empty(self.hyp_thetas.shape)
        for category_index in xrange(categories):
            self.thetas[category_index] = log(dirichlet(self.hyp_thetas[category_index], 1)[0])
        self.labels = array([multinomial_sample(pi) for _ in xrange(documents)])
        def sample_topic_parameters(self):
            # user topic assignment
            for i in xrange(self.N):
                z_sum = 0
                for j in self.I_U[i]:
                    z_sum += self.z_U[i,j]
                self.theta_U[i] = dirichlet(self.alpha/self.K_U + z_sum)
            # item topic assignment
            for j in xrange(self.M):
                z_sum = 0
                for i in self.I_V[j]:
                    z_sum += self.z_V[i,j]
                self.theta_V[j] = dirichlet(self.alpha/self.K_V + z_sum)
Example #6
    def simulate_combat(self, allowed_time,
                        ant_0_scoring = ConservativeScore,
                        ant_1_scoring = ConservativeScore,
                        log = None):
        start = time.time()
        score_0 = ant_0_scoring(self, 0)
        score_1 = ant_1_scoring(self, 1)
        
        self.allowed_policies()
        init_poses = dict( (a, a.pos) for a in self.ants)
        
        killed = []
        steps = 0
        while (time.time() - start) < allowed_time:
            steps += 1
            action = {}
            for k in killed:
                self.add_ant(k)
            for a,p in init_poses.iteritems():
                a.pos = p
            
            for ant in self.ants:
                ps = dirichlet(self.policy[ant])
                i = multinomial(1, ps).nonzero()[0][0]
                if not (self.move_direction(ant, self.actions[i])):
                    print "CAZZZ"
                action[ant] = i
                
            killed = self.step_turn()
            for a, p in self.policy.iteritems():
                if a.owner == 0:
                    p[action[a]] += score_0(self)
                else:
                    p[action[a]] += score_1(self)

        for k in killed:
            self.add_ant(k)
        for a,p in init_poses.iteritems():
            a.pos = p
        
        retpolicy = {}
        for a,p in self.policy.iteritems():
            ps = dirichlet(p)
            i = multinomial(1, ps).nonzero()[0][0]
            retpolicy[a] = self.actions[i]
        if log is not None:
            log.info("Number of steps: %d", steps)
        else:
            print "Number of steps: ", steps
        return retpolicy
 def _sampleFromModel(self, D=200, T=100, K=10, avgWordsPerDoc = 500):
     '''
     Create a test dataset according to the model
     
     Params:
         D - Sample documents (each with associated features)
         T - Vocabulary size, the number of "terms". Must be a square number
         K - Observed topics
         avgWordsPerDoc - average number of words per document generated (Poisson)
     
     Returns:
         tpcs       - the matrix of per-document topic distributions
         vocab      - the matrix of per-topic word distributions
         docLens    - the vector of document lengths
         W          - the DxT matrix of word counts
     '''
     
     # Generate vocab
     beta = 0.1
     betaVec = np.ndarray((T,))
     betaVec.fill(beta)
     vocab = rd.dirichlet(betaVec, size=K)
     
     # Generate the shared covariance matrix
     # ...no real structure in this.
     sigT = rd.random((K,K))
     sigT = sigT.dot(sigT.T)
     
     # Generate topic mean
     alpha = 1
     alphaVec = np.ndarray((K,))
     alphaVec.fill(alpha)
     topicMean = rd.dirichlet(alphaVec)
     
     # Generate the actual topics.
     tpcs = rd.multivariate_normal(topicMean, sigT, size=D)
     tpcs = rowwise_softmax(tpcs)
     
     # Generate the corpus
     docLens = rd.poisson(avgWordsPerDoc, (D,)).astype(np.float32)
     W = tpcs.dot(vocab)
     W *= docLens[:, np.newaxis]
     W = np.array(W, dtype=np.int32) # truncate word counts to integers
     W = ssp.csr_matrix(W)
     
     # Return the initialised model, the true parameter values, and the
     # generated observations
     return tpcs, vocab, docLens, W
Example #8
def generate_corpus():
    beta = np.zeros((NUM_TOPICS, VOCAB_SIZE))
    corpus = np.zeros((NUM_DOCS, VOCAB_SIZE), dtype='int64')
    # Draw per-topic word distributions
    for k in range(NUM_TOPICS):
        beta[k,:] = nprand.dirichlet(eta)
    for m in range(NUM_DOCS):
        # Draw per-document topic distribution
        theta = nprand.dirichlet(alpha)
        for i in range(DOC_LENGTH):
            topic = lda.sample(theta)
            word = lda.sample(beta[topic,:])
            corpus[m,word] += 1
    return (corpus, beta)
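
The generator above depends on module-level constants and an lda.sample helper that are not shown. A self-contained sketch of the same generative process, with a plain categorical sampler standing in for lda.sample (all names and sizes below are illustrative), could be:

import numpy as np

NUM_TOPICS, VOCAB_SIZE, NUM_DOCS, DOC_LENGTH = 5, 50, 20, 100
alpha = np.full(NUM_TOPICS, 0.5)   # per-document topic prior
eta = np.full(VOCAB_SIZE, 0.1)     # per-topic word prior
rng = np.random.default_rng(0)


def sample_index(p):
    # draw one index from a categorical distribution with probabilities p
    return rng.choice(len(p), p=p)


beta = rng.dirichlet(eta, size=NUM_TOPICS)        # topic-word distributions
corpus = np.zeros((NUM_DOCS, VOCAB_SIZE), dtype='int64')
for m in range(NUM_DOCS):
    theta = rng.dirichlet(alpha)                  # document-topic distribution
    for _ in range(DOC_LENGTH):
        topic = sample_index(theta)
        word = sample_index(beta[topic])
        corpus[m, word] += 1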
Example #9
def sample_pi(alpha, zz):
    """ Sample mixing weights from the posterior distribution p(\pi|Z). 

    Inputs
    ------
    alpha: array [nr_clusters]
        The parameters of the prior of the mixing weights Dirichlet(\pi|alpha).

    zz: array [nr_points]
        The cluster assignments of each of the N points.

    Output
    ------
    pi: array [nr_clusters]
        Sample for mixing weights.

    """
    K = len(alpha)
    # Count how many times each state appears.
    counts = np.zeros(K)
    for state in zz:
        counts[state] += 1
    alpha_new = np.array(alpha) + counts
    pi = dirichlet(alpha_new)
    return pi
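
A short usage sketch for sample_pi (with the imports the snippet assumes): given a symmetric prior and a handful of assignments, the posterior draw favours the more heavily used clusters.

import numpy as np
from numpy.random import dirichlet

alpha = np.ones(3)                 # symmetric Dirichlet prior over 3 clusters
zz = np.array([0, 0, 0, 1, 2, 2])  # cluster assignments of 6 points
pi = sample_pi(alpha, zz)
print(pi, pi.sum())                # a point on the simplex, summing to 1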
Example #10
File: bptd.py Project: aschein/bptd
    def _init_latent_params(self):
        V = self.n_actors
        A = self.n_actions
        T = self.n_timesteps
        R = self.n_regimes
        C = self.n_communities
        K = self.n_topics

        if self.gam is None:
            self.gam = (0.1 ** (1. / 4)) * (R + K + C + C)
            print 'Setting gam to: %f' % self.gam
        self.zeta = 1.
        self.delta = 1.

        self.rho_R = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=R)
        self.nu_K = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=K)
        self.eta_d_C = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=C)
        self.eta_a_C = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=C)

        self.d = 1.
        shp_RKCC = np.ones((R, K, C, C))
        shp_RKCC[:] = np.outer(self.eta_d_C, self.eta_d_C)
        shp_RKCC[:, :, np.identity(C).astype(bool)] = self.eta_a_C * self.eta_d_C
        shp_RKCC *= self.nu_K[None, :, None, None]
        shp_RKCC *= self.rho_R[:, None, None, None]
        self.Lambda_RKCC = sample_gamma(shp_RKCC, 1. / self.d)
        self.Psi_TR = sample_gamma(self.e, 1. / self.f, size=(T, R))
        self.Phi_AK = np.ones((A, K))
        self.Phi_AK[:, :] = rn.dirichlet(self.e * np.ones(A), size=K).T
        self.alpha_V = np.ones(V) * self.e
        self.beta = 1.
        self.Theta_VC = np.ones((V, C))
Example #11
 def bayesian_bootstrap(self,par=1):
     weight=npr.dirichlet([par]*self.n,(self.times,self.num_samples))
     samples=[npr.choice(self.data[j],size=self.n,p=weight[j][i]) 
             for i in xrange(self.num_samples) for j in xrange(self.times)]
             #for i in xrange(self.times*self.num_samples)]
     samples=np.array(samples).reshape(self.times,self.num_samples,self.n)
     return samples
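
The method above is tied to its surrounding class. As a rough standalone illustration of the same idea, a Bayesian bootstrap replaces integer resampling counts with a flat Dirichlet draw over the observations, e.g. to obtain posterior-style draws of a mean (everything below is illustrative):

import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(loc=2.0, scale=1.0, size=200)

# Bayesian bootstrap: each replicate reweights the observed points with
# weights drawn from Dirichlet(1, ..., 1) instead of resampling them.
n_boot = 1000
weights = rng.dirichlet(np.ones(len(data)), size=n_boot)  # (n_boot, n)
boot_means = weights @ data                               # weighted means
print(boot_means.mean(), boot_means.std())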
Example #12
    def sample(self, observations_by_state):
        """
        Sample a new set of distribution parameters given a sample of observations from the given state.

        The internal parameters are updated.

        Parameters
        ----------
        observations_by_state :  [ numpy.array with shape (N_k,) ] with nstates elements
            observations[k] are all observations associated with hidden state k

        Examples
        --------

        initialize output model

        >>> B = np.array([[0.5, 0.5], [0.1, 0.9]])
        >>> output_model = DiscreteOutputModel(B)

        sample given observation

        >>> obs = [[0, 0, 0, 1, 1, 1], [1, 1, 1, 1, 1, 1]]
        >>> output_model.sample(obs)

        """
        from numpy.random import dirichlet
        N, M = self._output_probabilities.shape  # nstates, nsymbols
        for i in range(len(observations_by_state)):
            # count symbols found in data
            count = np.bincount(observations_by_state[i], minlength=M).astype(float)
            # sample dirichlet distribution
            count += self.prior[i]
            if count.sum() > 0:  # if there are no counts at all we can't sample, so leave output probabilities as they are
                self._output_probabilities[i, :] = dirichlet(count)
Example #13
def newQueryState(data, modelState):
    '''
    Creates a new LDA QueryState object. This contains all
    parameters and random variables tied to individual
    datapoints.

    Param:
    data       - the dataset, whose DxT document-term matrix of word
                 counts is used for training or querying.
    modelState - the model state object

    Return:
    A QueryState object
    '''
    docLens = np.squeeze(np.asarray(data.words.sum(axis=1)))

    # Initialise the per-token assignments at random according to the dirichlet hyper
    # This is super-slow
    dist = modelState.topicPrior.copy()
    dist /= dist.sum()

    topicDists  = rd.dirichlet(dist, size=data.doc_count).astype(modelState.dtype)
    topicDists *= docLens[:, np.newaxis]
    topicDists += modelState.topicPrior[np.newaxis, :]

    return QueryState(docLens, topicDists, False)
def test_case_generator():
    sys.path.append('solutions/hw2')
    sys.path.append('suppl/hw2')
    from assignment_two_adaboost import weak_learner as wl
    sys.path.pop()
    sys.path.pop()
    from kernels import rbf

    seed(1)
    instances = normal(size=(50, 5))
    labels = binomial(1, 0.5, 50)
    dist = dirichlet(uniform(size=50))
    ker = rbf(1)
    mat = uniform(size=(5, 5))
    mat = (mat / np.sum(mat, axis=1)).T
    test_cases = {'assignment_two_adaboost': {
        'compute_error':
        [lambda x: x[3] < 0.2, instances, labels, dist],
        'run_adaboost':
        [instances, labels, wl],
        'update_dist':
        [lambda x: x[2] > -0.2, instances,
         labels, dist, normal()],
        'weak_learner': [instances, labels, dist]},
        'assignment_two_pagerank': {'compute_pageranks': [mat],
                                    'main': []},
        'assignment_two_svm': {
        'evaluate_classifier':
        [lambda x: norm(x) > 5, instances, labels],
        'svm_train': [instances, labels, ker]}}

    return test_cases
Example #15
def newModelAtRandom(data, K, topicPrior=None, vocabPrior=None, dtype=DTYPE):
    '''
    Creates a new LDA ModelState for the given training set and
    the given number of topics. Everything is instantiated purely
    at random. This contains all parameters independent of
    the dataset (e.g. learnt priors)
    
    Param:
    data - the dataset of words, features and links of which only words are used in this model
    K - the number of topics
    topicPrior - the prior over topics, either a scalar or a K-dimensional vector
    vocabPrior - the prior over vocabs, either a scalar or a T-dimensional vector
    dtype      - the datatype to be used throughout.
    
    Return:
    A ModelState object
    '''
    assert K > 1, "There must be at least two topics"
    T = data.words.shape[1]
    
    if topicPrior is None:
        topicPrior = constantArray((K,), 50.0 / K, dtype) # From Griffiths and Steyvers 2004
    if vocabPrior is None:
        vocabPrior = 1.1 # Also from G&S
    
    vocabPriorVec = constantArray((T,), vocabPrior, dtype)
    wordDists = rd.dirichlet(vocabPriorVec, size=K).astype(dtype)
    
    # Perturb to avoid zero probabilities
    wordDists += 1./T
    wordDists /= (wordDists.sum(axis=1))[:,np.newaxis]
    
    return ModelState(K, topicPrior, vocabPrior, wordDists, dtype, MODEL_NAME)
 def sample(self):
     '''
     Draw a single sample from a Dirichlet distribution with parameter
     vector self.alpha (a k-dimensional alpha gives a sample of dimension k).
     '''
     return npr.dirichlet(self.alpha, size=None)
Example #17
 def sample(self, eta, size=1):
     """
     @param eta: the natural parameters
     @param size: the size of the sample
     A sample of sufficient statistics
     """
     from numpy.random import dirichlet
     return self.T(dirichlet(self.theta(eta), size=size))
def compute_logp_independent_block_mc(X, alpha=None, iterations=1e5):
    """Compute the montecarlo log likelihood of a matrix under the
    assumption of independence.
    """
    if alpha is None: alpha = np.ones(X.shape[1])
    Theta = dirichlet(alpha, size=int(iterations)).T
    logp_ibs = gammaln(X.sum(1)+1).sum() - gammaln(X+1).sum(1).sum() + (np.log(Theta)[None,:,:]*X[:,:,None]).sum(1).sum(0) # log(\prod(one Multinomial pdf for each row))
    return logmean(logp_ibs)
    def _random_population(self):
        """ Generate a random population on the unit simplex of appropriate
            dimensionality

        """

        rand.seed()

        return rand.dirichlet([1] * len(self.types))
Example #20
def node_prob(tran_prior, node_tran_sample, num_node):
    node_prob_mat = tran_prior + node_tran_sample
    for i in range(num_node):
        node_prob_mat[i, :] = npr.dirichlet(node_prob_mat[i, :])
    node_prob_arr = np.reshape(node_prob_mat, num_node * num_node)
    #     print(node_prob_arr,'node_prob_arr')
    #     print(np.sum(node_prob_arr[0:9]),'np.sum(node_prob_arr[0:9])')

    return node_prob_arr
    def apply(self, agents):
        alpha = dirichlet(ones(len(agents)))
        values = {}
        for param in self.params:
            aux = [agent.__dict__[param] for agent in agents]
            values[param] = real_crossover.recombinate(alpha, aux)

        _cls = agents[0].__class__
        return [_cls(**values)]
Example #22
def initialize_uniformly(data, D, K):

    mus = npr.uniform(low=0.7, high=1.3, size=[K, D])
    sigmas = np.zeros((K, D, D))
    for k in range(K):
        sigmas[k, :, :] = np.eye(D, D)
    pis = npr.dirichlet(np.ones(K))  #mixture coefficients

    return mus, sigmas, pis
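
A quick usage sketch (np/npr aliased as in the snippet): the mixture weights come back as a single point on the K-simplex, alongside K mean vectors and K identity covariances. The data argument is unused by the function, so None is passed here purely for illustration.

import numpy as np
import numpy.random as npr

mus, sigmas, pis = initialize_uniformly(data=None, D=2, K=3)
print(mus.shape, sigmas.shape, pis.shape)  # (3, 2) (3, 2, 2) (3,)
assert np.isclose(pis.sum(), 1.0)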
Example #23
 def _randomise(self):
     """
     Randomise the variational parameters.
     """
     from numpy.random import dirichlet
     self.phi = dirichlet(ones(self.K), size=self.N)
     self.tau = outer(ones(self.K), self._lambda)
     self.gamma[:, 0] = 1.
     self.gamma[:, 1] = self.alpha
Example #24
def posterior_samples(Nlx, Nly, nsamps=100, verbose=False, clipthresh=1e-6):
    estimates = []
    if verbose:
        misc.pnn("\n %d samples to produce:" % nsamps)
    for i in range(nsamps):
        if verbose:
            misc.pnn(i)
        p = np.array([npr.dirichlet(x + 1) for x in Nlx])
        htilde = np.array([npr.dirichlet(x + 1) for x in Nly])

        qtilde = estimation.train_mlm_fixedp(htilde, p)

        # get the central q, for uniqueness
        q = polytopes.find_central(p, qtilde, clipthresh=clipthresh)

        estimates.append((p, q))

    return estimates
Example #26
def gibbssample1(bn, likeexp):
    bn1 = copybn(bn)
    for node in bn1.keys():
        for parstates in bn1[node]['cp']:
            p1 = tuple(likeexp[node]['cp'][parstates].values())
            newparams = random.dirichlet(p1).tolist()
            skeys = bn[node]['cp'][parstates].keys()
            bn1[node]['cp'][parstates] = dict(zip(skeys, newparams))
    return bn1
Example #27
def gen_synthetic_graph(N, nc):
    graph = nx.Graph(name='synthesized author graph')
    cluster_sizes = [int(round(cs)) for cs in dirichlet([7] * nc) * N]
    ph_s = dirichlet([1] * nc)
    pr_s = dirichlet([1] * nc)
    pv_s = dirichlet([1] * nc)
    SIGMA = 0.6
    TAU = 0.9
    AVG_PER_CLASS_PROD = 5
    mus = normal(loc=5.5, scale=3, size=nc)
    all_products = range(nc * AVG_PER_CLASS_PROD)
    pi_s = []
    for ci in range(nc):
        pi_s.append(dirichlet([0.5] * len(all_products)))
    author_prod_map = {}

    # generate nodes
    for ci in range(nc):
        for ni in range(cluster_sizes[ci]):
            graph.add_node(len(graph), acluster=ci, revLen=normal(loc=mus[ci], scale=SIGMA),
                           isRealName=binomial(1, pr_s[ci]) == 1, hlpful_fav_unfav=binomial(1, ph_s[ci]) == 1,
                           vrf_prchs_fav_unfav=binomial(1, pv_s[ci]) == 1)
    # generate edges
    for a, b in itertools.combinations(graph.nodes(), 2):
        if not binomial(1, min(15.0/len(graph), 1.0)):
            continue
        if graph.node[a]['acluster'] == graph.node[b]['acluster']:
            if binomial(1, TAU):
                graph.add_edge(a, b, weight=np.clip(normal(1, scale=0.25), 0, 1), denom=5)
        else:
            if binomial(1, 1 - TAU):
                graph.add_edge(a, b, weight=np.clip(normal(0.5, scale=0.25), 0, 1), denom=5)
    # keep only the largest component
    # components = nx.connected_components(graph)
    # largest_component_i = np.argmax([len(c) for c in components])
    # largest_component = set(components[largest_component_i])
    # graph.remove_nodes_from([n for n in graph if n not in largest_component])
    # generate author_prod_map
    for n in graph:
        ci = graph.node[n]['acluster']
        nprods = randint(1, len(all_products)/2)
        author_prod_map[n] = list(np.nonzero(multinomial(nprods, pi_s[ci]))[0])

    return graph, author_prod_map, cluster_sizes
Example #28
 def bayesian_bootstrap(self, par=1):
     weight = npr.dirichlet([par] * self.n, (self.times, self.num_samples))
     samples = [
         npr.choice(self.data[j], size=self.n, p=weight[j][i])
         for i in xrange(self.num_samples) for j in xrange(self.times)
     ]
     #for i in xrange(self.times*self.num_samples)]
     samples = np.array(samples).reshape(self.times, self.num_samples,
                                         self.n)
     return samples
Example #29
    def jitter(self, concentration=100):
        pi = self.params[0]
        new_pi = npr.dirichlet(concentration * pi) + 1e-8
        new_pi /= new_pi.sum()
        fwd_lp = dirichlet(concentration * pi).logpdf(new_pi)
        rev_lp = dirichlet(concentration * new_pi).logpdf(pi)

        new_cluster = copy.deepcopy(self)
        new_cluster._params = (new_pi,)
        return new_cluster, fwd_lp, rev_lp
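
The jitter above is a Dirichlet random-walk proposal; fwd_lp and rev_lp are the forward and reverse proposal log-densities needed for a Metropolis-Hastings correction. A hedged sketch of how such a proposal might be folded into an accept/reject step (log_target and cluster are placeholders, not part of the original code):

import numpy as np


def mh_step(cluster, log_target, rng=None):
    # One Metropolis-Hastings step built around the jitter proposal above.
    rng = np.random.default_rng() if rng is None else rng
    proposal, fwd_lp, rev_lp = cluster.jitter()
    # Accept with probability min(1, target ratio * proposal correction).
    log_accept = (log_target(proposal) - log_target(cluster)) + (rev_lp - fwd_lp)
    if np.log(rng.random()) < log_accept:
        return proposal
    return cluster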
Example #30
 def test_multiplicative_replacement(self):
     x1 = dirichlet(self.a)
     y1 = insert(x1, 3, 0)
     u = multiplicative_replacement(y1)
     assert allclose(
         y1, u,
         atol=1e-2), "Multiplicative replacement perturbation is too large."
     assert isclose(
         sum(u),
         1), "Multiplicative replacement does not yield a composition."
Example #31
    def random_initialize_t(self):
        """
		Initialize the translations by drawing a Categorical distribution
		(t(f_1|e),...,t(f_{V_f_size}|e)) for each e from a Dirichlet distribution:
			
			(t(f_1|e),...,t(f_{V_f_size}|e)) ~ Dir(0.1,...,0.1).

		"""
        print("Initializing t randomly")
        self.t = dirichlet((0.1, ) * self.V_f_size, size=self.V_e_size).T
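
Worth noting: with size=self.V_e_size and the trailing .T, the table self.t ends up with shape (V_f_size, V_e_size), so each column is a distribution over target words f for one source word e. A tiny shape check under made-up sizes:

import numpy as np
from numpy.random import dirichlet

V_f_size, V_e_size = 7, 4                        # illustrative vocabulary sizes
t = dirichlet((0.1,) * V_f_size, size=V_e_size).T
print(t.shape)                                   # (7, 4)
assert np.allclose(t.sum(axis=0), 1.0)           # each column t(.|e) sums to 1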
Example #32
def main_plot_multi_bar():
    from numpy import ones
    from numpy.random import dirichlet
    n_states = 8
    n_cat = 3
    pi = dirichlet(ones(n_states), size=n_cat).T
    print(pi)
    pm = PlotModels(1, 1, 1)
    pm.multi_bar((0, 0), pi)
    embed(header='main_plot_multi_bar')
def log_multivariate_polya_mc(X, alpha, iterations=1e5):
    """Montecarlo estimation of the log-likelihood of the Dirichlet
    compound multinomial (DCM) distribution, a.k.a. the multivariate
    Polya distribution.
    """
    Theta = dirichlet(alpha, size=int(iterations))
    logp_Hs = gammaln(X.sum() + 1) - gammaln(X + 1).sum()
    logp_Hs += (X * np.log(Theta)).sum(1)

    return logmean(logp_Hs)
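
As a sanity check, the Monte Carlo estimate can be compared against the closed-form DCM log-likelihood, log p(x|alpha) = log(n!/prod x_j!) + log B(alpha + x) - log B(alpha). The sketch below assumes a logmean helper that averages in log space, as sketched earlier.

import numpy as np
from numpy.random import dirichlet
from scipy.special import gammaln

X = np.array([3, 1, 2, 5])
alpha = np.ones(4)

# Exact DCM / multivariate Polya log-likelihood for comparison.
exact = (gammaln(X.sum() + 1) - gammaln(X + 1).sum()
         + gammaln(alpha.sum()) - gammaln(alpha.sum() + X.sum())
         + (gammaln(alpha + X) - gammaln(alpha)).sum())
approx = log_multivariate_polya_mc(X, alpha, iterations=1e5)
print(exact, approx)  # should agree to roughly two decimal places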
Example #34
 def parameterSample(self, allData):
     '''
     Given the document labels, resample the parameters for each topic.
     '''
     
     allData.sort(key = lambda a: a[0])
     grouped = [(k, [thing[1] for thing in g]) for k,g in groupby(allData, lambda s: s[0])]
     group_counts = [(group[0], reduce(lambda a,b: [a[i] + b[i] for i in range(len(a))]   ,group[1]) ) for group in grouped]
     group_probs = [(k, r.dirichlet(c).tolist()) for k,c in group_counts]
     return group_probs
Example #35
def generate_corpus(categories, vocabulary, documents,
					hyp_pi = None, hyp_thetas = None):
	"""Create true parameters and sample data for a corpus of labeled documents.

	@param categories: number of categories
	@type categories: integer
	@param vocabulary: vocabulary size
	@type vocabulary: integer
	@param documents: number of documents in the corpus
	@type documents: integer
	@param hyp_pi: optional category hyperparameter, default uninformative
	@type hyp_pi: list or None
	@param hyp_thetas: optional word count hyperparameter, default uninformative
	@type hyp_thetas: list or None
	@return: word distributions per category, documents, document labels
	@rtype: tuple
	"""
	# Set up the hyperparameters.
	if hyp_pi == None:
		hyp_pi = [1]*categories
	if len(hyp_pi) != categories:
		raise Exception()
	if hyp_thetas == None:
		hyp_thetas = [1]*vocabulary
	if len(hyp_thetas) != vocabulary:
		raise Exception()
	
	categories = 2		# number of categories L
	vocabulary = 499	# number of words V
	documents = 10		# number of documents W
	hyp_pi = ones(categories, int)						
	hyp_thetas = ones((categories, vocabulary), int)	
	pi = log(dirichlet(hyp_pi, 1)[0])
	thetas = dirichlet(hyp_thetas, categories)

	corpus = empty((documents, vocabulary), int)
	labels = empty(documents, int)
	for document_index in range(documents):
		category = multinomial_sample(pi)
		labels[document_index] = category
		corpus[document_index] = multinomial(vocabulary*100, thetas[category])
	return log(thetas), corpus, labels
Example #36
 def init_expt(self, data_len):
     '''
     qz.init_expt(data_len, argvs)
     @argvs
     data_len: int
     @self
     expt: (n_states, data_len)
     '''
     alpha_pi = ones(self.n_cat)
     expt = dirichlet(alpha_pi, size=data_len).T
     self.set_expt(expt)
Example #37
def gibbssample2(bn, prior, likeexp):
    bn1 = copybn(bn)
    for node in bn1.keys():
        for parstates in bn1[node]['cp']:
            p0 = prior[node]['cp'][parstates].values()
            p1 = likeexp[node]['cp'][parstates].values()
            p2 = tuple([x + y for x, y in zip(p0, p1)])
            newparams = random.dirichlet(p2).tolist()
            skeys = bn[node]['cp'][parstates].keys()
            bn1[node]['cp'][parstates] = dict(zip(skeys, newparams))
    return bn1
Example #38
 def time_to_mutation_rate(tree):
     if not hasattr(GC, "NUMPY_SEEDED"):
         from numpy.random import seed as numpy_seed
         numpy_seed(seed=GC.random_number_seed)
         GC.random_number_seed += 1
         GC.NUMPY_SEEDED = True
     t = read_tree_newick(tree)
     for node in t.traverse_preorder():
         if node.edge_length is not None:
             node.edge_length *= dirichlet(alpha=GC.tree_rate_alpha)
     return str(t)
Example #39
def gen_data(means, precis, n):
    weight = dirichlet(np.ones(means.shape[0]))
    count = multinomial(n, weight)
    data = np.zeros((n, means.shape[1]))
    start = 0
    for i in range(len(count)):
        data[start: start + count[i], :] = multivariate_normal(means[i], np.diag(precis[i] * np.ones(means.shape[1])), count[i])
        start = start + count[i]
    s = np.arange(n)
    np.random.shuffle(s)
    return data[s]
Example #40
    def _process_cfp(self, cfp: "CFP"):
        if not cfp.is_buy:
            return
        if self.awi.is_bankrupt(
                cfp.publisher) or not self.can_expect_agreement(cfp=cfp,
                                                                margin=0):
            return
        profile = self.profiles.get(cfp.product, None)
        if profile is None:
            return
        if profile.cv == 0:
            alpha_u, alpha_q, alpha_t = (
                profile.alpha_u,
                profile.alpha_q,
                profile.alpha_t,
            )
        else:
            alpha_u, alpha_q, alpha_t = tuple(
                dirichlet((profile.alpha_u, profile.alpha_q, profile.alpha_t),
                          size=1)[0])
        beta_u = pos_gauss(profile.beta_u, profile.cv)
        beta_t = pos_gauss(profile.beta_t, profile.cv)
        beta_q = pos_gauss(profile.beta_q, profile.cv)

        tau_u = pos_gauss(profile.tau_u, profile.cv)
        tau_t = pos_gauss(profile.tau_t, profile.cv)
        tau_q = pos_gauss(profile.tau_q, profile.cv)

        ufun = LinearUtilityAggregationFunction(
            issue_utilities={
                "time": lambda x: x**tau_t / beta_t,
                "quantity": lambda x: x**tau_q / beta_q,
                "unit_price": lambda x: x**tau_u / beta_u,
            },
            weights={
                "time": alpha_t,
                "quantity": alpha_q,
                "unit_price": alpha_u
            },
        )
        ufun.reserved_value = ufun({
            "time":
            cfp.max_time,
            "quantity":
            cfp.max_quantity,
            "unit_price":
            cfp.money_resolution if cfp.money_resolution is not None else 0.0,
        })
        # ufun = normalize(, outcomes=cfp.outcomes, infeasible_cutoff=-1)
        negotiator = self.negotiator_type(name=self.name + "*" +
                                          cfp.publisher[:4],
                                          ufun=ufun)
        self.n_neg_trials[cfp.id] += 1
        self.request_negotiation(cfp=cfp, negotiator=negotiator)
Example #41
def generate_corpus(categories,
                    vocabulary,
                    documents,
                    hyp_pi=None,
                    hyp_thetas=None):
    """Create true parameters and sample data for a corpus of labeled documents.

        @param categories: number of categories
        @type categories: integer
        @param vocabulary: vocabulary size
        @type vocabulary: integer
        @param documents: number of documents in the corpus
        @type documents: integer
        @param hyp_pi: optional category hyperparameter, default uninformative
        @type hyp_pi: list or None
        @param hyp_thetas: optional word count hyperparameter, default uninformative
        @type hyp_thetas: list or None
        @return: word distributions per category, documents, document labels
        @rtype: tuple
        """
    # Set up the hyperparameters.
    if hyp_pi == None:
        hyp_pi = [1] * categories
    if len(hyp_pi) != categories:
        raise Exception()
    if hyp_thetas == None:
        hyp_thetas = [1] * vocabulary
    if len(hyp_thetas) != vocabulary:
        raise Exception()
    # Generate the true model parameters.
    pi = log(dirichlet(hyp_pi, 1)[0])
    thetas = dirichlet(hyp_thetas, categories)
    # Generate the corpus and the true labels.
    corpus = empty((documents, vocabulary), int)
    labels = empty(documents, int)
    for document_index in range(documents):
        category = multinomial_sample(pi)
        labels[document_index] = category
        corpus[document_index] = multinomial(vocabulary * 100,
                                             thetas[category])
    return log(thetas), corpus, labels
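
The corpus generator depends on a multinomial_sample helper that is not shown; since pi is stored as log-probabilities, it presumably draws a single category index from a categorical given log-weights. A hedged stand-in plus a small call (the numpy imports the snippet assumes are spelled out here):

import numpy as np
from numpy import log, empty
from numpy.random import dirichlet, multinomial


def multinomial_sample(log_probs):
    # Draw one category index from a categorical given log-probabilities.
    probs = np.exp(log_probs - np.max(log_probs))
    probs /= probs.sum()
    return np.random.choice(len(probs), p=probs)


log_thetas, corpus, labels = generate_corpus(categories=3, vocabulary=20, documents=10)
print(corpus.shape, labels[:10])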
Example #42
 def _initialize_sub_word_parameter(self):
     sub_word_parameter = np.zeros((self.n_sub_topics, self.V))
     self.V_grid = np.array(range(self.V)).reshape(self.V_sqrt, self.V_sqrt)
     dim, index = None, None
     sampled = {(dim, index)}
     for sub_topic in range(self.n_sub_topics):
         while (dim, index) in sampled:
             dim, index = int(self.rng.rand() + 0.5), self.rng.randint(0, self.V_sqrt)
         sampled.add((dim, index))
         words = self.V_grid[index, :] if dim < 0.5 else self.V_grid[:, index]
         sub_word_parameter[sub_topic, words] = dirichlet(self.sub_word_dirichlet_parameter)
     return sub_word_parameter
Example #43
	def init_emission(self, Obs):
		exists = [False] * self.num_symbols
		for obs in Obs:
			for o in obs:
				exists[o] = True
		no = exists.count(True)
		p = dirichlet([1.0] * no, self.num_states)
		i = 0
		for idx, e in enumerate(exists):
			if e:
				self.B[idx, :] = p[:, i]
				i += 1
def generate_class_prob(classes: int) -> List[float]:
    """
    Generate the random distribution (non-uniform) for each instance in the classifier
    :param classes: The number of classes in the dataset
    :return: The randomised generated probabilities of classes per instance in the data table
    """
    # Call on dirichlet randomiser to generate probabilities
    # Convert the numpy array into a regular non-nested list
    ls: List[float] = ((random.dirichlet(np.ones(classes),
                                         size=1)).tolist())[0]

    return ls
Example #45
    def __init__(self, data_dim=1, pi=None, concentration=1):
        self.data_dim = data_dim
        self.concentration = concentration

        assert pi is not None or concentration is not None, "Either pi or concentration must be specified."
        if pi is not None:
            assert pi.ndim == 1 and pi.size == data_dim and np.all(
                pi >= 0) and np.allclose(pi.sum(), 1)
        else:
            pi = npr.dirichlet(concentration * np.ones(data_dim))

        self.pi = pi
Example #46
def simulate_pf(mean_ret, cov):
    perf, weights = [], []
    for i in range(N_PORTFOLIOS):
        if i % 50000 == 0:
            print(i)
        weights = dirichlet([.08] * n_assets)
        weights /= np.sum(weights)

        r, sd = pf_performance(weights, mean_ret, cov)
        perf.append([r, sd, (r - RF_RATE) / sd])
    perf_df = pd.DataFrame(perf, columns=['ret', 'vol', 'sharpe'])
    return perf_df, weights
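
simulate_pf assumes N_PORTFOLIOS, RF_RATE, n_assets and a pf_performance helper exist in the enclosing module. A plausible mean/volatility helper consistent with how it is called (all names and the annualisation factor below are illustrative, not the original project's code):

import numpy as np


def pf_performance(weights, mean_ret, cov, periods_per_year=252):
    # Annualised portfolio return and volatility for one weight vector.
    r = weights @ mean_ret * periods_per_year
    sd = np.sqrt(weights @ cov @ weights) * np.sqrt(periods_per_year)
    return r, sd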
Example #47
def init_gibbs(rho0, alpha0, gamma0, lam_a_pri, lam_b_pri, T, yt):
    K = 1
    zt = np.zeros(T, dtype='int')
    beta_vec = dirichlet(np.array([1, gamma0]), size=1)[0]
    beta_new = beta_vec[-1]
    beta_vec = beta_vec[:-1]
    n_mat = np.array([[0]]); # t = 0 count as wt=0, don't need to infer wt
    ysum = np.array([yt[0]])
    ycnt = np.array([np.ones(len(yt[0]))])
    
    zt, n_mat, ysum, ycnt, beta_vec, beta_new, K = sample_one_step_ahead(zt, yt, n_mat, ysum, ycnt, beta_vec, beta_new, alpha0, gamma0, lam_a_pri, lam_b_pri, rho0, K)
    return rho0, alpha0, gamma0, lam_a_pri, lam_b_pri, K, zt, beta_vec, beta_new, n_mat, ysum, ycnt
Example #48
 def sample(self, data_len=1):
     ndim = self.alpha.ndim
     if ndim == 1:
         dst = dirichlet(self.alpha, size=data_len).T
         if data_len == 1:
             dst = dst[:, 0]
     elif ndim == 2:
         dst = zeros((self.n_states, self.len_2d, data_len))
         if self.alpha.shape[0] == self.alpha.shape[1]:
             for k in range(self.alpha.shape[0]):
                 dst[:, k] = dirichlet(self.alpha[:, k], size=data_len).T
         else:
             for k in range(self.len_2d):
                 dst[:, k, :] = dirichlet(self.alpha[:, k], size=data_len).T
         if data_len == 1:
             dst = dst[:, :, 0]
     else:
         logger.error('data dim %d is not supported' % ndim)
         dst = None
     return dst
Example #49
def generate_corpus():
    corpus = np.zeros((NUM_DOCS, vocab_size), dtype='int64')
    beta = generate_beta()
    for m in range(NUM_DOCS):
        # Get topic distribution for current document
        theta = nprand.dirichlet(alpha)
        for i in range(DOC_LENGTH):
            # Sample topic
            zi = lda.sample(theta)
            w = lda.sample(beta[zi,:])
            corpus[m,w] += 1
    return corpus
Example #50
	def test_sample_dirichlet(self):
		N = 100

		for K in [2, 5, 10]:
			for alpha in [.1, .5, 1., 4., 50.]:
				samples0 = dirichlet(zeros(K) + alpha, size=N).T
				samples1 = sample_dirichlet(K, N, alpha)

				p = ks_2samp(samples0.ravel(), samples1.ravel())[1]

				self.assertGreater(p, 1e-6)
				self.assertLess(max(abs(1. - samples1.sum(0))), 1e-6)
def generateExpressionLevels(transcripts, distribution):
    expressionLevels = {}
    if distribution == UNIFORM_ARG:
        for transcript in transcripts.values():
            expressionLevels[transcript.name] = 1.0 / len(transcripts)
    elif distribution == DIRICHLET_ARG:
        ones = numpy.ones(len(transcripts))
        probabilities = dirichlet(ones, 1)[0]
        for i, transcript in enumerate(transcripts.values()):
            expressionLevels[transcript.name] = probabilities[i] 
    print "Source expression: " + str(expressionLevels)
    return expressionLevels
Example #52
File: hmm.py Project: EyuEyu/SVI
	def __init__(self, data):

		# observation parameters
		mu = np.mean(data)
		sigma = np.var(data)**0.5
		self.obs_params = np.array([normal(loc=mu, scale=sigma, size=5),
											normal(loc=mu, scale=sigma, size=5),
											normal(loc=mu, scale=sigma, size=5),
											normal(loc=mu, scale=sigma, size=5),
											normal(loc=mu, scale=sigma, size=5)])
	
		# pi[0] is initial state distribution
		self.pi = np.array([dirichlet([1, 1, 1, 1, 1]),
						dirichlet([1, 1, 1, 1, 1]),
						dirichlet([1, 1, 1, 1, 1]),
						dirichlet([1, 1, 1, 1, 1]),
						dirichlet([1, 1, 1, 1, 1]),
						dirichlet([1, 1, 1, 1, 1])])

		# pi[1:] becomes transition matrix
		self.A = self.pi[1:]

		# state sequence
		self.x = np.zeros((data.size))

		# observation sequence
		self.y_t = np.zeros((data.size))

		# likelihood potentials
		self.L = np.zeros((5, data.size))

		# standard message passing
		self.forward_messages = hf.compute_forward_messages(data.size, self.pi[0], 
																self.A, self.L)
		self.backward_messages = hf.compute_backward_messages(data.size, self.A, self.L)
def simulate_portfolios(mean_ret, cov, rf_rate=rf_rate, short=True):
    alpha = np.full(shape=n_assets, fill_value=.05)
    weights = dirichlet(alpha=alpha, size=NUM_PF)
    if short:
        weights *= choice([-1, 1], size=weights.shape)
    returns = weights @ mean_ret.values + 1
    returns = returns ** periods_per_year - 1
    std = (weights @ weekly_returns.T).std(1)
    std *= np.sqrt(periods_per_year)
    sharpe = (returns - rf_rate) / std
    return pd.DataFrame({'Annualized std': std,
                         'Annualized Returns': returns,
                         'Sharpe Ratio': sharpe}), weights
Example #54
    def generate_artificial_data(self, D, N, noise_threshold=0.0):
        N = int(N)
        self.super_Theta = dirichlet(self.super_dirichlet_parameter, size=D)
        self.sub_Theta = dirichlet(self.super_sub_dirichlet_parameter, size=(D, self.n_super_topics))
        if noise_threshold > 0.0:
            sub_Theta_noise = dirichlet(self.super_sub_noise_dirichlet_parameter, size=(D, self.n_super_topics))
            self.sub_Theta = np.concatenate((self.sub_Theta * (1 - noise_threshold), sub_Theta_noise * noise_threshold),
                                            axis=2)

        self.doc_super = np.array([np.random.multinomial(N, self.super_Theta[i]) for i in range(D)])
        self.doc_sub = np.zeros((D, self.sub_Theta.shape[2]))
        for d, super_counts in enumerate(self.doc_super):
            for super_topic, num in enumerate(super_counts):
                if num > 0:
                    self.doc_sub[d] += np.random.multinomial(num, self.sub_Theta[d][super_topic])

        X = np.zeros((D, self.V), dtype=int)
        for d, sub_counts in enumerate(self.doc_sub):
            for sub_topic, num in enumerate(sub_counts):
                if num > 0:
                    X[d] += np.random.multinomial(num, self.sub_word_parameter[sub_topic])
        return X
Example #55
    def sample_node_prob():
        """step 8 of update_network_model, sample new node probability beta"""
        ro = []
        for node in node_ids:
            if node == -1: continue
            ro.append(sample_outlink_size(node) + sample_inlink_size(node))

        print(ro)
        ro.append(fixed['gamma_H'])
        beta = dirichlet(alpha=ro)

        for i, node in enumerate(node_ids):
            state['beta'][node] = beta[i]
    def __init__(self, returns, method, swarm=30):
        self.returns = returns
        self.swarm = swarm
        self.corr = returns.corr()
        self.n = len(self.returns.columns)
        self.method = method

        self.pbest_portfolios = []
        self.xbest_portfolios = []
        for i in range(self.swarm):
            weights = nrand.dirichlet(numpy.ones(self.n), 1)[0]
            self.xbest_portfolios.append(Portfolio(self.returns, weights))
            self.pbest_portfolios.append(Portfolio(self.returns, weights))
Example #57
def sample_dirichlet(W, beta, memberships, out=None):
    K, T = memberships.shape[1], W.shape[1]

    prior = np.ndarray((T,), dtype=np.float64)
    if out is None:
        out = np.ndarray((K, T), dtype=np.float64)

    for k in range(K):
        prior[:] = W.T.dot(memberships[:, k])
        prior += beta
        out[k, :] = rd.dirichlet(prior)

    return out
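
A small usage sketch for the function above (rd is assumed to be numpy.random, as in the snippet): given a document-term count matrix W and soft memberships over K groups, each output row is a Dirichlet draw from the membership-weighted word counts plus the symmetric prior beta.

import numpy as np
import numpy.random as rd

rng = np.random.default_rng(0)
W = rng.integers(0, 5, size=(8, 12)).astype(np.float64)  # 8 docs, 12 terms
memberships = rng.dirichlet(np.ones(3), size=8)          # soft assignment to K=3 groups
topics = sample_dirichlet(W, beta=0.01, memberships=memberships)
print(topics.shape)                          # (3, 12)
assert np.allclose(topics.sum(axis=1), 1.0)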