Example 1
	def update_beliefs(self, observation_dict=None):
		"""
		This method builds the skeleton of the CPTs using a Dirichlet distribution.
		It uses the alpha dictionary to create the CPTs and stores them in the
		beliefs dictionary.
		"""
		# first, update the alpha parameters
		if observation_dict:
			self.update_alpha_parameters(observation_dict)
		# iterate over the alpha dictionary
		for variable in self.alpha_params:
			table = []
			if not self.alpha_params[variable]["has_parents"]:
				alpha = self.alpha_params[variable][""]
				table = dirichlet.rvs(alpha, size=1).tolist()
			else:
				for parents_instance in self.alpha_params[variable]:
					if parents_instance == "has_parents":
						continue
					alpha = self.alpha_params[variable][parents_instance]
					probabilities = np.squeeze(dirichlet.rvs(alpha, size=1))
					table.append(probabilities)
				table = np.array(table).transpose().tolist()
			self.beliefs[variable] = table
		logging.info("Beliefs after update")
		logging.info(json.dumps(self.beliefs, indent=2))
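For context, the core operation above is one Dirichlet draw per parent configuration. A minimal standalone sketch (the alpha pseudo-counts below are made up, not taken from the example):

import numpy as np
from scipy.stats import dirichlet

alpha = [2.0, 5.0, 1.0]  # made-up pseudo-counts for a single parents_instance
column = np.squeeze(dirichlet.rvs(alpha, size=1))
print(column, column.sum())  # one CPT column; entries are probabilities summing to 1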
Example 2
    def _sampler(self, doc_term):
        docs_number = doc_term.shape[0]
        vocab_number = doc_term.shape[1]
        Phi = dirichlet.rvs([self.Beta] * vocab_number, size=self.K, random_state=self.seed)
        Theta = dirichlet.rvs([self.Alpha] * self.K, random_state=self.seed)
        # initialize responsibilities: each row starts as Alpha * Theta, shape (docs_number, K)
        Z = np.asarray([[self.Alpha] * self.K] * docs_number) * Theta
        print(Phi)
        print(Theta)

        itr = 0
        while itr <= self.max_iter:
            # update Phi and Beta
            e = self.Beta + np.multiply(doc_term, Z.T.reshape(self.K, docs_number, 1)).sum(axis=1) - 1 + 0.1
            for k in range(len(e)):
                Phi[k] = dirichlet.rvs(e[k], random_state=None)

            # update Z
            temp = np.power(np.repeat(Phi.reshape(Phi.shape[0], -1, Phi.shape[1]), docs_number, axis=1), doc_term)
            temp = np.prod(temp, axis=2)
            Z = np.multiply(temp, Z.T).T
            Z = Z/Z.sum(axis=1, keepdims=True)

            # update Theta
            d = self.Alpha + Z.sum(axis=0, keepdims=True)
            Theta = dirichlet.rvs(d[0], random_state=None)

            itr += 1

        return Phi, Theta, Z
Example 3
    def __init__(self, k, MD, alpha, beta, random_state=None):
        self.k = k
        if isinstance(alpha, numbers.Real):
            self.alpha = alpha * np.ones(self.k)
        else:
            self.alpha = alpha
        assert len(self.alpha) == self.k

        self.MD = MD
        if isinstance(beta, numbers.Real):
            self.betas = [[beta * np.ones(md) for md in self.MD]
                          for _ in range(self.k)]
        else:
            self.betas = beta
        for kk in range(self.k):
            for d, md in enumerate(MD):
                assert len(self.betas[kk][d]) == md

        self.random_state = random_state
        self.weights = dirichlet.rvs(self.alpha,
                                     size=1,
                                     random_state=self.random_state)[0]
        self.thetas = [[
            dirichlet.rvs(beta_1_dim, size=1,
                          random_state=self.random_state)[0]
            for beta_1_dim in self.betas[kk]
        ] for kk in range(self.k)]
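The same two-level pattern in isolation: one Dirichlet draw for the mixture weights and one per (component, dimension) pair. A minimal sketch with assumed values for k and MD:

import numpy as np
from scipy.stats import dirichlet

k, MD = 3, [4, 2]
weights = dirichlet.rvs(np.ones(k), size=1, random_state=0)[0]
thetas = [[dirichlet.rvs(np.ones(md), size=1)[0] for md in MD]
          for _ in range(k)]
print(weights.sum())                 # 1.0: the weights lie on the simplex
print([t.sum() for t in thetas[0]])  # each theta is itself a probability vector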
Example 4
def generate_emb(
        n,
        dim,
        model_spec,
):
    common_init = dirichlet.rvs(
        np.ones(dim, dtype=np.float32) * model_spec["common_init_alpha"]
    ).squeeze()
    common = model_spec["rescale_common"] * common_init
    embs = dirichlet.rvs(
        common,
        size=n
    )
    return embs
Example 5
def _forward_sample(ldactr, n_docs, n_topics, n_words, n_words_per_doc, alpha,
                    beta):
    # docs, _ = gen_docs(n_docs, n_topics, n_words, n_words_per_doc, alpha, beta)
    # lda = ldactr(docs, n_topics, n_words, alpha, beta, seed)
    # z, asgn, n_dk, _ = _get_lda_properties(lda)
    docs = [{'w': [0] * n_words_per_doc} for _ in range(n_docs)]
    n_dk = np.zeros((
        n_docs,
        n_topics,
    ))
    n_kw = np.zeros((
        n_topics,
        n_words,
    ))
    asgn = []
    for d in range(n_docs):
        asgn_d = []
        theta_d = dirichlet.rvs([alpha] * n_topics)[0]
        for _ in range(n_words_per_doc):
            k = discrete_draw(theta_d)
            asgn_d.append(k)
            n_dk[d, k] += 1
        asgn.append(asgn_d)
    z = [item for sublist in asgn for item in sublist]
    docs, phi = _geweke_draw_docs(docs,
                                  asgn,
                                  n_dk,
                                  n_kw,
                                  alpha,
                                  beta,
                                  init=True)
    return docs, z, phi
Example 6
def data_fn(n_tasks=10,
            n_clust=4,
            n_entities=(100, 500),
            n_dim=2,
            device=None,
            return_params=False):
    n_ent = np.random.randint(*n_entities)

    x = np.random.randn(n_tasks, n_ent, n_dim) * 0.3
    mu = np.random.rand(n_tasks, n_dim, n_clust) * 8 - 4
    pi = dirichlet.rvs(np.ones(n_clust), n_tasks)
    idx = np.zeros((n_tasks, n_ent), dtype=np.int64)

    for i in range(n_tasks):
        idx[i] = np.random.choice(n_clust, n_ent, p=pi[i])
        x[i] += mu[i, :, idx[i]]

    x = torch.tensor(x).float().to(device)
    idx = torch.tensor(idx).to(device)

    if not return_params:
        return x, idx, None

    si = 0.3 * torch.ones(n_tasks, n_dim, n_clust).float().to(device)
    mu = torch.tensor(mu).float().to(device)
    pi = torch.log(torch.tensor(pi).float()).to(device)
    return x, idx, None, mu, si, pi
Example 7
def SmaplingDIr(obs, index):
    """
    Estimate the 95% highest posterior density (HPD) interval of a Dirichlet distribution.
    Although the observed data follow a multinomial distribution, this also covers
    binary data (in which case the Dirichlet reduces to a Beta distribution).

    Parameters
    ----------
    obs : 1D array
        Number of observations of each event.
    index : int
        Event whose occurrence probability is analyzed.

    Returns
    -------
    95% HPD interval for the occurrence probability of event `index`.
    """
    a = np.ones(np.size(obs))  # uniform prior
    a += obs  # posterior after observing the data
    #posterior = dirichlet(a)
    sample = dirichlet.rvs(
        a, size=10000)  # sample parameter values from the posterior distribution
    sample_index = sample[:, index]  # focus on parameter[index] alone
    """
    plt.hist(sample_index)
    plt.xlim(0,1)
    plt.show()
    """
    # calculate 95% HDI
    return pm.stats.hpd(sample_index)
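A self-contained sketch of the same idea for the binary case mentioned in the docstring, with made-up counts and a hypothetical hpd_95 helper standing in for pm.stats.hpd:

import numpy as np
from scipy.stats import dirichlet

obs = np.array([7, 3])  # made-up binary counts; the posterior is Beta(8, 4)
sample = dirichlet.rvs(obs + 1, size=10000)[:, 0]

def hpd_95(x):
    # narrowest window containing 95% of the sorted samples (a simple HPD estimate)
    x = np.sort(x)
    m = int(0.95 * len(x))
    widths = x[m:] - x[:len(x) - m]
    lo = np.argmin(widths)
    return x[lo], x[lo + m]

print(hpd_95(sample))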
Example 8
def generate_constraints_dirichlet(ntasks, total):
    """Generate some random constraints that sum to a given total.
    These constraints are uniformly distributed over the simplex via the
    Dirichlet distribution."""
    ret = dirichlet.rvs([1]*ntasks)[0] * total
    assert abs(sum(ret) - total) < 1e-5  # Allows for some floating point error
    return list(ret)
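A quick standalone check of the pattern above: a flat Dirichlet(1, ..., 1) draw scaled by the total yields nonnegative parts that sum to the total (the values below are illustrative):

from scipy.stats import dirichlet

total, ntasks = 100.0, 4
parts = dirichlet.rvs([1] * ntasks)[0] * total
print(parts, parts.sum())  # sums to 100.0 up to floating point error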
Example 9
def random_initialization(data, num_components, seed=None):
    """
    Initializes parameters randomly.

    :param data: observed data
    :param num_components: number of components
    :param seed: random seed
    :return:
    """

    dim = data.shape[1]
    alpha = np.ones(num_components)
    mixture_weights = dirichlet.rvs(alpha, size=1, random_state=seed)[0]
    min_values = np.min(data, axis=0)
    max_values = np.max(data, axis=0)
    # Means are generated randomly within the data range
    means = list((max_values - min_values) *
                 np.random.rand(num_components, dim) + min_values)
    covariances = [
        0.25 * np.diag(
            np.abs((max_values - min_values) * np.random.rand(dim) + min_values))
        for _ in range(num_components)
    ]

    return mixture_weights, means, covariances
Example 10
    def draw(self, K=10, N=1 * 10**5, m=3, gaussian=False):

        if self.seed is not None:
            np.random.seed(self.seed)

        alphas = gamma.rvs(5, size=m)  # shape parameters
        #print(sum(alphas))  # equivalent sample size
        self.p = dirichlet.rvs(alpha=alphas, size=1)[0]
        self.phi_is = multinomial.rvs(1, self.p, size=N)  # draw from the categorical p.m.f.

        self.x_draws = np.zeros((N, K))
        self.hyper_loc, self.hyper_scale, self.thetas, self.var, self.covs, self.rdraws = \
            dict(), dict(), dict(), tuple(), tuple(), tuple()

        for i in range(m):

            self.hyper_loc["mean" + str(i + 1)] = norm.rvs(size=1, loc=0, scale=5)
            self.hyper_scale["scale" + str(i + 1)] = 1 / gamma.rvs(5, size=1)

            self.thetas["mean" + str(i + 1)] = norm.rvs(size=K,
                                                        loc=self.hyper_loc["mean" + str(i + 1)],
                                                        scale=self.hyper_scale["scale" + str(i + 1)])
            self.thetas["Sigma" + str(i + 1)] = np.eye(K) * (1 / gamma.rvs(5, size=K))
            self.thetas["nu" + str(i + 1)] = randint.rvs(K + 2, K + 10, size=1)[0]

            if gaussian:
                self.covs += (self.thetas['Sigma' + str(i + 1)], )
            else:
                self.covs += (wishart.rvs(df=self.thetas['nu' + str(i + 1)],
                                          scale=self.thetas['Sigma' + str(i + 1)],
                                          size=1), )
                self.var += (self.thetas["nu" + str(i + 1)] /
                             (self.thetas["nu" + str(i + 1)] - 2) *
                             self.covs[i], )  # variance-covariance matrix of the Student-t component
            self.rdraws += (np.random.multivariate_normal(self.thetas["mean" + str(i + 1)],
                                                          self.covs[i], N), )

            self.Phi = np.tile(self.phi_is[:, i], K).reshape(K, N).T  # repeat phi vector to match the random matrix
            self.x_draws += np.multiply(self.Phi, self.rdraws[i])
        return self.x_draws
Example 11
def generate_categorical_dist(z_alpha, x_alpha, t_alpha, y_alpha):
    """z_alpha of shape (z_categories,), x_alpha of shape (z_categories, x_dim, x_categories),
    t_alpha of shape (z_categories, t_categories), y_alpha of shape (z_categories, t_categories, y_categories)"""
    z_c, x_dim, x_c = x_alpha.shape
    _, t_c, y_c = y_alpha.shape
    z_probs = dirichlet.rvs(z_alpha, size=1)[0]
    x_probs = np.zeros(x_alpha.shape)
    t_probs = np.zeros(t_alpha.shape)
    y_probs = np.zeros(y_alpha.shape)
    for z_cat in range(z_c):
        for x_d in range(x_dim):
            x_probs[z_cat,x_d,:] = dirichlet.rvs(x_alpha[z_cat,x_d,:],size=1)[0]
        t_probs[z_cat,:] = dirichlet.rvs(t_alpha[z_cat,:],size=1)[0]
        for t_cat in range(t_c):
            y_probs[z_cat,t_cat,:] = dirichlet.rvs(y_alpha[z_cat,t_cat,:],size=1)[0]
    return z_probs,x_probs,t_probs,y_probs
Example 12
    def random_sample(self):
        alphas = np.random.random_sample(
            self.nb) * (self.aub - self.alb) + self.alb
        betas = np.random.random_sample(
            self.nb) * (self.bub - self.blb) + self.blb
        omegas = D.rvs([1] * self.nb)[0]
        return alphas, betas, omegas
Example 13
    def sample(self, point, n_samples=1):
        """Sample from the Dirichlet distribution.

        Sample from the Dirichlet distribution with parameters provided
        by point. This gives n_samples points in the simplex.

        Parameters
        ----------
        point : array-like, shape=[..., dim]
            Point representing a Dirichlet distribution.
        n_samples : int
            Number of points to sample for each set of parameters in point.
            Optional, default: 1.

        Returns
        -------
        samples : array-like, shape=[..., n_samples, dim]
            Sample from the Dirichlet distributions.
        """
        geomstats.errors.check_belongs(point, self)
        point = gs.to_ndarray(point, to_ndim=2)
        samples = []
        for param in point:
            samples.append(gs.array(dirichlet.rvs(param, size=n_samples)))
        return samples[0] if len(point) == 1 else gs.stack(samples)
Example 14
def generate_categorical_prob(num_of_categories, alpha=None):
    if alpha is None:
        # no concentration given: draw a normalized alpha vector at random
        alpha = np.random.mtrand.dirichlet([10] * num_of_categories)
    else:
        alpha = alpha * np.ones(num_of_categories)
    var = dirichlet.rvs(alpha=alpha, size=1, random_state=None)
    return var[0]
Example 15
def mcts(game,
         evaluator,
         num_simulations=32,
         add_dir_noise=True,
         dir_noise_weight=DIRICHLET_WEIGHT,
         dir_alpha=DIRICHLET_ALPHA,
         verbose=False):
    def _gen_pi(node):
        pi_t = np.zeros_like(policy)
        for child_id in node.children:
            pi_t[child_id] = 1. * node.children[child_id].num_visit
        return 1. / (node.num_visit - 1) * pi_t

    v, p = evaluator.predict(
        np.reshape(game.game_state * game.current_player.color,
                   newshape=(1, game.board.board_size, game.board.board_size,
                             1)))
    policy = p[0]
    if add_dir_noise:
        policy = (1-dir_noise_weight)*policy + \
                 dir_noise_weight*dirichlet.rvs([dir_alpha]*(p.shape[1]))[0]
    root = Node(value=v[0, 0], policy=policy)
    for i in range(num_simulations):
        # debug('start %d-th simulation' % (i+1))
        game_status = game.get_game_status()
        simulation(root, game, evaluator)
        game.reset(game_status)
    return _gen_pi(root)
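The two lines mixing the policy with Dirichlet noise are the AlphaZero-style exploration trick: pi' = (1 - w) * pi + w * Dir(alpha). A standalone sketch with assumed values for the noise weight and concentration:

import numpy as np
from scipy.stats import dirichlet

policy = np.array([0.5, 0.3, 0.2])  # network prior over three moves
w, alpha = 0.25, 0.3                # assumed noise weight and concentration
noise = dirichlet.rvs([alpha] * policy.size)[0]
noisy = (1 - w) * policy + w * noise
print(noisy, noisy.sum())           # still a probability vector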
Example 16
    def set_random_probabilities(self):
        # setters and getters come in pairs
        # input / output
        prior = np.random.randint(1, 10, self.valuenumber)
        # fixed prior = (120, 22, 23, ……,)

        self.probabilities = dirichlet.rvs(prior)[0]
Example 17
def Fig5AC():
    os.chdir('../appendix/Appendix3_constant_env/scenario1')
    CompExcl = np.loadtxt('CompetitiveExclusion_model1.csv',
                          delimiter=',',
                          skiprows=1)
    BothExtinct = np.loadtxt('BothExtinction_model1.csv',
                             delimiter=',',
                             skiprows=1)
    death = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
    col_list = ['#8dd3c7', '#fb8072', '#80b1d3']
    lab_list = ['scarce', 'mean', 'abundant']
    width = 0.05
    for i in range(len(lab_list)):

        plt.bar(death,
                CompExcl[:, i],
                color=col_list[i],
                width=width,
                align='center',
                hatch='..',
                label='exclusion of fittest')
        plt.bar(death,
                BothExtinct[:, i],
                color=col_list[i],
                label='both extinction',
                width=width,
                align='center',
                bottom=CompExcl[:, i])

        plt.xticks(fontsize=16)
        plt.ylabel('probability', fontsize=20)
        plt.yticks(fontsize=16)
        plt.ylim(0, 1)
        plt.xlabel('toxin sensitivity', fontsize=20)
        plt.legend(loc='lower center',
                   bbox_to_anchor=(.5, 1.05),
                   ncol=2,
                   fontsize=16)

        plt.plot(death,
                 CompExcl[:, i],
                 color='k',
                 label=lab_list[i],
                 linewidth=3)
        plt.text(x=0.05, y=0.9, s=lab_list[i], fontsize=20)

        for j in range(np.size(death)):
            comp = CompExcl[j, i]
            a = np.array([comp, 1 - comp
                          ]) * 10**5  # total number of simulations is 10**5
            sample = dirichlet.rvs(a + 1, size=10000)
            hdp_l, hdp_h = pm.stats.hpd(sample[:, 0])
            plt.vlines(death[j], hdp_l, hdp_h, color=col_list[i])
        plt.savefig('ConstantEnv_ver2' + lab_list[i] + '.pdf',
                    bbox_inches='tight',
                    pad_inches=0.05)
        plt.show()
Example 18
    def draw(self, K=10, N=1 * 10**5, m=3, gaussian=False):
        """
        Inputs:
        -------
        N: sample size
        K: Dimension of Normal/Student distr.
        m: number of mixture components
        """
        np.random.seed(self.seed)
        self.st0 = np.random.get_state()  # get initial state of RNG
        #np.random.set_state(self.st0)
        print("Drawing from", m, "component mixture distribution.")
        alphas = gamma.rvs(5, size=m)  # shape parameter
        #print(sum(alphas))                              # equivalent sample size
        self.p = dirichlet.rvs(alpha=alphas, size=1)[0]
        self.phi_is = multinomial.rvs(1, self.p,
                                      size=N)  # draw from categorical p.m.f

        self.x_draws = np.zeros((N, K))
        self.hyper_loc, self.hyper_scale, self.thetas, self.var, self.covs, self.rdraws = dict(
        ), dict(), dict(), tuple(), tuple(), tuple()

        for i in range(m):

            self.hyper_loc["mean" + str(i + 1)] = norm.rvs(size=1,
                                                           loc=0,
                                                           scale=5)
            self.hyper_scale["scale" + str(i + 1)] = 1 / gamma.rvs(5, size=1)

            self.thetas["mean" + str(i + 1)] = norm.rvs(
                size=K,
                loc=self.hyper_loc["mean" + str(i + 1)],
                scale=self.hyper_scale["scale" + str(i + 1)])
            self.thetas["Sigma" +
                        str(i + 1)] = np.eye(K) * (1 / gamma.rvs(5, size=K))
            self.thetas["nu" + str(i + 1)] = randint.rvs(K + 2, K + 10,
                                                         size=1)[0]

            if gaussian:
                self.covs += (self.thetas['Sigma' + str(i + 1)], )
            else:
                self.covs += (wishart.rvs(df=self.thetas['nu' + str(i + 1)],
                                          scale=self.thetas['Sigma' +
                                                            str(i + 1)],
                                          size=1), )
                self.var += (
                    self.thetas["nu" + str(i + 1)] /
                    (self.thetas["nu" + str(i + 1)] - 2) * self.covs[i],
                )  # variance-covariance matrix of the Student-t component
            self.rdraws += (np.random.multivariate_normal(
                self.thetas["mean" + str(i + 1)], self.covs[i], N), )

            self.Phi = np.tile(self.phi_is[:, i], K).reshape(
                K, N).T  # repeat phi vector to match with random matrix
            self.x_draws += np.multiply(self.Phi, self.rdraws[i])

        return self.x_draws, np.argmax(self.phi_is, 1)  # X, latent
Example 19
    def sample(cls, params=None, nat_params=None, size=1):
        # Sample from P( x | Ѳ; α )
        assert (params is None) ^ (nat_params is None)

        (alpha, ) = params if params is not None else cls.natToStandard(
            *nat_params)
        ans = dirichlet.rvs(alpha=alpha, size=size)
        cls.checkShape(ans)
        return ans
Example 20
    def __init__(self, K):
        fig = plt.figure(figsize=(12, 8), facecolor='white')
        ax_1 = fig.add_subplot(111, frameon=False)
        plt.show(block=False)
        self.ax_1 = ax_1
        self.fig = fig

        self.xlim = [-6, 6.]
        self.ylim = [-6, 6.]
        self.num_its = 10

        if K == 3:
            colours = np.eye(3)  # with three colours we may as well use RGB
        else:
            # otherwise try to pick some contrasting colours
            colours = dirichlet.rvs(0.1 * np.ones(3), K) \
                      + dirichlet.rvs(0.1 * np.ones(3), K)
            colours = colours / sum(colours)
        self.colours = colours
        self.z_to_colour = lambda z: colours.T.dot(np.reshape(z, (K, 1)))
Example 21
    def test(self, alpha, p, size=1):
        q = alpha * p
        d = dirichlet.rvs(q, size, self.random_state)
        print(d.shape)
        mean = np.zeros(q.shape)
        for i in d:
            mean = mean + i
            # print(i)
        mean = mean / size
        print(mean)
Example 22
def __sample_dirichlet__(n, max_alpha=10):
    """
    Samples from the Dirichlet distribution to produce
    a probability vector of length n. The entries of the
    probability vector sum to 1.
    :param n: Number of alphas to sample.
    :param max_alpha: The maximum alpha.
    :return: Array of Dirichlet distributed values.
    """
    return np.array(dirichlet.rvs(__generate_alphas__(n, max_alpha))[0])
Example 23
def dist_in_hull(points, n):
    dims = points.shape[-1]
    hull = points[ConvexHull(points).vertices]
    deln = points[Delaunay(hull).simplices]

    vols = np.abs(
        det(deln[:, :dims, :] - deln[:, dims:, :])) / np.math.factorial(dims)
    # pick simplices proportionally to volume, then draw uniform barycentric
    # coordinates from a flat Dirichlet
    sample = np.random.choice(len(vols), size=n, p=vols / vols.sum())

    return np.einsum('ijk, ij -> ik', deln[sample],
                     dirichlet.rvs([1] * (dims + 1), size=n))
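A short usage sketch of dist_in_hull, assuming the imports it needs (numpy.linalg.det, scipy.spatial's ConvexHull and Delaunay) are in scope; every sample should land inside the hull:

import numpy as np
from scipy.spatial import ConvexHull, Delaunay

points = np.random.rand(30, 2)  # a 2-D point cloud
samples = dist_in_hull(points, 1000)
tri = Delaunay(points[ConvexHull(points).vertices])
print((tri.find_simplex(samples) >= 0).all())  # True: all samples fall inside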
Example 24
    def draw(self, N_samples=1):
        """

        Returns
        -------
        Posterior draw from the transition model
        """
        # generate transition model
        if N_samples == 1:
            T = np.zeros_like(self.data)
            alpha_post = self.alpha + self.data
            for curState in range(self.nStates):
                for action in range(self.nActions):
                    T[:, curState, action] = dirichlet.rvs(alpha_post[:, curState, action])
        else:
            T = [np.zeros_like(self.data) for _ in range(N_samples)]
            alpha_post = self.alpha + self.data
            for curState in range(self.nStates):
                for action in range(self.nActions):
                    samples = np.split(dirichlet.rvs(alpha_post[:, curState, action], size=N_samples), N_samples)
                    for s, sample in enumerate(samples):
                        T[s][:, curState, action] = sample
        return T
Example 25
    def __init__(self, docs, n_topics, n_words, alpha=1.0, beta=1.0,
                 init_mode='prior', seed=None):
        """
        Parameters
        ----------
        docs : list<dict>
            list of document data structures
        n_topics : int
            number of topics
        n_words : int
            number of words in corpus (vocabulary)
        alpha : float (0, Inf), optional
            symmetric Dirichlet parameter for the topic/document distribution
        beta : float (0, Inf), optional
            symmetric Dirichlet parameter for the word/topic distribution
        """
        self._docs = docs
        self._n_docs = len(docs)
        self._n_topics = n_topics
        self._n_words = n_words
        self._alpha = alpha
        self._beta = beta

        # number of words assigned to topic k in doc d
        self._n_dk = np.zeros((self._n_docs, self._n_topics,))
        # number of times word w is assigned to topic k
        self._n_kw = np.zeros((self._n_topics, self._n_words,))
        # number of times any word is assigned to topic k
        self._n_k = np.zeros((1, self._n_topics,))
        # Entry z[d][w] is the topic to which the w^th word in document d is
        # assigned
        self._z = []
        self._key = []

        for d, doc in enumerate(self._docs):
            self._z.append([])
            if init_mode == 'prior':
                theta_k = dirichlet.rvs([self._alpha]*self._n_topics)[0]
            elif init_mode == 'random':
                theta_k = np.ones(self._n_topics)/self._n_topics
            else:
                raise ValueError("init_mode must be 'random' or 'prior'")
            for w, wrd in enumerate(doc['w']):
                topic = int(discrete_draw(theta_k))
                self._z[d].append(topic)

                self._n_dk[d, topic] += 1.0
                self._n_kw[topic, wrd] += 1.0
                self._n_k[0, topic] += 1.0
                self._key.append((d, wrd, w,))
Example 26
def simulate_data(ndata):
    L = 500
    beta_dens = np.zeros((L, len(xgrid)))
    for j in range(L):
        beta_dens[j, :] = beta.pdf(xgrid, j + 1, L - j)

    out = []
    for i in range(ndata):
        ws = dirichlet.rvs(np.ones(L) * 0.1)[0]
        pdf = np.sum(beta_dens * ws[:, np.newaxis], axis=0)
        curr = Distribution()
        curr.init_from_pdf(xgrid, pdf)
        out.append(curr)
    return out
Example 27
def sample_dirichlet_ps_prior_layered(n_layers, n_samples=1):
    """
    Samples from the Dirichlet ps prior for the layered model

    Returns
    -------
    ps: 2D array
        Array of arrays of ps drawn from the Dirichlet ps prior
    """
    spacings = dirichlet.rvs([1] * (n_layers + 1), size=n_samples)
    cuml_spacings = np.cumsum(spacings, axis=1)
    sampled_ps = (
        1 - cuml_spacings)[:, :n_layers]  # only take l out of l+1 spacings
    return sampled_ps
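A brief usage sketch: flat-Dirichlet spacings combined through cumulative sums give, per sample, a strictly decreasing sequence of probabilities in (0, 1):

import numpy as np
from scipy.stats import dirichlet

ps = sample_dirichlet_ps_prior_layered(n_layers=3, n_samples=2)
print(ps.shape)                         # (2, 3)
print(np.all(np.diff(ps, axis=1) < 0))  # True: each row is decreasing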
Example 28
    def random_sample(self):
        # limits of alpha: alb, aub
        # limts of beta: blb, bub
        # number of beta values: nb
        alphas = np.random.random_sample(
            self.nb) * (self.aub - self.alb) + self.alb
        print("alphas", alphas)

        betas = np.random.random_sample(
            self.nb) * (self.bub - self.blb) + self.blb
        alphas = np.ones(len(betas))  # overwrites the alphas sampled above with ones
        # Random sample with Dirichlet distribution
        omegas = D.rvs([1] * self.nb)[0]
        return alphas, betas, omegas
Example 29
    def draw_samples(key):
        df = in_alphas[key]
        seed = hash(key) % 2**32
        np.random.seed(seed=seed)
        samples = df.T.sample(n=n, random_state=seed + 1, replace=True)
        names = list()
        draws = list()
        for name, samp in samples.iterrows():
            draws.append(dirichlet.rvs(samp, size=1).flatten())
            names.append(name)
        names = uniquify(names)
        new_df = pd.DataFrame(dict(zip(names, draws)), index=df.index)
        shared_alphas[key] = new_df
        return key
Example 30
def compute_prior_galaxies(mu_params, sigma_params, q_params, d, seed=None):
    ''' Generate draws from the prior.
    mu_params: (array-like) mean and variance of the normal prior on mu
    sigma_params: (array-like) parameters of the inverse-gamma prior on sigma_square
    q_params: (array-like) Dirichlet concentration parameters for the mixture weights q
    d: (int) number of components

    returns: (tuple) the draws (mu, sigma_square, q) from the prior
    '''
    rnd = np.random.RandomState(seed)
    mu = rnd.normal(loc=mu_params[0], scale=np.sqrt(mu_params[1]), size=d)
    sigma_square = invgamma.rvs(a=sigma_params[0] / 2,
                                scale=sigma_params[1] / 2,
                                size=d)
    q = dirichlet.rvs(alpha=q_params, random_state=seed)[0]

    return mu, sigma_square, q
Example 31
def dist_in_hull(a, b, n):
    points = np.vstack(pypoman.duality.compute_polytope_vertices(a, b))
    dims = points.shape[-1]
    hull = points[ConvexHull(points).vertices]
    deln = points[Delaunay(hull).simplices]

    vols = np.abs(
        det(deln[:, :dims, :] - deln[:, dims:, :])) / np.math.factorial(dims)
    sample = np.random.choice(
        len(vols),
        size=n,
    )  # p = vols / vols.sum()  -- simplices are drawn uniformly here, not volume-weighted

    return np.einsum('ijk, ij -> ik', deln[sample],
                     dirichlet.rvs([1] * (dims + 1), size=n))
Example 32
def _forward_sample(ldactr, n_docs, n_topics, n_words, n_words_per_doc, alpha,
                    beta):
    # docs, _ = gen_docs(n_docs, n_topics, n_words, n_words_per_doc, alpha, beta)
    # lda = ldactr(docs, n_topics, n_words, alpha, beta, seed)
    # z, asgn, n_dk, _ = _get_lda_properties(lda)
    docs = [{'w': [0]*n_words_per_doc} for _ in range(n_docs)]
    n_dk = np.zeros((n_docs, n_topics,))
    n_kw = np.zeros((n_topics, n_words,))
    asgn = []
    for d in range(n_docs):
        asgn_d = []
        theta_d = dirichlet.rvs([alpha]*n_topics)[0]
        for _ in range(n_words_per_doc):
            k = discrete_draw(theta_d)
            asgn_d.append(k)
            n_dk[d, k] += 1
        asgn.append(asgn_d)
    z = [item for sublist in asgn for item in sublist]
    docs, phi = _geweke_draw_docs(docs, asgn, n_dk, n_kw, alpha, beta,
                                  init=True)
    return docs, z, phi
Example 33
def compare(model_dict, ic='WAIC', method='stacking', b_samples=1000,
            alpha=1, seed=None, round_to=2):
    R"""Compare models based on the widely available information criterion (WAIC)
    or leave-one-out (LOO) cross-validation.
    Read more theory here - in a paper by some of the leading authorities on
    model selection - dx.doi.org/10.1111/1467-9868.00353

    Parameters
    ----------
    model_dict : dictionary of PyMC3 traces indexed by corresponding model
    ic : string
        Information Criterion (WAIC or LOO) used to compare models.
        Default WAIC.
    method : str
        Method used to estimate the weights for each model. Available options
        are:
            - 'stacking' : (default) stacking of predictive distributions.
            - 'BB-pseudo-BMA' : pseudo-Bayesian Model averaging using Akaike-type
               weighting. The weights are stabilized using the Bayesian bootstrap
            - 'pseudo-BMA': pseudo-Bayesian Model averaging using Akaike-type
               weighting, without Bootstrap stabilization (not recommended)

        For more information read https://arxiv.org/abs/1704.02030
    b_samples: int
        Number of samples taken by the Bayesian bootstrap estimation. Only
        useful when method = 'BB-pseudo-BMA'.
    alpha : float
        The shape parameter in the Dirichlet distribution used for the
        Bayesian bootstrap. Only useful when method = 'BB-pseudo-BMA'. When
        alpha=1 (default), the distribution is uniform over the simplex. A
        smaller alpha will keep the final weights further away from 0 and 1.
    seed : int or np.random.RandomState instance
           If int or RandomState, use it for seeding the Bayesian bootstrap.
           Only useful when method = 'BB-pseudo-BMA'. Default None, in which
           case the global np.random state is used.
    round_to : int
        Number of decimals used to round results (default 2).

    Returns
    -------
    A DataFrame, ordered from lowest to highest IC. The index reflects
    the order in which the models are passed to this function. The columns are:
    IC : Information Criteria (WAIC or LOO).
        Smaller IC indicates higher out-of-sample predictive fit ("better" model).
        Default WAIC.
    pIC : Estimated effective number of parameters.
    dIC : Relative difference between each IC (WAIC or LOO) and the lowest IC.
        It's always 0 for the top-ranked model.
    weight: Relative weight for each model.
        This can be loosely interpreted as the probability of each model
        (among the compared models) given the data. By default the uncertainty
        in the weight estimation is considered using Bayesian bootstrap.
    SE : Standard error of the IC estimate.
        If method = BB-pseudo-BMA these values are estimated using Bayesian
        bootstrap.
    dSE : Standard error of the difference in IC between each model and the
        top-ranked model.
        It's always 0 for the top-ranked model.
    warning : A value of 1 indicates that the computation of the IC may not be
        reliable. See the related warning messages in pm.waic and pm.loo for
        details.
    """

    names = [model.name for model in model_dict if model.name]
    if not names:
        names = np.arange(len(model_dict))

    if ic == 'WAIC':
        ic_func = waic
        df_comp = pd.DataFrame(index=names,
                               columns=['WAIC', 'pWAIC', 'dWAIC', 'weight',
                                        'SE', 'dSE', 'var_warn'])

    elif ic == 'LOO':
        ic_func = loo
        df_comp = pd.DataFrame(index=names,
                               columns=['LOO', 'pLOO', 'dLOO', 'weight',
                                        'SE', 'dSE', 'shape_warn'])

    else:
        raise NotImplementedError(
            'The information criterion {} is not supported.'.format(ic))

    if len(set([len(m.observed_RVs) for m in model_dict])) != 1:
        raise ValueError(
            'The number of observed RVs should be the same across all models')

    if method not in ['stacking', 'BB-pseudo-BMA', 'pseudo-BMA']:
        raise ValueError('The method {}, to compute weights, '
                         'is not supported.'.format(method))

    ics = []
    for n, (m, t) in zip(names, model_dict.items()):
        ics.append((n, ic_func(t, m, pointwise=True)))

    ics.sort(key=lambda x: x[1][0])

    if method == 'stacking':
        N, K, ic_i = _ic_matrix(ics)
        exp_ic_i = np.exp(-0.5 * ic_i)
        Km = K - 1

        def w_fuller(w):
            return np.concatenate((w, [max(1. - np.sum(w), 0.)]))

        def log_score(w):
            w_full = w_fuller(w)
            score = 0.
            for i in range(N):
                score += np.log(np.dot(exp_ic_i[i], w_full))
            return -score

        def gradient(w):
            w_full = w_fuller(w)
            grad = np.zeros(Km)
            for k in range(Km):
                for i in range(N):
                    grad[k] += (exp_ic_i[i, k] - exp_ic_i[i, Km]) / \
                        np.dot(exp_ic_i[i], w_full)
            return -grad

        theta = np.full(Km, 1. / K)
        bounds = [(0., 1.) for i in range(Km)]
        constraints = [{'type': 'ineq', 'fun': lambda x: -np.sum(x) + 1.},
                       {'type': 'ineq', 'fun': lambda x: np.sum(x)}]

        w = minimize(fun=log_score,
                     x0=theta,
                     jac=gradient,
                     bounds=bounds,
                     constraints=constraints)

        weights = w_fuller(w['x'])
        ses = [res[1] for _, res in ics]

    elif method == 'BB-pseudo-BMA':
        N, K, ic_i = _ic_matrix(ics)
        ic_i = ic_i * N

        b_weighting = dirichlet.rvs(alpha=[alpha] * N, size=b_samples,
                                    random_state=seed)
        weights = np.zeros((b_samples, K))
        z_bs = np.zeros_like(weights)
        for i in range(b_samples):
            z_b = np.dot(b_weighting[i], ic_i)
            u_weights = np.exp(-0.5 * (z_b - np.min(z_b)))
            z_bs[i] = z_b
            weights[i] = u_weights / np.sum(u_weights)

        weights = weights.mean(0)
        ses = z_bs.std(0)

    elif method == 'pseudo-BMA':
        min_ic = ics[0][1][0]
        Z = np.sum([np.exp(-0.5 * (x[1][0] - min_ic)) for x in ics])
        weights = []
        ses = []
        for _, res in ics:
            weights.append(np.exp(-0.5 * (res[0] - min_ic)) / Z)
            ses.append(res[1])

    if np.any(weights):
        for i, (idx, res) in enumerate(ics):
            diff = res[4] - ics[0][1][4]
            d_ic = np.sum(diff)
            d_se = np.sqrt(len(diff) * np.var(diff))
            se = ses[i]
            weight = weights[i]
            df_comp.at[idx] = (round(res[0], round_to),
                               round(res[2], round_to),
                               round(d_ic, round_to),
                               round(weight, round_to),
                               round(se, round_to),
                               round(d_se, round_to),
                               res[3])

        return df_comp.sort_values(by=ic)
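To illustrate the 'BB-pseudo-BMA' branch above in isolation, a minimal sketch with made-up pointwise IC values (3 observations, 2 models):

import numpy as np
from scipy.stats import dirichlet

ic_i = np.array([[10.2, 11.0], [9.8, 12.1], [10.5, 10.9]])  # N x K pointwise ICs (made up)
N, K = ic_i.shape
b_weighting = dirichlet.rvs(alpha=[1.0] * N, size=1000, random_state=42)
z_b = b_weighting @ (ic_i * N)                      # bootstrap-weighted IC per model
u = np.exp(-0.5 * (z_b - z_b.min(axis=1, keepdims=True)))
weights = (u / u.sum(axis=1, keepdims=True)).mean(axis=0)
print(weights)                                      # Akaike-type model weights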
Example 34
def _categorical_params(n_cats, sep):
    sep = min(.95, sep)
    # use 5 as an arbitrary cardinality
    # add .01 to probability to make sure there are no zeros
    ps = (dirichlet.rvs([1.-sep]*5, n_cats)+.01)/1.05
    return [{'alpha': p} for p in ps]
Example 35
def gen_docs(n_docs=None, n_topics=None, n_words=None, n_words_per_doc=10,
             alpha=.1, beta=.1, noise=.0, phis=None, thetas=None):
    """ Generate documents from LDA generative process.

    Parameters
    ----------
    n_docs : int, optional if both phis and thetas are not None
        Number of documents to generate
    n_topics : int, optional if both phis and thetas are not None
        Number of topics
    n_words : int, optional if both phis and thetas are not None
        Number of words in the vocabulary
    n_words_per_doc : int, or array-like(int)
        Number of words to generate for each document
    alpha : float (0, Inf)
        Symmetric Dirichlet parameter for the distribution of topics in
        documents.
    beta : float (0, Inf)
        Symmetric Dirichlet parameter for the distribution of words in topics.
    noise : float [0, 1], optional
        Proportion of generated words to scramble.

    Returns
    -------
    docs : list
        List of data structures (dict) representing the generated documents.
        Each doc has a 'counts' key where each key is an integer word index and
        each value is the number of times that word occurs in the document.
    phis : list
        List of topics (weights of words)
    """
    if phis is None:
        phis = []
        for j in range(n_topics):
            phis.append(dirichlet.rvs([beta]*n_words)[0])
    else:
        n_words = len(phis[0])
        if not all(len(phi) == n_words for phi in phis):
            raise ValueError("All arrays in phis must be the same length")

        n_topics = len(thetas[0])
        if not all(len(theta) == n_topics for theta in thetas):
            raise ValueError("All arrays in thetas must be the same length")

        if len(phis) != n_topics:
            raise ValueError(
                "phis and thetas disagree on the number of topics")

        n_docs = len(thetas)

    if isinstance(n_words_per_doc, int):
        n_words_per_doc = [n_words_per_doc]*n_docs

    if isinstance(n_words_per_doc, (list, np.ndarray)):
        if len(n_words_per_doc) != n_docs:
            raise ValueError("n_docs and n_words_per_doc are inconsistent")

    docs = []
    for k in range(n_docs):
        if thetas is None:
            theta_k = dirichlet.rvs([alpha]*n_topics)[0]
        else:
            theta_k = thetas[k]
        doc = {
            'words': set([i for i in range(n_words)]),
            'counts': dict((i, 0) for i in range(n_words)),
            'w': [],
            'theta': theta_k
        }
        for _ in range(n_words_per_doc[k]):
            j = int(discrete_draw(theta_k))
            w = int(discrete_draw(phis[j]))
            if noise > 0:
                if np.random.rand() < noise:
                    w = random.randrange(n_words)
            doc['counts'][w] += 1
            doc['w'].append(w)
        docs.append(doc)

    return docs, phis
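A hypothetical usage sketch for gen_docs, supplying a minimal stand-in for the undefined discrete_draw helper:

import numpy as np
from scipy.stats import dirichlet

def discrete_draw(p):
    # stand-in helper: a single categorical draw from probability vector p
    return np.random.choice(len(p), p=p)

docs, phis = gen_docs(n_docs=5, n_topics=3, n_words=20, n_words_per_doc=50)
print(len(docs), len(phis))             # 5 documents, 3 topic-word distributions
print(sum(docs[0]['counts'].values()))  # 50 words in the first document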