Example 1
	def update_beliefs(self, observation_dict=None):
		"""
		This method builds the skeleton of the CPTs using a Dirichlet distribution.
		It uses the alpha dictionary to create the CPTs and stores them in the
		beliefs dictionary.
		"""
		# first, update the alpha parameters
		if observation_dict:
			self.update_alpha_parameters(observation_dict)
		# iterate over the alpha dictionary
		for variable in self.alpha_params:
			table = []
			if not self.alpha_params[variable]["has_parents"]:
				alpha = self.alpha_params[variable][""]
				table = dirichlet.rvs(alpha, size=1).tolist()
			else:
				for parents_instance in self.alpha_params[variable]:
					if parents_instance == "has_parents":
						continue
					alpha = self.alpha_params[variable][parents_instance]
					probabilities = np.squeeze(dirichlet.rvs(alpha, size=1))
					table.append(probabilities)
				table = np.array(table).transpose().tolist()
			self.beliefs[variable] = table
		logging.info("Beliefs after update")
		logging.info(json.dumps(self.beliefs, indent=2))
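For context, the core operation above is one Dirichlet draw per parent configuration. A minimal standalone sketch (the alpha pseudo-counts below are made up, not taken from the example):

import numpy as np
from scipy.stats import dirichlet

alpha = [2.0, 5.0, 1.0]  # made-up pseudo-counts for a single parents_instance
column = np.squeeze(dirichlet.rvs(alpha, size=1))
print(column, column.sum())  # one CPT column; entries are probabilities summing to 1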
Example 2
    def _sampler(self, doc_term):
        docs_number = doc_term.shape[0]
        vocab_number = doc_term.shape[1]
        Phi = dirichlet.rvs([self.Beta] * vocab_number, size=self.K, random_state=self.seed)
        Theta = dirichlet.rvs([self.Alpha] * self.K, random_state=self.seed)
        # initialize responsibilities: each row starts as Alpha * Theta, shape (docs_number, K)
        Z = np.asarray([[self.Alpha] * self.K] * docs_number) * Theta
        print(Phi)
        print(Theta)

        itr = 0
        while itr <= self.max_iter:
            # update Phi and Beta
            e = self.Beta + np.multiply(doc_term, Z.T.reshape(self.K, docs_number, 1)).sum(axis=1) - 1 + 0.1
            for k in range(len(e)):
                Phi[k] = dirichlet.rvs(e[k], random_state=None)

            # update Z
            temp = np.power(np.repeat(Phi.reshape(Phi.shape[0], -1, Phi.shape[1]), docs_number, axis=1), doc_term)
            temp = np.prod(temp, axis=2)
            Z = np.multiply(temp, Z.T).T
            Z = Z/Z.sum(axis=1, keepdims=True)

            # update Theta
            d = self.Alpha + Z.sum(axis=0, keepdims=True)
            Theta = dirichlet.rvs(d[0], random_state=None)

            itr += 1

        return Phi, Theta, Z
Example 3
    def __init__(self, k, MD, alpha, beta, random_state=None):
        self.k = k
        if isinstance(alpha, numbers.Real):
            self.alpha = alpha * np.ones(self.k)
        else:
            self.alpha = alpha
        assert len(self.alpha) == self.k

        self.MD = MD
        if isinstance(beta, numbers.Real):
            self.betas = [[beta * np.ones(md) for md in self.MD]
                          for _ in range(self.k)]
        else:
            self.betas = beta
        for kk in range(self.k):
            for d, md in enumerate(MD):
                assert len(self.betas[kk][d]) == md

        self.random_state = random_state
        self.weights = dirichlet.rvs(self.alpha,
                                     size=1,
                                     random_state=self.random_state)[0]
        self.thetas = [[
            dirichlet.rvs(beta_1_dim, size=1,
                          random_state=self.random_state)[0]
            for beta_1_dim in self.betas[kk]
        ] for kk in range(self.k)]
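The same two-level pattern in isolation: one Dirichlet draw for the mixture weights and one per (component, dimension) pair. A minimal sketch with assumed values for k and MD:

import numpy as np
from scipy.stats import dirichlet

k, MD = 3, [4, 2]
weights = dirichlet.rvs(np.ones(k), size=1, random_state=0)[0]
thetas = [[dirichlet.rvs(np.ones(md), size=1)[0] for md in MD]
          for _ in range(k)]
print(weights.sum())                 # 1.0: the weights lie on the simplex
print([t.sum() for t in thetas[0]])  # each theta is itself a probability vector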
Example 4
def generate_emb(
        n,
        dim,
        model_spec,
):
    common_init = dirichlet.rvs(
        np.ones(dim, dtype=np.float32) * model_spec["common_init_alpha"]
    ).squeeze()
    common = model_spec["rescale_common"] * common_init
    embs = dirichlet.rvs(
        common,
        size=n
    )
    return embs
Example 5
def _forward_sample(ldactr, n_docs, n_topics, n_words, n_words_per_doc, alpha,
                    beta):
    # docs, _ = gen_docs(n_docs, n_topics, n_words, n_words_per_doc, alpha, beta)
    # lda = ldactr(docs, n_topics, n_words, alpha, beta, seed)
    # z, asgn, n_dk, _ = _get_lda_properties(lda)
    docs = [{'w': [0] * n_words_per_doc} for _ in range(n_docs)]
    n_dk = np.zeros((
        n_docs,
        n_topics,
    ))
    n_kw = np.zeros((
        n_topics,
        n_words,
    ))
    asgn = []
    for d in range(n_docs):
        asgn_d = []
        theta_d = dirichlet.rvs([alpha] * n_topics)[0]
        for _ in range(n_words_per_doc):
            k = discrete_draw(theta_d)
            asgn_d.append(k)
            n_dk[d, k] += 1
        asgn.append(asgn_d)
    z = [item for sublist in asgn for item in sublist]
    docs, phi = _geweke_draw_docs(docs,
                                  asgn,
                                  n_dk,
                                  n_kw,
                                  alpha,
                                  beta,
                                  init=True)
    return docs, z, phi
Example 6
def data_fn(n_tasks=10,
            n_clust=4,
            n_entities=(100, 500),
            n_dim=2,
            device=None,
            return_params=False):
    n_ent = np.random.randint(*n_entities)

    x = np.random.randn(n_tasks, n_ent, n_dim) * 0.3
    mu = np.random.rand(n_tasks, n_dim, n_clust) * 8 - 4
    pi = dirichlet.rvs(np.ones(n_clust), n_tasks)
    idx = np.zeros((n_tasks, n_ent), dtype=np.int64)

    for i in range(n_tasks):
        idx[i] = np.random.choice(n_clust, n_ent, p=pi[i])
        x[i] += mu[i, :, idx[i]]

    x = torch.tensor(x).float().to(device)
    idx = torch.tensor(idx).to(device)

    if not return_params:
        return x, idx, None

    si = 0.3 * torch.ones(n_tasks, n_dim, n_clust).float().to(device)
    mu = torch.tensor(mu).float().to(device)
    pi = torch.log(torch.tensor(pi).float()).to(device)
    return x, idx, None, mu, si, pi
Example 7
def SmaplingDIr(obs, index):
    """
    Estimate the 95% highest posterior density (HPD) interval of a Dirichlet distribution.
    Although the observed data follow a multinomial distribution, this also covers
    binary data (in which case the Dirichlet reduces to a Beta distribution).

    Parameters
    ----------
    obs : 1D array
        Number of observations of each event.
    index : int
        Event whose occurrence probability is analyzed.

    Returns
    -------
    95% HPD interval for the occurrence probability of event `index`.
    """
    a = np.ones(np.size(obs))  # uniform prior
    a += obs  # posterior after observing the data
    #posterior = dirichlet(a)
    sample = dirichlet.rvs(
        a, size=10000)  # sample parameter values from the posterior distribution
    sample_index = sample[:, index]  # focus on parameter[index] alone
    """
    plt.hist(sample_index)
    plt.xlim(0,1)
    plt.show()
    """
    # calculate 95% HDI
    return pm.stats.hpd(sample_index)
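A self-contained sketch of the same idea for the binary case mentioned in the docstring, with made-up counts and a hypothetical hpd_95 helper standing in for pm.stats.hpd:

import numpy as np
from scipy.stats import dirichlet

obs = np.array([7, 3])  # made-up binary counts; the posterior is Beta(8, 4)
sample = dirichlet.rvs(obs + 1, size=10000)[:, 0]

def hpd_95(x):
    # narrowest window containing 95% of the sorted samples (a simple HPD estimate)
    x = np.sort(x)
    m = int(0.95 * len(x))
    widths = x[m:] - x[:len(x) - m]
    lo = np.argmin(widths)
    return x[lo], x[lo + m]

print(hpd_95(sample))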
Example 8
def generate_constraints_dirichlet(ntasks, total):
    """Generate some random constraints that sum to a given total.
    These constraints are uniformly distributed over the simplex via the
    Dirichlet distribution."""
    ret = dirichlet.rvs([1]*ntasks)[0] * total
    assert abs(sum(ret) - total) < 1e-5  # Allows for some floating point error
    return list(ret)
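A quick standalone check of the pattern above: a flat Dirichlet(1, ..., 1) draw scaled by the total yields nonnegative parts that sum to the total (the values below are illustrative):

from scipy.stats import dirichlet

total, ntasks = 100.0, 4
parts = dirichlet.rvs([1] * ntasks)[0] * total
print(parts, parts.sum())  # sums to 100.0 up to floating point error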
Example 9
def random_initialization(data, num_components, seed=None):
    """
    Initializes parameters randomly.

    :param data: observed data
    :param num_components: number of components
    :param seed: random seed
    :return:
    """

    dim = data.shape[1]
    alpha = np.ones(num_components)
    mixture_weights = dirichlet.rvs(alpha, size=1, random_state=seed)[0]
    min_values = np.min(data, axis=0)
    max_values = np.max(data, axis=0)
    # Means are generated randomly within the data range
    means = list((max_values - min_values) *
                 np.random.rand(num_components, dim) + min_values)
    covariances = [
        0.25 * np.diag(
            np.abs((max_values - min_values) * np.random.rand(dim) + min_values))
        for _ in range(num_components)
    ]

    return mixture_weights, means, covariances
Example 10
    def draw(self, K=10, N=1 * 10**5, m=3, gaussian=False):

        if self.seed is not None:
            np.random.seed(self.seed)

        alphas = gamma.rvs(5, size=m)  # shape parameters
        #print(sum(alphas))  # equivalent sample size
        self.p = dirichlet.rvs(alpha=alphas, size=1)[0]
        self.phi_is = multinomial.rvs(1, self.p, size=N)  # draw from the categorical p.m.f.

        self.x_draws = np.zeros((N, K))
        self.hyper_loc, self.hyper_scale, self.thetas, self.var, self.covs, self.rdraws = \
            dict(), dict(), dict(), tuple(), tuple(), tuple()

        for i in range(m):

            self.hyper_loc["mean" + str(i + 1)] = norm.rvs(size=1, loc=0, scale=5)
            self.hyper_scale["scale" + str(i + 1)] = 1 / gamma.rvs(5, size=1)

            self.thetas["mean" + str(i + 1)] = norm.rvs(size=K,
                                                        loc=self.hyper_loc["mean" + str(i + 1)],
                                                        scale=self.hyper_scale["scale" + str(i + 1)])
            self.thetas["Sigma" + str(i + 1)] = np.eye(K) * (1 / gamma.rvs(5, size=K))
            self.thetas["nu" + str(i + 1)] = randint.rvs(K + 2, K + 10, size=1)[0]

            if gaussian:
                self.covs += (self.thetas['Sigma' + str(i + 1)], )
            else:
                self.covs += (wishart.rvs(df=self.thetas['nu' + str(i + 1)],
                                          scale=self.thetas['Sigma' + str(i + 1)],
                                          size=1), )
                self.var += (self.thetas["nu" + str(i + 1)] /
                             (self.thetas["nu" + str(i + 1)] - 2) *
                             self.covs[i], )  # variance-covariance matrix of the Student-t component
            self.rdraws += (np.random.multivariate_normal(self.thetas["mean" + str(i + 1)],
                                                          self.covs[i], N), )

            self.Phi = np.tile(self.phi_is[:, i], K).reshape(K, N).T  # repeat phi vector to match the random matrix
            self.x_draws += np.multiply(self.Phi, self.rdraws[i])
        return self.x_draws
Example 11
def generate_categorical_dist(z_alpha, x_alpha, t_alpha, y_alpha):
    """z_alpha of shape (z_categories,), x_alpha of shape (z_categories, x_dim, x_categories),
    t_alpha of shape (z_categories, t_categories), y_alpha of shape (z_categories, t_categories, y_categories)"""
    z_c, x_dim, x_c = x_alpha.shape
    _, t_c, y_c = y_alpha.shape
    z_probs = dirichlet.rvs(z_alpha, size=1)[0]
    x_probs = np.zeros(x_alpha.shape)
    t_probs = np.zeros(t_alpha.shape)
    y_probs = np.zeros(y_alpha.shape)
    for z_cat in range(z_c):
        for x_d in range(x_dim):
            x_probs[z_cat,x_d,:] = dirichlet.rvs(x_alpha[z_cat,x_d,:],size=1)[0]
        t_probs[z_cat,:] = dirichlet.rvs(t_alpha[z_cat,:],size=1)[0]
        for t_cat in range(t_c):
            y_probs[z_cat,t_cat,:] = dirichlet.rvs(y_alpha[z_cat,t_cat,:],size=1)[0]
    return z_probs,x_probs,t_probs,y_probs
Example 12
    def random_sample(self):
        alphas = np.random.random_sample(
            self.nb) * (self.aub - self.alb) + self.alb
        betas = np.random.random_sample(
            self.nb) * (self.bub - self.blb) + self.blb
        omegas = D.rvs([1] * self.nb)[0]
        return alphas, betas, omegas
Example 13
    def sample(self, point, n_samples=1):
        """Sample from the Dirichlet distribution.

        Sample from the Dirichlet distribution with parameters provided
        by point. This gives n_samples points in the simplex.

        Parameters
        ----------
        point : array-like, shape=[..., dim]
            Point representing a Dirichlet distribution.
        n_samples : int
            Number of points to sample for each set of parameters in point.
            Optional, default: 1.

        Returns
        -------
        samples : array-like, shape=[..., n_samples, dim]
            Sample from the Dirichlet distributions.
        """
        geomstats.errors.check_belongs(point, self)
        point = gs.to_ndarray(point, to_ndim=2)
        samples = []
        for param in point:
            samples.append(gs.array(dirichlet.rvs(param, size=n_samples)))
        return samples[0] if len(point) == 1 else gs.stack(samples)
Example 14
def generate_categorical_prob(num_of_categories, alpha=None):
    if alpha is None:
        # no concentration given: draw a normalized alpha vector at random
        alpha = np.random.mtrand.dirichlet([10] * num_of_categories)
    else:
        alpha = alpha * np.ones(num_of_categories)
    var = dirichlet.rvs(alpha=alpha, size=1, random_state=None)
    return var[0]
Example 15
def mcts(game,
         evaluator,
         num_simulations=32,
         add_dir_noise=True,
         dir_noise_weight=DIRICHLET_WEIGHT,
         dir_alpha=DIRICHLET_ALPHA,
         verbose=False):
    def _gen_pi(node):
        pi_t = np.zeros_like(policy)
        for child_id in node.children:
            pi_t[child_id] = 1. * node.children[child_id].num_visit
        return 1. / (node.num_visit - 1) * pi_t

    v, p = evaluator.predict(
        np.reshape(game.game_state * game.current_player.color,
                   newshape=(1, game.board.board_size, game.board.board_size,
                             1)))
    policy = p[0]
    if add_dir_noise:
        policy = (1-dir_noise_weight)*policy + \
                 dir_noise_weight*dirichlet.rvs([dir_alpha]*(p.shape[1]))[0]
    root = Node(value=v[0, 0], policy=policy)
    for i in range(num_simulations):
        # debug('start %d-th simulation' % (i+1))
        game_status = game.get_game_status()
        simulation(root, game, evaluator)
        game.reset(game_status)
    return _gen_pi(root)
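The two lines mixing the policy with Dirichlet noise are the AlphaZero-style exploration trick: pi' = (1 - w) * pi + w * Dir(alpha). A standalone sketch with assumed values for the noise weight and concentration:

import numpy as np
from scipy.stats import dirichlet

policy = np.array([0.5, 0.3, 0.2])  # network prior over three moves
w, alpha = 0.25, 0.3                # assumed noise weight and concentration
noise = dirichlet.rvs([alpha] * policy.size)[0]
noisy = (1 - w) * policy + w * noise
print(noisy, noisy.sum())           # still a probability vector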
Example 16
    def set_random_probabilities(self):
        # setters and getters come in pairs
        # input / output
        prior = np.random.randint(1, 10, self.valuenumber)
        # fixed prior = (120, 22, 23, ……,)

        self.probabilities = dirichlet.rvs(prior)[0]
Example 17
def Fig5AC():
    os.chdir('../appendix/Appendix3_constant_env/scenario1')
    CompExcl = np.loadtxt('CompetitiveExclusion_model1.csv',
                          delimiter=',',
                          skiprows=1)
    BothExtinct = np.loadtxt('BothExtinction_model1.csv',
                             delimiter=',',
                             skiprows=1)
    death = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
    col_list = ['#8dd3c7', '#fb8072', '#80b1d3']
    lab_list = ['scarce', 'mean', 'abundant']
    width = 0.05
    for i in range(len(lab_list)):

        plt.bar(death,
                CompExcl[:, i],
                color=col_list[i],
                width=width,
                align='center',
                hatch='..',
                label='exclusion of fittest')
        plt.bar(death,
                BothExtinct[:, i],
                color=col_list[i],
                label='both extinction',
                width=width,
                align='center',
                bottom=CompExcl[:, i])

        plt.xticks(fontsize=16)
        plt.ylabel('probability', fontsize=20)
        plt.yticks(fontsize=16)
        plt.ylim(0, 1)
        plt.xlabel('toxin sensitivity', fontsize=20)
        plt.legend(loc='lower center',
                   bbox_to_anchor=(.5, 1.05),
                   ncol=2,
                   fontsize=16)

        plt.plot(death,
                 CompExcl[:, i],
                 color='k',
                 label=lab_list[i],
                 linewidth=3)
        plt.text(x=0.05, y=0.9, s=lab_list[i], fontsize=20)

        for j in range(np.size(death)):
            comp = CompExcl[j, i]
            a = np.array([comp, 1 - comp
                          ]) * 10**5  # total number of simulations is 10**5
            sample = dirichlet.rvs(a + 1, size=10000)
            hdp_l, hdp_h = pm.stats.hpd(sample[:, 0])
            plt.vlines(death[j], hdp_l, hdp_h, color=col_list[i])
        plt.savefig('ConstantEnv_ver2' + lab_list[i] + '.pdf',
                    bbox_inches='tight',
                    pad_inches=0.05)
        plt.show()
Example 18
    def draw(self, K=10, N=1 * 10**5, m=3, gaussian=False):
        """
        Inputs:
        -------
        N: sample size
        K: Dimension of Normal/Student distr.
        m: number of mixture components
        """
        np.random.seed(self.seed)
        self.st0 = np.random.get_state()  # get initial state of RNG
        #np.random.set_state(self.st0)
        print("Drawing from", m, "component mixture distribution.")
        alphas = gamma.rvs(5, size=m)  # shape parameter
        #print(sum(alphas))                              # equivalent sample size
        self.p = dirichlet.rvs(alpha=alphas, size=1)[0]
        self.phi_is = multinomial.rvs(1, self.p,
                                      size=N)  # draw from categorical p.m.f

        self.x_draws = np.zeros((N, K))
        self.hyper_loc, self.hyper_scale, self.thetas, self.var, self.covs, self.rdraws = dict(
        ), dict(), dict(), tuple(), tuple(), tuple()

        for i in range(m):

            self.hyper_loc["mean" + str(i + 1)] = norm.rvs(size=1,
                                                           loc=0,
                                                           scale=5)
            self.hyper_scale["scale" + str(i + 1)] = 1 / gamma.rvs(5, size=1)

            self.thetas["mean" + str(i + 1)] = norm.rvs(
                size=K,
                loc=self.hyper_loc["mean" + str(i + 1)],
                scale=self.hyper_scale["scale" + str(i + 1)])
            self.thetas["Sigma" +
                        str(i + 1)] = np.eye(K) * (1 / gamma.rvs(5, size=K))
            self.thetas["nu" + str(i + 1)] = randint.rvs(K + 2, K + 10,
                                                         size=1)[0]

            if gaussian:
                self.covs += (self.thetas['Sigma' + str(i + 1)], )
            else:
                self.covs += (wishart.rvs(df=self.thetas['nu' + str(i + 1)],
                                          scale=self.thetas['Sigma' +
                                                            str(i + 1)],
                                          size=1), )
                self.var += (
                    self.thetas["nu" + str(i + 1)] /
                    (self.thetas["nu" + str(i + 1)] - 2) * self.covs[i],
                )  # variance-covariance matrix of the Student-t component
            self.rdraws += (np.random.multivariate_normal(
                self.thetas["mean" + str(i + 1)], self.covs[i], N), )

            self.Phi = np.tile(self.phi_is[:, i], K).reshape(
                K, N).T  # repeat phi vector to match with random matrix
            self.x_draws += np.multiply(self.Phi, self.rdraws[i])

        return self.x_draws, np.argmax(self.phi_is, 1)  # X, latent
Example 19
    def sample(cls, params=None, nat_params=None, size=1):
        # Sample from P( x | Ѳ; α )
        assert (params is None) ^ (nat_params is None)

        (alpha, ) = params if params is not None else cls.natToStandard(
            *nat_params)
        ans = dirichlet.rvs(alpha=alpha, size=size)
        cls.checkShape(ans)
        return ans
Example 20
    def __init__(self, K):
        fig = plt.figure(figsize=(12, 8), facecolor='white')
        ax_1 = fig.add_subplot(111, frameon=False)
        plt.show(block=False)
        self.ax_1 = ax_1
        self.fig = fig

        self.xlim = [-6, 6.]
        self.ylim = [-6, 6.]
        self.num_its = 10

        if K == 3:
            colours = np.eye(3)  # with three colours we may as well use RGB
        else:
            # otherwise try to pick some contrasting colours
            colours = dirichlet.rvs(0.1 * np.ones(3), K) \
                      + dirichlet.rvs(0.1 * np.ones(3), K)
            colours = colours / sum(colours)
        self.colours = colours
        self.z_to_colour = lambda z: colours.T.dot(np.reshape(z, (K, 1)))
Example 21
    def test(self, alpha, p, size=1):
        q = alpha * p
        d = dirichlet.rvs(q, size, self.random_state)
        print(d.shape)
        mean = np.zeros(q.shape)
        for i in d:
            mean = mean + i
            # print(i)
        mean = mean / size
        print(mean)
Example 22
def __sample_dirichlet__(n, max_alpha=10):
    """
    Samples from the Dirichlet distribution to produce
    a probability vector of length n. The entries of the
    probability vector sum to 1.
    :param n: Number of alphas to sample.
    :param max_alpha: The maximum alpha.
    :return: Array of Dirichlet distributed values.
    """
    return np.array(dirichlet.rvs(__generate_alphas__(n, max_alpha))[0])
Example 23
def dist_in_hull(points, n):
    dims = points.shape[-1]
    hull = points[ConvexHull(points).vertices]
    deln = points[Delaunay(hull).simplices]

    vols = np.abs(
        det(deln[:, :dims, :] - deln[:, dims:, :])) / np.math.factorial(dims)
    # pick simplices proportionally to volume, then draw uniform barycentric
    # coordinates from a flat Dirichlet
    sample = np.random.choice(len(vols), size=n, p=vols / vols.sum())

    return np.einsum('ijk, ij -> ik', deln[sample],
                     dirichlet.rvs([1] * (dims + 1), size=n))
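A short usage sketch of dist_in_hull, assuming the imports it needs (numpy.linalg.det, scipy.spatial's ConvexHull and Delaunay) are in scope; every sample should land inside the hull:

import numpy as np
from scipy.spatial import ConvexHull, Delaunay

points = np.random.rand(30, 2)  # a 2-D point cloud
samples = dist_in_hull(points, 1000)
tri = Delaunay(points[ConvexHull(points).vertices])
print((tri.find_simplex(samples) >= 0).all())  # True: all samples fall inside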
Example 24
    def draw(self, N_samples=1):
        """

        Returns
        -------
        Posterior draw from the transition model
        """
        # generate transition model
        if N_samples == 1:
            T = np.zeros_like(self.data)
            alpha_post = self.alpha + self.data
            for curState in range(self.nStates):
                for action in range(self.nActions):
                    T[:, curState, action] = dirichlet.rvs(alpha_post[:, curState, action])
        else:
            T = [np.zeros_like(self.data) for _ in range(N_samples)]
            alpha_post = self.alpha + self.data
            for curState in range(self.nStates):
                for action in range(self.nActions):
                    samples = np.split(dirichlet.rvs(alpha_post[:, curState, action], size=N_samples), N_samples)
                    for s, sample in enumerate(samples):
                        T[s][:, curState, action] = sample
        return T
Example 25
    def __init__(self, docs, n_topics, n_words, alpha=1.0, beta=1.0,
                 init_mode='prior', seed=None):
        """
        Parameters
        ----------
        docs : list<dict>
            list of document data structures
        n_topics : int
            number of topics
        n_words : int
            number of words in corpus (vocabulary)
        alpha : float (0, Inf), optional
            symmetric Dirichlet parameter for the topic/document distribution
        beta : float (0, Inf), optional
            symmetric Dirichlet parameter for the word/topic distribution
        """
        self._docs = docs
        self._n_docs = len(docs)
        self._n_topics = n_topics
        self._n_words = n_words
        self._alpha = alpha
        self._beta = beta

        # number of words assigned to topic k in doc d
        self._n_dk = np.zeros((self._n_docs, self._n_topics,))
        # number of times word w is assigned to topic k
        self._n_kw = np.zeros((self._n_topics, self._n_words,))
        # number of times any word is assigned to topic k
        self._n_k = np.zeros((1, self._n_topics,))
        # Entry z[d][w] is the topic to which the w^th word in document d is
        # assigned
        self._z = []
        self._key = []

        for d, doc in enumerate(self._docs):
            self._z.append([])
            if init_mode == 'prior':
                theta_k = dirichlet.rvs([self._alpha]*self._n_topics)[0]
            elif init_mode == 'random':
                theta_k = np.ones(self._n_topics)/self._n_topics
            else:
                raise ValueError("init_mode must be 'random' or 'prior'")
            for w, wrd in enumerate(doc['w']):
                topic = int(discrete_draw(theta_k))
                self._z[d].append(topic)

                self._n_dk[d, topic] += 1.0
                self._n_kw[topic, wrd] += 1.0
                self._n_k[0, topic] += 1.0
                self._key.append((d, wrd, w,))
Example 26
def simulate_data(ndata):
    L = 500
    beta_dens = np.zeros((L, len(xgrid)))
    for j in range(L):
        beta_dens[j, :] = beta.pdf(xgrid, j + 1, L - j)

    out = []
    for i in range(ndata):
        ws = dirichlet.rvs(np.ones(L) * 0.1)[0]
        pdf = np.sum(beta_dens * ws[:, np.newaxis], axis=0)
        curr = Distribution()
        curr.init_from_pdf(xgrid, pdf)
        out.append(curr)
    return out
Example 27
def sample_dirichlet_ps_prior_layered(n_layers, n_samples=1):
    """
    Samples from the Dirichlet ps prior for the layered model

    Returns
    -------
    ps: 2D array
        Array of arrays of ps drawn from the Dirichlet ps prior
    """
    spacings = dirichlet.rvs([1] * (n_layers + 1), size=n_samples)
    cuml_spacings = np.cumsum(spacings, axis=1)
    sampled_ps = (
        1 - cuml_spacings)[:, :n_layers]  # only take l out of l+1 spacings
    return sampled_ps
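A brief usage sketch: flat-Dirichlet spacings combined through cumulative sums give, per sample, a strictly decreasing sequence of probabilities in (0, 1):

import numpy as np
from scipy.stats import dirichlet

ps = sample_dirichlet_ps_prior_layered(n_layers=3, n_samples=2)
print(ps.shape)                         # (2, 3)
print(np.all(np.diff(ps, axis=1) < 0))  # True: each row is decreasing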
Example 28
    def random_sample(self):
        # limits of alpha: alb, aub
        # limts of beta: blb, bub
        # number of beta values: nb
        alphas = np.random.random_sample(
            self.nb) * (self.aub - self.alb) + self.alb
        print("alphas", alphas)

        betas = np.random.random_sample(
            self.nb) * (self.bub - self.blb) + self.blb
        alphas = np.ones(len(betas))  # overwrites the alphas sampled above with ones
        # Random sample with Dirichlet distribution
        omegas = D.rvs([1] * self.nb)[0]
        return alphas, betas, omegas
Example 29
    def draw_samples(key):
        df = in_alphas[key]
        seed = hash(key) % 2**32
        np.random.seed(seed=seed)
        samples = df.T.sample(n=n, random_state=seed + 1, replace=True)
        names = list()
        draws = list()
        for name, samp in samples.iterrows():
            draws.append(dirichlet.rvs(samp, size=1).flatten())
            names.append(name)
        names = uniquify(names)
        new_df = pd.DataFrame(dict(zip(names, draws)), index=df.index)
        shared_alphas[key] = new_df
        return key
Example 30
def compute_prior_galaxies(mu_params, sigma_params, q_params, d, seed=None):
    ''' Generate draws from the prior.
    mu_params: (array-like) mean and variance of the normal prior on mu
    sigma_params: (array-like) parameters of the inverse-gamma prior on sigma_square
    q_params: (array-like) Dirichlet concentration parameters for the mixture weights q
    d: (int) number of components

    returns: (tuple) the draws (mu, sigma_square, q) from the prior
    '''
    rnd = np.random.RandomState(seed)
    mu = rnd.normal(loc=mu_params[0], scale=np.sqrt(mu_params[1]), size=d)
    sigma_square = invgamma.rvs(a=sigma_params[0] / 2,
                                scale=sigma_params[1] / 2,
                                size=d)
    q = dirichlet.rvs(alpha=q_params, random_state=seed)[0]

    return mu, sigma_square, q
Example 31
def dist_in_hull(a, b, n):
    points = np.vstack(pypoman.duality.compute_polytope_vertices(a, b))
    dims = points.shape[-1]
    hull = points[ConvexHull(points).vertices]
    deln = points[Delaunay(hull).simplices]

    vols = np.abs(
        det(deln[:, :dims, :] - deln[:, dims:, :])) / np.math.factorial(dims)
    sample = np.random.choice(
        len(vols),
        size=n,
    )  # p = vols / vols.sum()  -- simplices are drawn uniformly here, not volume-weighted

    return np.einsum('ijk, ij -> ik', deln[sample],
                     dirichlet.rvs([1] * (dims + 1), size=n))
Example 32
def _forward_sample(ldactr, n_docs, n_topics, n_words, n_words_per_doc, alpha,
                    beta):
    # docs, _ = gen_docs(n_docs, n_topics, n_words, n_words_per_doc, alpha, beta)
    # lda = ldactr(docs, n_topics, n_words, alpha, beta, seed)
    # z, asgn, n_dk, _ = _get_lda_properties(lda)
    docs = [{'w': [0]*n_words_per_doc} for _ in range(n_docs)]
    n_dk = np.zeros((n_docs, n_topics,))
    n_kw = np.zeros((n_topics, n_words,))
    asgn = []
    for d in range(n_docs):
        asgn_d = []
        theta_d = dirichlet.rvs([alpha]*n_topics)[0]
        for _ in range(n_words_per_doc):
            k = discrete_draw(theta_d)
            asgn_d.append(k)
            n_dk[d, k] += 1
        asgn.append(asgn_d)
    z = [item for sublist in asgn for item in sublist]
    docs, phi = _geweke_draw_docs(docs, asgn, n_dk, n_kw, alpha, beta,
                                  init=True)
    return docs, z, phi
Example 33
def compare(model_dict, ic='WAIC', method='stacking', b_samples=1000,
            alpha=1, seed=None, round_to=2):
    R"""Compare models based on the widely available information criterion (WAIC)
    or leave-one-out (LOO) cross-validation.
    Read more theory here - in a paper by some of the leading authorities on
    model selection - dx.doi.org/10.1111/1467-9868.00353

    Parameters
    ----------
    model_dict : dictionary of PyMC3 traces indexed by corresponding model
    ic : string
        Information Criterion (WAIC or LOO) used to compare models.
        Default WAIC.
    method : str
        Method used to estimate the weights for each model. Available options
        are:
            - 'stacking' : (default) stacking of predictive distributions.
            - 'BB-pseudo-BMA' : pseudo-Bayesian Model averaging using Akaike-type
               weighting. The weights are stabilized using the Bayesian bootstrap
            - 'pseudo-BMA': pseudo-Bayesian Model averaging using Akaike-type
               weighting, without Bootstrap stabilization (not recommended)

        For more information read https://arxiv.org/abs/1704.02030
    b_samples: int
        Number of samples taken by the Bayesian bootstrap estimation. Only
        useful when method = 'BB-pseudo-BMA'.
    alpha : float
        The shape parameter in the Dirichlet distribution used for the
        Bayesian bootstrap. Only useful when method = 'BB-pseudo-BMA'. When
        alpha=1 (default), the distribution is uniform over the simplex. A
        smaller alpha will keep the final weights further away from 0 and 1.
    seed : int or np.random.RandomState instance
           If int or RandomState, use it for seeding the Bayesian bootstrap.
           Only useful when method = 'BB-pseudo-BMA'. Default None, in which
           case the global np.random state is used.
    round_to : int
        Number of decimals used to round results (default 2).

    Returns
    -------
    A DataFrame, ordered from lowest to highest IC. The index reflects
    the order in which the models are passed to this function. The columns are:
    IC : Information Criteria (WAIC or LOO).
        Smaller IC indicates higher out-of-sample predictive fit ("better" model).
        Default WAIC.
    pIC : Estimated effective number of parameters.
    dIC : Relative difference between each IC (WAIC or LOO) and the lowest IC.
        It's always 0 for the top-ranked model.
    weight: Relative weight for each model.
        This can be loosely interpreted as the probability of each model
        (among the compared models) given the data. By default the uncertainty
        in the weight estimation is considered using Bayesian bootstrap.
    SE : Standard error of the IC estimate.
        If method = BB-pseudo-BMA these values are estimated using Bayesian
        bootstrap.
    dSE : Standard error of the difference in IC between each model and the
        top-ranked model.
        It's always 0 for the top-ranked model.
    warning : A value of 1 indicates that the computation of the IC may not be
        reliable. See the related warning messages in pm.waic and pm.loo for
        details.
    """

    names = [model.name for model in model_dict if model.name]
    if not names:
        names = np.arange(len(model_dict))

    if ic == 'WAIC':
        ic_func = waic
        df_comp = pd.DataFrame(index=names,
                               columns=['WAIC', 'pWAIC', 'dWAIC', 'weight',
                                        'SE', 'dSE', 'var_warn'])

    elif ic == 'LOO':
        ic_func = loo
        df_comp = pd.DataFrame(index=names,
                               columns=['LOO', 'pLOO', 'dLOO', 'weight',
                                        'SE', 'dSE', 'shape_warn'])

    else:
        raise NotImplementedError(
            'The information criterion {} is not supported.'.format(ic))

    if len(set([len(m.observed_RVs) for m in model_dict])) != 1:
        raise ValueError(
            'The number of observed RVs should be the same across all models')

    if method not in ['stacking', 'BB-pseudo-BMA', 'pseudo-BMA']:
        raise ValueError('The method {}, to compute weights, '
                         'is not supported.'.format(method))

    ics = []
    for n, (m, t) in zip(names, model_dict.items()):
        ics.append((n, ic_func(t, m, pointwise=True)))

    ics.sort(key=lambda x: x[1][0])

    if method == 'stacking':
        N, K, ic_i = _ic_matrix(ics)
        exp_ic_i = np.exp(-0.5 * ic_i)
        Km = K - 1

        def w_fuller(w):
            return np.concatenate((w, [max(1. - np.sum(w), 0.)]))

        def log_score(w):
            w_full = w_fuller(w)
            score = 0.
            for i in range(N):
                score += np.log(np.dot(exp_ic_i[i], w_full))
            return -score

        def gradient(w):
            w_full = w_fuller(w)
            grad = np.zeros(Km)
            for k in range(Km):
                for i in range(N):
                    grad[k] += (exp_ic_i[i, k] - exp_ic_i[i, Km]) / \
                        np.dot(exp_ic_i[i], w_full)
            return -grad

        theta = np.full(Km, 1. / K)
        bounds = [(0., 1.) for i in range(Km)]
        constraints = [{'type': 'ineq', 'fun': lambda x: -np.sum(x) + 1.},
                       {'type': 'ineq', 'fun': lambda x: np.sum(x)}]

        w = minimize(fun=log_score,
                     x0=theta,
                     jac=gradient,
                     bounds=bounds,
                     constraints=constraints)

        weights = w_fuller(w['x'])
        ses = [res[1] for _, res in ics]

    elif method == 'BB-pseudo-BMA':
        N, K, ic_i = _ic_matrix(ics)
        ic_i = ic_i * N

        b_weighting = dirichlet.rvs(alpha=[alpha] * N, size=b_samples,
                                    random_state=seed)
        weights = np.zeros((b_samples, K))
        z_bs = np.zeros_like(weights)
        for i in range(b_samples):
            z_b = np.dot(b_weighting[i], ic_i)
            u_weights = np.exp(-0.5 * (z_b - np.min(z_b)))
            z_bs[i] = z_b
            weights[i] = u_weights / np.sum(u_weights)

        weights = weights.mean(0)
        ses = z_bs.std(0)

    elif method == 'pseudo-BMA':
        min_ic = ics[0][1][0]
        Z = np.sum([np.exp(-0.5 * (x[1][0] - min_ic)) for x in ics])
        weights = []
        ses = []
        for _, res in ics:
            weights.append(np.exp(-0.5 * (res[0] - min_ic)) / Z)
            ses.append(res[1])

    if np.any(weights):
        for i, (idx, res) in enumerate(ics):
            diff = res[4] - ics[0][1][4]
            d_ic = np.sum(diff)
            d_se = np.sqrt(len(diff) * np.var(diff))
            se = ses[i]
            weight = weights[i]
            df_comp.at[idx] = (round(res[0], round_to),
                               round(res[2], round_to),
                               round(d_ic, round_to),
                               round(weight, round_to),
                               round(se, round_to),
                               round(d_se, round_to),
                               res[3])

        return df_comp.sort_values(by=ic)
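To illustrate the 'BB-pseudo-BMA' branch above in isolation, a minimal sketch with made-up pointwise IC values (3 observations, 2 models):

import numpy as np
from scipy.stats import dirichlet

ic_i = np.array([[10.2, 11.0], [9.8, 12.1], [10.5, 10.9]])  # N x K pointwise ICs (made up)
N, K = ic_i.shape
b_weighting = dirichlet.rvs(alpha=[1.0] * N, size=1000, random_state=42)
z_b = b_weighting @ (ic_i * N)                      # bootstrap-weighted IC per model
u = np.exp(-0.5 * (z_b - z_b.min(axis=1, keepdims=True)))
weights = (u / u.sum(axis=1, keepdims=True)).mean(axis=0)
print(weights)                                      # Akaike-type model weights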
Example 34
def _categorical_params(n_cats, sep):
    sep = min(.95, sep)
    # use 5 as an arbitrary cardinality
    # add .01 to probability to make sure there are no zeros
    ps = (dirichlet.rvs([1.-sep]*5, n_cats)+.01)/1.05
    return [{'alpha': p} for p in ps]
Example 35
def gen_docs(n_docs=None, n_topics=None, n_words=None, n_words_per_doc=10,
             alpha=.1, beta=.1, noise=.0, phis=None, thetas=None):
    """ Generate documents from LDA generative process.

    Parameters
    ----------
    n_docs : int, optional if both phis and thetas are not None
        Number of documents to generate
    n_topics : int, optional if both phis and thetas are not None
        Number of topics
    n_words : int, optional if both phis and thetas are not None
        Number of words in the vocabulary
    n_words_per_doc : int, or array-like(int)
        Number of words to generate for each document
    alpha : float (0, Inf)
        Symmetric Dirichlet parameter for the distribution of topics in
        documents.
    beta : float (0, Inf)
        Symmetric Dirichlet parameter for the distribution of words in topics.
    noise : float [0, 1], optional
        Proportion of generated words to scramble.

    Returns
    -------
    docs : list
        List of data structures (dict) representing the generated documents.
        Each doc has a 'counts' key where each key is an integer word index and
        each value is the number of times that word occurs in the document.
    phis : list
        List of topics (weights of words)
    """
    if phis is None:
        phis = []
        for j in range(n_topics):
            phis.append(dirichlet.rvs([beta]*n_words)[0])
    else:
        n_words = len(phis[0])
        if not all(len(phi) == n_words for phi in phis):
            raise ValueError("All arrays in phis must be the same length")

        n_topics = len(thetas[0])
        if not all(len(theta) == n_topics for theta in thetas):
            raise ValueError("All arrays in thetas must be the same length")

        if len(phis) != n_topics:
            raise ValueError(
                "phis and thetas disagree on the number of topics")

        n_docs = len(thetas)

    if isinstance(n_words_per_doc, int):
        n_words_per_doc = [n_words_per_doc]*n_docs

    if isinstance(n_words_per_doc, (list, np.ndarray)):
        if len(n_words_per_doc) != n_docs:
            raise ValueError("n_docs and n_words_per_doc are inconsistent")

    docs = []
    for k in range(n_docs):
        if thetas is None:
            theta_k = dirichlet.rvs([alpha]*n_topics)[0]
        else:
            theta_k = thetas[k]
        doc = {
            'words': set([i for i in range(n_words)]),
            'counts': dict((i, 0) for i in range(n_words)),
            'w': [],
            'theta': theta_k
        }
        for _ in range(n_words_per_doc[k]):
            j = int(discrete_draw(theta_k))
            w = int(discrete_draw(phis[j]))
            if noise > 0:
                if np.random.rand() < noise:
                    w = random.randrange(n_words)
            doc['counts'][w] += 1
            doc['w'].append(w)
        docs.append(doc)

    return docs, phis
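A hypothetical usage sketch for gen_docs, supplying a minimal stand-in for the undefined discrete_draw helper:

import numpy as np
from scipy.stats import dirichlet

def discrete_draw(p):
    # stand-in helper: a single categorical draw from probability vector p
    return np.random.choice(len(p), p=p)

docs, phis = gen_docs(n_docs=5, n_topics=3, n_words=20, n_words_per_doc=50)
print(len(docs), len(phis))             # 5 documents, 3 topic-word distributions
print(sum(docs[0]['counts'].values()))  # 50 words in the first document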