def data_generating_process(N,
                            sigma_0,
                            p_domain,
                            gamma,
                            V,
                            theta,
                            coef,
                            beta=None,
                            random_state=None):
    """

    """
    ## Set Random State
    if random_state is not None:
        np.random.seed(random_state)
    ## Update Beta
    if beta is None:
        beta = 1 / V
    ## Convert Data Types
    theta = np.array(theta)
    coef = np.array(coef)
    ## Normalization of Parameters
    theta = theta / theta.sum(axis=1, keepdims=True)
    ## Update Document Topic Concentration
    theta = theta * sigma_0
    ## Generate Topic-Word Distributions
    phi = stats.dirichlet([beta] * V).rvs(theta.shape[1])
    ## Data Storage
    X_latent = np.zeros((N, coef.shape[1]), dtype=float)
    X = np.zeros((N, phi.shape[1]), dtype=int)
    D = np.zeros(N, dtype=int)
    ## Sample Procedure
    for n in tqdm(range(N), "Sampling"):
        ## Sample Domain
        D[n] = int(np.random.rand() < p_domain)
        ## Sample Document Topic Mixture (Conditioned on Domain)
        X_latent[n] = stats.dirichlet(theta[D[n]]).rvs()
        ## Sample Number of Words
        n_d = stats.poisson(gamma).rvs()
        ## Create Document
        for _ in range(n_d):
            ## Sample Topic
            z = np.where(stats.multinomial(1, X_latent[n]).rvs()[0] > 0)[0][0]
            ## Sample Word
            w = np.random.choice(phi.shape[1], p=phi[z])
            ## Cache
            X[n, w] += 1
    ## Standardize
    X_latent_normed = standardize(X_latent, D)
    ## Compute P(y)
    py = np.zeros(N)
    py[D == 0] = (1 /
                  (1 + np.exp(-coef[[0]].dot(X_latent_normed[D == 0].T))))[0]
    py[D == 1] = (1 /
                  (1 + np.exp(-coef[[1]].dot(X_latent_normed[D == 1].T))))[0]
    ## Sample Y
    y = np.zeros(N)
    y[D == 0] = (np.random.rand((D == 0).sum()) < py[D == 0]).astype(int)
    y[D == 1] = (np.random.rand((D == 1).sum()) < py[D == 1]).astype(int)
    return X_latent, X, y, D, theta, phi
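At its core, each document above is one Dirichlet draw followed by per-word topic and word draws; a minimal self-contained sketch of that inner step (toy sizes, all names hypothetical):

import numpy as np
from scipy import stats

np.random.seed(0)
K, V = 3, 10                                    # topics, vocabulary size
phi = stats.dirichlet([0.1] * V).rvs(K)         # topic-word distributions, shape (K, V)
mixture = stats.dirichlet([1.0] * K).rvs()[0]   # one document's topic mixture
doc = np.zeros(V, dtype=int)                    # bag-of-words counts
for _ in range(stats.poisson(50).rvs()):        # Poisson-distributed document length
    z = np.random.choice(K, p=mixture)          # sample a topic
    w = np.random.choice(V, p=phi[z])           # sample a word from that topic
    doc[w] += 1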
Example #2
 def generate(
     self,
     alpha,
     beta,
     n,
     k,
 ):
     # Minimal completion of this stub: draw n samples from a symmetric
     # k-dimensional Dirichlet(alpha). (Assumption: `beta` is reserved for
     # another prior and is unused here.)
     return dirichlet([alpha] * k).rvs(n)
Example #3
    def jitter(self, concentration=100):
        # Propose a perturbed mixture weight vector near the current pi and
        # return forward/reverse proposal log-densities for an MH acceptance ratio.
        pi = self.params[0]
        new_pi = npr.dirichlet(concentration * pi) + 1e-8
        new_pi /= new_pi.sum()
        fwd_lp = dirichlet(concentration * pi).logpdf(new_pi)
        rev_lp = dirichlet(concentration * new_pi).logpdf(pi)

        new_cluster = copy.deepcopy(self)
        new_cluster._params = (new_pi,)
        return new_cluster, fwd_lp, rev_lp
Example #4
def batch_dirichlet(alpha):
    """Batched `np.ndarray` of Dirichlet frozen distributions.

  To get each frozen distribution, index the returned `np.ndarray` followed by
  `item(0)`.
  """
    if alpha.ndim == 1:
        return stats.dirichlet(alpha)
    return np.array([
        stats.dirichlet(vec) for vec in alpha.reshape([-1, alpha.shape[-1]])
    ]).reshape(alpha.shape[:-1])
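A brief usage sketch (assumes `numpy as np` and `scipy.stats as stats` are imported as the function requires): a (2, 3) batch of concentration vectors yields a shape-(2,) object array whose elements are frozen distributions.

import numpy as np

alpha = np.ones((2, 3))          # two 3-dimensional Dirichlet parameter vectors
dists = batch_dirichlet(alpha)   # object array of shape (2,)
print(dists[0].mean())           # [1/3 1/3 1/3]: each element is a frozen dirichlet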
Example #5
 def sample_pi(self):
     """
     sample pi from posterior
     """
     param = np.ones(self.nClass) * self.alpha / self.nClass
     param += self.counts
     self.pi = stats.dirichlet(param).rvs(size=1).flatten()
Example #6
def test_multiple_entry_calls():
    # Test that calls with multiple x vectors as matrix work

    np.random.seed(2846)

    n = np.random.randint(1, 32)
    alpha = np.random.uniform(10e-10, 100, n)
    d = dirichlet(alpha)

    num_tests = 10
    num_multiple = 5
    xm = None
    for i in range(num_tests):
        for m in range(num_multiple):
            x = np.random.uniform(10e-10, 100, n)
            x /= np.sum(x)
            if xm is not None:
                xm = np.vstack((xm, x))
            else:
                xm = x
        rm = d.pdf(xm.T)
        rs = None
        for xs in xm:
            r = d.pdf(xs)
            if rs is not None:
                rs = np.append(rs, r)
            else:
                rs = r
        assert_array_almost_equal(rm, rs)
Example #7
    def log_prior(self):
        """
        Compute the log likelihood of a set of SBM parameters

        :param x:    (m,p,v) tuple
        :return:
        """
        from scipy.stats import dirichlet
        from graphistician.internals.utils import normal_inverse_wishart_log_prob
        lp = 0

        # Get the log probability of the block probabilities
        lp += dirichlet(self.pi).logpdf(self.m)

        # Get the prior probability of the Gaussian parameters under NIW prior
        for c1 in range(self.C):
            for c2 in range(self.C):
                lp += normal_inverse_wishart_log_prob(self._gaussians[c1][c2])

        if self.special_case_self_conns:
            lp += normal_inverse_wishart_log_prob(self._self_gaussian)

        # Get the probability of the block assignments
        lp += (np.log(self.m)[self.c]).sum()
        return lp
Example #8
 def log_prior(self):
     lp = super(MixtureOfLatentSpaceModels, self).log_prior()
     # p({h} | nu)
     lp += np.dot(np.bincount(self.hs, minlength=self.H), np.log(1e-16 + self.nu))
     # p(nu)
     lp += dirichlet(self.alpha / self.H * np.ones(self.H)).logpdf(1e-16 + self.nu)
     return lp
Example #9
def sample_proposal(a, m, N):
    """
    Sample from the proposal distribution

    Parameters
    ----------
    a, m : (k+1,) np.ndarray, dtype=float; float in [0, 1]
        the parameters for the proposal distribution (Dirichlet(α) x
        Bernoulli(m))
    N : int
        size of the sample

    Returns
    -------
    ss : (N, k+1) np.ndarray, dtype=float
        the switch positions for each sample
    thetas : (N,) np.ndarray, dtype=int
        the initial state for each sample

    See also
    --------
    fit_proposal
    """
    ss = stats.dirichlet(a).rvs(N)
    thetas = (np.random.rand(N) < m).astype(int)
    return ss, thetas
Example #10
def proposal(a, m, ss, thetas):
    """
    Evaluate the proposal distribution at a given point

    Parameters
    ----------
    a, m : (k+1,) np.ndarray, dtype=float; float in [0, 1]
        the parameters for the proposal distribution (Dirichlet(α) x
        Bernoulli(m))
    ss : (n, k+1) np.ndarray, dtype=float
    thetas : (n,) np.ndarray, dtype=int

    Returns
    -------
    float

    See also
    --------
    sample_proposal
    """
    # Note: stats.dirichlet.pdf raises a ValueError when sample points do not
    # lie strictly inside the simplex (i.e. 0 < s < 1). That should not happen
    # in production, so it seems appropriate to let the error propagate.
    with np.errstate(under='ignore'):  # pdf(.) == exp(_logpdf(.)) and exp may underflow
        return (
            stats.dirichlet(a).pdf(ss.T)
            * ( m*thetas + (1-m)*(1-thetas) )
        )
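A round-trip sketch tying the two helpers together (k = 2 change points, so a and ss have 3 components; `scipy.stats as stats` assumed inside the helpers):

import numpy as np
from scipy import stats

a = np.array([2.0, 3.0, 4.0])            # Dirichlet parameters, shape (k+1,)
m = 0.7                                  # Bernoulli parameter
ss, thetas = sample_proposal(a, m, N=5)  # five draws from the proposal
print(proposal(a, m, ss, thetas))        # proposal density at each draw, shape (5,)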
Example #11
def random_state(d):
    num_basis_vectors = 2**d
    shape = [2] * d
    real_part = np.sqrt(dirichlet(alpha=[1]*num_basis_vectors).rvs()[0, :])
    imag_part = np.exp(1j * np.random.uniform(0, 2*np.pi, size=num_basis_vectors))
    amplitudes = (real_part * imag_part).reshape(shape)
    return qc.state.State(amplitudes)
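Why a Dirichlet here: a Dirichlet(1, ..., 1) draw is uniform on the probability simplex, so its square roots give amplitudes whose squared magnitudes sum to one. A standalone check:

import numpy as np
from scipy.stats import dirichlet

probs = dirichlet(alpha=[1] * 8).rvs()[0, :]   # uniform point on the simplex
amps = np.sqrt(probs) * np.exp(1j * np.random.uniform(0, 2 * np.pi, 8))
print(np.sum(np.abs(amps) ** 2))               # ~1.0: a normalized state vector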
Example #12
def make_Dirichland(N,K, concentration_params, scalar=2):

    our_dirichlet = stats.dirichlet(concentration_params) #the distribution we use

    NK_land = np.random.rand(N, 2**(K+1))

    fitnesses = []
    permutations = []
    dir_draw = our_dirichlet.rvs()[0]

    all_permutations = list(itertools.product([0,1],repeat=N))

    for permutation in all_permutations:

        genome_fitness = 0
        for currIndex in np.arange(N):
            #get the fitness indices from each k based upon local gene values
            localgenes = permutation[currIndex:currIndex+K+1]
            # wrap around if we are near the Nth index
            if currIndex+K+1 > N:
                localgenes = np.append(localgenes,permutation[0:currIndex-(N-K)+1])
            # get the index the fitness is stored at
            interactIndex = ((2**(np.arange(K+1)*(localgenes)))*localgenes).sum()
            # update fitness
            genome_fitness += NK_land[currIndex,interactIndex] * (1-dir_draw[currIndex])
        fitnesses.append(genome_fitness)
        permutations.append("".join([str(i) for i in permutation]))

    df = pd.DataFrame(fitnesses,index=permutations,columns=["Fitness"])
    df.loc[:,'Location'] = all_permutations
    return df
Example #13
File: learning.py Project: pmdoll/tomcat
    def sample_parameters(self, evidence_set, state_sample):
        """
        This method samples parameters given the previously sampled states
        """

        # Pre-processing to sample parameters faster
        posteriors_theta_s = self.model.parameter_priors.theta_s_priors.copy()
        posteriors_pi_lt = self.model.parameter_priors.pi_lt_priors.copy()

        for t in range(1, evidence_set.time_slices):
            for d in range(evidence_set.number_of_data_points):
                # Incrementing the prior parameters
                posteriors_theta_s[state_sample[d][t - 1]][state_sample[d][t]] += 1
                posteriors_pi_lt[state_sample[d][t]][1 - evidence_set.lt_evidence[d][t]] += 1

        # Sample parameters
        theta_s_sample = np.zeros(
            (self.model.number_of_states, self.model.number_of_states))
        pi_lt_sample = np.zeros((self.model.number_of_states, 2))

        for state in range(self.model.number_of_states):
            sample = dirichlet(posteriors_theta_s[state]).rvs()[0]
            theta_s_sample[state] = sample

            sample = beta(*posteriors_pi_lt[state]).rvs()
            pi_lt_sample[state] = [1 - sample, sample]

        return theta_s_sample, pi_lt_sample
Example #14
 def __init__(self,
              a_scale=A_SCALE,
              b_scale=B_SCALE,
              c_d_dirichlet_alpha=C_D_DIRICHLET_ALPHA):
     self.a = lognorm(s=1., scale=a_scale)
     self.b = norm(scale=b_scale)
     self.c_d = dirichlet(alpha=c_d_dirichlet_alpha)
Example #15
def run_gam_effective_r_from_empirical(state_data,
                                       n_splines=25,
                                       algo=GammaGAM,
                                       n_bootstrap=100):

    # for numerical stability
    epsilon = 1

    R_series = (
        state_data['confirmed_new'] /
        state_data['confirmed_total'].shift(1)).dropna() * 1 / RECOVERY_RATE

    X = np.arange(R_series.shape[0])
    y = R_series.values + epsilon

    # running GAM in bootstrap
    bootstrap = []
    for _ in range(n_bootstrap):

        weights = dirichlet([1] * R_series.shape[0]).rvs(1)

        gam = algo(s(0, n_splines) + l(0))
        gam.fit(X, y, weights=weights[0])

        bootstrap.append(gam)

    preds = pd.DataFrame([m.predict(X) - epsilon for m in bootstrap]).T

    estimate_rt = pd.DataFrame(index=R_series.index)
    estimate_rt['ML'] = preds.mean(axis=1).values
    estimate_rt['Low_90'] = preds.quantile(0.05, axis=1).values
    estimate_rt['High_90'] = preds.quantile(0.95, axis=1).values

    return estimate_rt.dropna()
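The dirichlet([1] * n) weights implement a Bayesian bootstrap: rather than resampling rows, each replicate reweights all observations with uniform-Dirichlet weights. A minimal standalone sketch of the same idea applied to a weighted mean:

import numpy as np
from scipy.stats import dirichlet

np.random.seed(0)
y = np.random.normal(loc=2.0, scale=1.0, size=200)   # toy observations

means = []
for _ in range(1000):
    w = dirichlet([1] * len(y)).rvs(1)[0]            # weights summing to 1
    means.append(np.dot(w, y))                       # one weighted-mean replicate

print(np.quantile(means, [0.05, 0.5, 0.95]))         # uncertainty band for the mean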
Example #16
 def perturbProportion(self, irr_range, aucpn_range):
     if not self.quiet:
         print('Perturb Proportion')
     prop = self.getMarkedParOldValue()
     a = 0.25
     if self.changeInfo['is_positive']:
         prop_1 = dirichlet(np.ones(self.n_comps_pos)).rvs([])
     else:
         prop_1 = dirichlet(np.ones(self.n_comps_neg)).rvs([])
     new_prop = (1 - a) * prop + a * prop_1
     self.proposeChange(new_prop)
     while not (self.isMetricUBSatisfied(irr_range, aucpn_range)):
         a = a / 2
         new_prop = (1 - a) * prop + a * prop_1
         # print(a)
         self.proposeChange(new_prop)
Example #17
 def __init__(self, dim, max_comps, quiet=False):
     self.dim = dim
     self.max_comps = max_comps
     #self.n_comps_pos = randint(1, max_comps)
     #self.n_comps_neg = randint(1, max_comps)
     self.n_comps_pos = max_comps
     self.n_comps_neg = max_comps
     self.mu_pos = list()
     self.mu_neg = list()
     for i in np.arange(max(self.n_comps_pos, self.n_comps_neg)):
         mu = np.array([
             16 / np.sqrt(self.dim) * random() - 8 / np.sqrt(self.dim)
             for i in np.arange(self.dim)
         ])
         if i < self.n_comps_pos:
             self.mu_pos.append(mu)
         if i < self.n_comps_neg:
             self.mu_neg.append(mu)
     #self.mu_pos = [np.zeros(dim) for j in np.arange(self.n_comps_pos)]
     #self.mu_neg = [np.zeros(dim) for j in np.arange(self.n_comps_neg)]
     self.sig_pos = [np.identity(dim) for j in np.arange(self.n_comps_pos)]
     self.sig_neg = [np.identity(dim) for j in np.arange(self.n_comps_neg)]
     self.p_pos = dirichlet(np.ones(self.n_comps_pos)).rvs([])
     #self.p_neg = dirichlet(np.ones(self.n_comps_neg)).rvs([])
     self.p_neg = self.p_pos
     #self.changeInfo = {'changed': False, 'positive': True, 'mu': True, 'ix':0, 'oldvalue': self.mu_pos[0]}
     self.changeInfo = {'changed': False}
     self.alpha = random()
     self.quiet = quiet
Example #19
 def test_dd_single(self):
     import numpy as np
     from scipy.stats import dirichlet
     np.set_printoptions(precision=2)
     samples = dirichlet(alpha=1000 * np.array([0.1, 0.3, 0.6])).rvs(10000)
     print("element-wise mean:", samples.mean(axis=0))
     print("element-wise standard deviation:", samples.std(axis=0))
     print('end')
Example #20
 def dirichlet(G, alpha=1):
     cpd = {}
     for node in nx.topological_sort(G):
         m = G.in_degree(node) + 1
         dim = tuple([2] * m)
         table = stats.dirichlet(alpha=tuple([alpha] * (2 ** m))).rvs()[0]
         table = table.reshape(dim)
         cpd[node] = TableCPD(table, [node], list(G.predecessors(node)))
     return cpd
Example #21
 def __init__(self, dim, max_comps):
     self.dim = dim
     self.max_comps = max_comps
     self.n_comps_pos = randint(1, max_comps)
     self.n_comps_neg = randint(1, max_comps)
     self.mu_pos = [
         np.array([2 * random() - 1 for i in np.arange(dim)])
         for j in np.arange(self.n_comps_pos)
     ]
     self.mu_neg = [
         np.array([2 * random() - 1 for i in np.arange(dim)])
         for j in np.arange(self.n_comps_neg)
     ]
     self.sig_pos = [spd(dim) for j in np.arange(self.n_comps_pos)]
     self.sig_neg = [spd(dim) for j in np.arange(self.n_comps_neg)]
     self.p_pos = dirichlet(np.ones(self.n_comps_pos)).rvs([])
     self.p_neg = dirichlet(np.ones(self.n_comps_neg)).rvs([])
     self.alpha = random()
Example #22
 def gibbs_weight(self):
     """
     Get weight vector for each gibbs iteration
     :return: weight vector
     """
     Nk = self.components.counts[:self.components.K].tolist()
     alpha = [Nk[cid] + self.alpha / self.components.K
              for cid in range(self.components.K)]
     return stats.dirichlet(alpha).rvs(size=1).flatten()
Example #23
def test_simple_values():
    alpha = np.array([1, 1])
    d = dirichlet(alpha)

    assert_almost_equal(d.mean(), 0.5)
    assert_almost_equal(d.var(), 1. / 12.)

    b = beta(1, 1)
    assert_almost_equal(d.mean(), b.mean())
    assert_almost_equal(d.var(), b.var())
Example #24
File: lda.py Project: seigow/topic-models
    def variational_bayesian_inference_double_loop(self, S_outer=0, S_inner=0):
        # Initialize approximation of posterior distribution
        q_z = [np.full((len(doc), self.K), 1 / self.K) for doc in self.docs]
        q_theta = np.full((len(self.docs), self.K), 1 / self.K)
        q_phi = np.full((self.K, self.V), 1 / self.V)

        # Derivation of the variational lower bound
        # TODO
        # F = np.sum(q_z*q_theta*q_phi*np.log(p_w*p_z))

        # Initialize E[n_k_v] (expected topic-word counts) from q(z)
        # TODO: is this correct??
        E_n_k_v = np.zeros((self.K, self.V))
        for d, doc in enumerate(self.docs):
            for i, v in enumerate(doc):
                E_n_k_v[:, v] += q_z[d][i]
        # Convergence criterion:
        #   A. difference between variational lower bound in each iteration
        #   B. the number of iterations

        for _o in range(S_outer):
            for d, doc in enumerate(self.docs):
                n_d = len(doc)
                # Initialize E[n_d_k] = n_d/K
                E_n_d_k = np.full(self.K, n_d / self.K)
                # Absolute Error
                # TODO

                for _i in range(S_inner):
                    for i in range(n_d):
                        v = doc[i]  # word id of the i-th token
                        # Update q(z_d_i)
                        new_q_z = (np.exp(sp.psi(E_n_k_v[:, v] + self.beta[v]))                        # K-dimensional
                                   / np.exp(sp.psi(np.sum(E_n_k_v + np.asarray(self.beta), axis=1)))   # K-dimensional
                                   * np.exp(sp.psi(E_n_d_k + self.alpha))                              # K-dimensional
                                   / np.exp(sp.psi(np.sum(E_n_d_k + self.alpha))))                     # scalar
                        q_z[d][i] = new_q_z / new_q_z.sum()
                    # Update q(theta_d) by (3.90)
                    # TODO: is this correct???
                    q_theta[d] = stats.dirichlet(E_n_d_k + self.alpha).mean()
            for k in range(self.K):
                # Update q(phi_k) by (3.96)
                # TODO: is this correct???
                q_phi[k] = stats.dirichlet(E_n_k_v[k] + self.beta).mean()
Example #25
 def __init__(self, α: np.ndarray, z: np.ndarray, bor: float):
     """
     :param α: sufficient statistics of the posterior Dirichlet density on model/family frequencies
     :param z: posterior probabilities for each subject to belong to each model/family
     :param bor: Bayesian omnibus risk p(y|H0)/(p(y|H0)+p(y|H1))
     """
     self.attribution = z.copy()
     self.frequency_mean = dirichlet.mean(α)
     self.frequency_var = dirichlet.var(α)
     self.exceedance_probability = exceedance_probability(dirichlet(α))
     self.protected_exceedance_probability = self.exceedance_probability * (
         1 - bor) + bor / len(α)  # (7)
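The last two lines implement the protected exceedance probability: the exceedance probability shrunk toward chance level 1/K by the Bayesian omnibus risk. A numeric illustration (values hypothetical):

import numpy as np

ep = np.array([0.80, 0.15, 0.05])   # exceedance probabilities, K = 3
bor = 0.2                           # Bayesian omnibus risk
pxp = ep * (1 - bor) + bor / len(ep)
print(pxp, pxp.sum())               # [0.707, 0.187, 0.107], sums to 1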
Example #26
    def compute_elbo(self, phi, nu, kappa, epsilon, m, L, V, N, gamma_1, gamma_2):
        """
        Compute the evidence lower bound, as defined for HMRF-DPCMM, from the variational parameters.
        :param phi: of shape [N,K]
        :param nu: of shape [1, K]
        :param kappa: of shape [1, K]
        :param epsilon: of shape [1, K]
        :param m: of shape [N, d]
        :param L: of shape [N, d, d]
        :param V: of shape [K, K]
        :param N: of shape [1, K]
        :param gamma_1: of shape [1, K]
        :param gamma_2: of shape [1, K]
        :return:
        """
        hmrf_term = 0
        log_likelihood_term = 0

        if self.weight_prior == "Dirichelet distribution":
            val = digamma(epsilon) - digamma(np.sum(epsilon))
        else:
            val = digamma(gamma_1) - digamma(gamma_1 + gamma_2) + cumsum_ex(
                digamma(gamma_2) - digamma(gamma_1 + gamma_2))

        for n in range(self.N):
            for k in range(self.K):
                log_likelihood_term += - 0.5 * phi[n, k] * nu[k] * np.trace(np.matmul(L[k,:,:], np.matmul(self.X[n,:].reshape(self.d, 1) - m[k].reshape(self.d, 1),
                                                                                        self.X[n,:].reshape(1, self.d) - m[k].reshape(1, self.d) )) ) \
                     - 0.5 * self.d * N[k] / kappa[k]
                if self.mask[n] == 0:
                    log_likelihood_term += phi[n,k] * val[k]

        for k in range(self.K):
            log_likelihood_term += 0.5 * (nu[k] - self.d + N[k]) * (multivar_digamma(nu[k], self.d) + np.log(self.eps + LA.det(L[k, :, :]))) - 0.5 * self.d * nu[k]


        for pair in self.tuples_ml:
            hmrf_term += - self.lambda_ * np.sum(phi[pair[0], :].reshape(self.K, 1) * phi[pair[1], :].reshape(1, self.K) * V)


        for k in range(self.K):
            log_likelihood_term += wishart(nu[k], L[k, :, :]).entropy() \
                    + 0.5 * (multivar_digamma(nu[k], self.d) + np.log(self.eps + LA.det(L[k, :, :]))) \
                    + 0.5 * self.d * np.log(self.eps + kappa[k]) \
                    - np.sum(phi[:, k] * np.log(phi[:, k] + self.eps))
            if self.weight_prior != "Dirichelet distribution":
                log_likelihood_term += beta(gamma_1[k], gamma_2[k]).entropy()

        if self.weight_prior == "Dirichelet distribution":
            log_likelihood_term += dirichlet(epsilon).entropy()

        elbo = log_likelihood_term + hmrf_term

        return elbo/self.N, log_likelihood_term, hmrf_term
Example #27
 def __init__(self, dist_params):
     '''
     Creates a mixture of Dirichlet distributions with parameters `dist_params`,
     e.g., dist_params={
     'mix_coef':[0.5,0.5],
     'dir1_params':[2,3,19],
     'dir2_params':[17,10,7]
     }
     '''
     from scipy.stats import dirichlet
     self.mix_coef = dist_params.pop('mix_coef')
     self.K = len(self.mix_coef)
     self.dir_mixtures = [dirichlet(dir_param) for dir_param in dist_params.values()]
Example #28
File: HDP-LDA.py Project: wgmueller1/mcmc
def sample_tau(state):
    # "Escobar and West's auxiliary variable method (1995)," https://lists.cs.princeton.edu/pipermail/topic-models/2011-October/001629.html
    # http://bit.ly/1FelVcL
    mk = get_mk(state)
    state['T'] = sum(mk.values()) - state['gamma']
    assert state['T'] > 0
    topics, mk_vals = zip(*mk.items())
    new_tau = dirichlet(mk_vals).rvs()[0]
    state['tau'] = {}
    for topic, tau_i in zip(topics, new_tau):
        state['tau'][topic] = tau_i
    assert set(state['tau'].keys()) - state['used_topics'] == set([-1])
    return state
Example #29
def get_dirichlet_mle(x):
    """
    Get maximum likelihood estimation for dirichlet concentration parameters

    :param x: Data array of size nxp where n is the number of observations and p is the dimensionality of the
        dirichlet distribution to estimate
    :return: Dirichlet distribution (with `alpha` parameter set to MLE inferred from given data)
    """
    assert len(x.shape) == 2, 'Data array must be two dimensional'
    assert np.allclose(x.sum(axis=1), 1), 'Sum of observations across rows must equal 1'
    from ml.bayesian.dirichlet import dirichlet
    alpha = dirichlet.mle(x)
    return stats.dirichlet(alpha)
Example #30
def get_dirichlet_multinomial_posterior(x, alpha):
    """
    Return posterior dirichlet distribution with dirichlet prior and multinomial likelihood

    Posterior for dirichlet distribution with parameters equal to (alpha1 + x1, alpha2 + x2, ..., alphap + xp)
    where p is the number of categories (i.e. number of columns in x)

    :param x: Integer array where index indicates category (must have length > 1)
    :param alpha: Concentration parameter for dirichlet prior; can be scalar or array of size len(x)
    :return: Dirichlet distribution
    """
    assert len(x) > 1, 'Number of categories for dirichlet/multinomial model must be > 1'
    return stats.dirichlet(np.asarray(x) + alpha)
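For instance, with a symmetric prior alpha = 1 and observed counts (3, 1, 6) the posterior is Dirichlet(4, 2, 7); a quick check with the helper above (numpy/scipy imports assumed by the helper):

import numpy as np

counts = np.array([3, 1, 6])
posterior = get_dirichlet_multinomial_posterior(counts, alpha=1)
print(posterior.mean())   # (4, 2, 7) / 13 ≈ [0.308, 0.154, 0.538]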
Example #31
    def set_result(self, successes, trials):
        assert len(successes) == len(trials)
        assert len(successes) == len(self.variants)

        self.posteriors = [None] * len(successes)

        for i, (conv, vis,
                pri) in enumerate(zip(successes, trials, self.priors)):
            obs = conv + [vis - sum(conv)]
            self.posteriors[i] = dirichlet(pri + obs)

        self.conversions = successes
        self.visitors = trials
Example #32
def test_K_and_K_minus_1_calls_equal():
    # Test that calls with K and K-1 entries yield the same results.

    np.random.seed(2846)

    n = np.random.randint(1, 32)
    alpha = np.random.uniform(10e-10, 100, n)

    d = dirichlet(alpha)
    num_tests = 10
    for i in range(num_tests):
        x = np.random.uniform(10e-10, 100, n)
        x /= np.sum(x)
        assert_almost_equal(d.pdf(x[:-1]), d.pdf(x))
Example #33
def updateMixtureWeight(Z, weightPrior):
    '''
    Z: length n, array like component indicator
    weightPrior: length K, array like prior (for the Dirichlet prior)
    '''
    unique, counts = np.unique(Z, return_counts=True)
    mixtureCounts = dict(zip(unique, counts))

    # Copy the prior so the caller's array is not mutated in place
    alpha = np.array(weightPrior, dtype=float)

    for k in mixtureCounts:
        alpha[k] += mixtureCounts[k]

    return dirichlet(alpha).rvs()[0]
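Usage sketch (assumes `numpy as np` and `from scipy.stats import dirichlet` as in the snippet): with a flat prior and six indicators, the draw comes from Dirichlet(4, 2, 3).

import numpy as np

Z = np.array([0, 0, 0, 1, 2, 2])               # component indicators
weights = updateMixtureWeight(Z, np.ones(3))   # one posterior draw of the weights
print(weights, weights.sum())                  # a point on the simplex; sums to 1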
Example #34
File: utilities.py Project: ifsm/apollon
def sample_hist(dims: SomDims,
                data: Optional[Array] = None,
                **kwargs) -> Array:
    """Sample sum-normalized histograms.

    Args:
        dims:  Dimensions of SOM.
        data:  Input data set.

    Returns:
        Two-dimensional array in which each row is a histogram.
    """
    n_rows, n_cols, n_feats = dims
    return _stats.dirichlet(np.ones(n_feats)).rvs(n_rows * n_cols)
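Each returned row is a point on the simplex, i.e. a sum-normalized histogram; a usage sketch with dims given as a plain tuple:

hists = sample_hist((4, 5, 8))   # 4x5 SOM grid, 8 features per unit
print(hists.shape)               # (20, 8): one histogram per SOM unit
print(hists.sum(axis=1))         # every row sums to 1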
Example #35
def updateProbs(C, probPrior):
    '''
    C: length N, array like type indicator for all points (value in 0,1,2)
        0=outside, 1=MF, 2=FM
    probPrior: length 3, array like prior (for the Dirichlet prior)
    '''
    unique, counts = np.unique(C, return_counts=True)
    typeCounts = dict(zip(unique, counts))

    alpha = copy(probPrior)

    for k in typeCounts:
        alpha[k] += typeCounts[k]

    return dirichlet(alpha).rvs()[0]
Example #36
def test_2D_dirichlet_is_beta():
    np.random.seed(2846)

    alpha = np.random.uniform(10e-10, 100, 2)
    d = dirichlet(alpha)
    b = beta(alpha[0], alpha[1])

    num_tests = 10
    for i in range(num_tests):
        x = np.random.uniform(10e-10, 100, 2)
        x /= np.sum(x)
        assert_almost_equal(b.pdf(x), d.pdf([x]))

    assert_almost_equal(b.mean(), d.mean()[0])
    assert_almost_equal(b.var(), d.var()[0])
Example #37
def test_frozen_dirichlet():
    np.random.seed(2846)

    n = np.random.randint(1, 32)
    alpha = np.random.uniform(10e-10, 100, n)

    d = dirichlet(alpha)

    assert_equal(d.var(), dirichlet.var(alpha))
    assert_equal(d.mean(), dirichlet.mean(alpha))
    assert_equal(d.entropy(), dirichlet.entropy(alpha))
    num_tests = 10
    for i in range(num_tests):
        x = np.random.uniform(10e-10, 100, n)
        x /= np.sum(x)
        assert_equal(d.pdf(x[:-1]), dirichlet.pdf(x[:-1], alpha))
        assert_equal(d.logpdf(x[:-1]), dirichlet.logpdf(x[:-1], alpha))
Example #38
    def log_prior(self):
        """
        Compute the log likelihood of a set of SBM parameters

        :param x:    (m,p,v) tuple
        :return:
        """
        from scipy.stats import dirichlet, beta
        lp = 0
        lp += dirichlet(self.pi).logpdf(self.m)

        lp += beta(self.tau1 * np.ones((self.C, self.C)),
                   self.tau0 * np.ones((self.C, self.C))).logpdf(self.p).sum()
        if self.special_case_self_conns:
            lp += beta(self.tau1, self.tau0).logpdf(self.p_self)

        lp += (np.log(self.m)[self.c]).sum()
        return lp
Example #39
def prob_locked(experiences, joint_pos, p_same, alpha_prior, model_prior,
                model_post=None):
    """
    Computes the Dirichlet distribution over the possible joint state
    distributions.

    :param experiences: The experiences so far
    :param joint_pos: The joint positions of all joints (array-like)
    :param p_same: The probability of two joint states being in the same
                   segment. (I.e. no change point in between)
    :param alpha_prior: The prior over the different joint locking states
    :param model_prior: The prior over the different joint dependency models
    :param model_post: Optional precomputed model posterior; recomputed from
                       the experiences if None
    :return: A Dirichlet distribution object giving the probability for the
             different locking state distributions
    alpha = np.array(alpha_prior)
    if model_post is None:
        model_post = model_posterior(experiences, p_same, alpha_prior,
                                     model_prior)
    for joint_idx, pos in enumerate(joint_pos):
        c = create_alpha(pos, experiences, joint_idx, p_same[joint_idx])

        a = model_post[joint_idx] * c
        if np.min(a) < 0:
            print("c = {}".format(c))
            print("mp = {}".format(model_post[joint_idx]))
            print("a = c * mp = {}".format(a))
        alpha += a

    if np.min(alpha) <= 0:
        print("alpha_prior = {}".format(alpha_prior))
        print("alpha = {}".format(alpha))

    d = dirichlet(alpha)
    return d
Example #40
File: entropy.py Project: bumps/bumps
 def __init__(self, alpha):
     self.alpha = alpha
     self._dist = stats.dirichlet(alpha)
     self.dim = len(alpha)
Example #41
def stats(scale_factor, G0=[.2, .2, .6], N=10000):
    # NOTE: this helper's name shadows the usual `stats` module alias.
    samples = dirichlet(alpha = scale_factor * np.array(G0)).rvs(N)  # rvs: draw random samples from this distribution
    print("                          alpha:", scale_factor)
    print("              element-wise mean:", samples.mean(axis=0))
    print("element-wise standard deviation:", samples.std(axis=0))
    print()
def draw_pi(alpha):
    return sts.dirichlet(alpha).rvs()[0]
import matplotlib.pyplot as plt
import pickle
from scipy.special import gamma  # needed by dirichlet() and the gamma plot below


alpha = [1,1,2]
mu = [0.05,0.05,0.90] # constraint: entries must sum to 1

def dirichlet(mu, alpha):
    # Dirichlet pdf evaluated at mu (NOTE: this shadows scipy.stats.dirichlet)
    mu = np.array(mu)
    alpha = np.array(alpha)
    product = np.prod(mu ** (alpha - 1))
    normaliser = gamma(alpha.sum())/np.prod(gamma(alpha))
    result = product * normaliser
    return result

print(dirichlet(mu, alpha))
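A quick cross-check of the hand-rolled density against scipy's implementation (same mu and alpha):

from scipy import stats as sps
print(sps.dirichlet(alpha).pdf(mu))  # should print the same value as above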

print(np.random.dirichlet([1,1,100]))

#plt.plot()

x = np.linspace(0,4,100)
y = gamma(x+2) - gamma(x)
y = np.log(x)
#plt.plot(x, y)
#plt.draw()


def create_reward(a):
    # NOTE: relies on globals p1, p2, p5, m_0 and k defined elsewhere;
    # `a` and `e` are currently unused.
    e = np.random.rand()/100
    p6 = np.dot(p1, m_0)
    p7 = {i: p5[i]+p6 for i in range(k)}
    return {i: np.dot(p2[i], p7[i]) for i in range(k)}

def draw_mu_k(mk_dict, inv_vk_dict, k):
    return {i: sts.multivariate_normal(mk_dict[i], np.linalg.inv(inv_vk_dict[i])).rvs() for i in range(k)}

#Constants

N = 6 # dimension of the MVN
J = 200 # number of data points
K = 3 #number of MVN distributions


#Building test data
true_pi = sts.dirichlet([3,6,9]).rvs()
s1 = np.random.rand(6,6)*np.random.uniform(0,30)
s1 = np.dot(s1, s1.T)
s2 = np.random.rand(6,6)*np.random.uniform(0,30)
s2 = np.dot(s2, s2.T)
s3 = np.random.rand(6,6)*np.random.uniform(0,30)
s3 = np.dot(s3, s3.T)

true_g1 =sts.multivariate_normal(np.random.uniform(-100,100,6),s1)
true_g2 =sts.multivariate_normal(np.random.uniform(-100,100,6),s2)
true_g3 =sts.multivariate_normal(np.random.uniform(-100,100,6),s3)




true_z = np.random.multinomial(J, true_pi[0])
def draw_pi(pi_0, nk_dict, k):
    alpha = np.array([nk_dict[i] for i in range(k)])
    return sts.dirichlet(alpha+pi_0).rvs()