Example #1
def test_parameter_estimation():
    N = 100  # number of observations
    K = 50  # dimension of Dirichlet

    _alpha = np.random.gamma(1, 1) * np.random.dirichlet(
        [1.] * K)  # ground truth alpha

    obs = np.random.dirichlet(_alpha,
                              size=N) + eps  # draw N samples from Dir(_alpha)
    obs /= np.sum(obs, 1)[:, np.newaxis]  # renormalize for added eps

    initial_alpha = np.ones(K)  # first guess on alpha

    # estimating
    est_alpha = parameter_estimation(obs, initial_alpha)

    g_ll = 0  # log-likelihood with ground truth parameter
    b_ll = 0  # log-likelihood with initial guess of alpha
    ll = 0  # log-likelihood with estimated parameter
    for i in range(N):
        g_ll += dirichlet.logpdf(obs[i], _alpha)
        b_ll += dirichlet.logpdf(obs[i], initial_alpha)
        ll += dirichlet.logpdf(obs[i], est_alpha)

    print('Test with parameter estimation')
    print('likelihood p(obs|_alpha) = %.3f' % g_ll)
    print('likelihood p(obs|initial_alpha) = %.3f' % b_ll)
    print('likelihood p(obs|estimate_alpha) = %.3f' % ll)
    print('likelihood difference = %.3f' % (g_ll - ll))
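The snippet assumes module-level imports, an estimator, and a small eps constant; a minimal sketch of that preamble (the value of eps here is an assumption, not taken from the original module):

import numpy as np
from scipy.stats import dirichlet

eps = 1e-10  # assumed small constant that keeps the drawn samples strictly inside the simplex
# parameter_estimation(obs, initial_alpha) is assumed to be defined elsewhere in the module.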
Example #2
 def _sample_alpha(
         self,
         step_size=0.01):  # sampling the hyperparameter for the Dirichlets
     if not self.unannealed_dists:
         pass
     else:
         old_f_val = 0.0
         new_f_val = 0.0
         for dist in self.unannealed_dists.values():
             alpha_vec = np.zeros_like(dist)
             alpha_vec.fill(self.alpha)
             old_f_val += dirichlet.logpdf(dist, alpha_vec)
         new_alpha = 0
         while new_alpha < self.alpha_range[
                 0] or new_alpha > self.alpha_range[1]:
             new_alpha = self.alpha + np.random.normal(0.0, step_size)
         for dist in self.unannealed_dists.values():
             alpha_vec = np.zeros_like(dist)
             alpha_vec.fill(new_alpha)
             new_f_val += dirichlet.logpdf(dist, alpha_vec)
         acceptance_thres = np.log(np.random.uniform(0.0, 1.0))
         mh_ratio = new_f_val - old_f_val
         if mh_ratio > acceptance_thres:
             self.alpha = new_alpha
             logging.info(
                 'pcfg alpha samples a new value {} with log ratio {}/{}'.
                 format(new_alpha, mh_ratio, acceptance_thres))
Example #3
def test_parameter_estimation():
    N = 100 # number of observations
    K = 50  # dimension of Dirichlet

    _alpha = np.random.gamma(1,1) * np.random.dirichlet([1.]*K) # ground truth alpha
    
    obs = np.random.dirichlet(_alpha, size=N) + eps # draw N samples from Dir(_alpha)
    obs /= np.sum(obs, 1)[:,np.newaxis] #renormalize for added eps

    initial_alpha = np.ones(K) # first guess on alpha

    #estimating 
    est_alpha = parameter_estimation(obs, initial_alpha)

    g_ll = 0 #log-likelihood with ground truth parameter
    b_ll = 0  #log-likelihood with initial guess of alpha
    ll = 0 #log-likelihood with estimated parameter    
    for i in xrange(N):
        g_ll += dirichlet.logpdf(obs[i], _alpha)
        b_ll += dirichlet.logpdf(obs[i], initial_alpha)
        ll += dirichlet.logpdf(obs[i], est_alpha)

    print 'Test with parameter estimation'
    print 'likelihood p(obs|_alpha) = %.3f' % g_ll
    print 'likelihood p(obs|initial_alpha) = %.3f' % b_ll
    print 'likelihood p(obs|estimate_alpha) = %.3f' % ll
    print 'likelihood difference = %.3f' % (g_ll - ll)
Example #4
    def propose(self):
        if len(self.value) == 1: return PriorDirichletDistribution(value=self.value,alpha=self.value), 0.0 # handle singleton rules

        ret = PriorDirichletDistribution(value = numpy.random.dirichlet(self.alpha), alpha=self.alpha)

        fb = dirichlet.logpdf(ret.value, self.alpha) - dirichlet.logpdf(self.value, self.alpha)

        return ret, fb
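The returned fb is the forward-minus-backward proposal log-density used in a Metropolis-Hastings correction. A minimal self-contained sketch of that correction with scipy (the names below are illustrative and not from the original class):

import numpy as np
from scipy.stats import dirichlet

alpha = np.array([2.0, 2.0, 2.0])
current = np.random.dirichlet(alpha)
proposal = np.random.dirichlet(alpha)  # independence proposal drawn from the prior
fb = dirichlet.logpdf(proposal, alpha) - dirichlet.logpdf(current, alpha)
# An MH step would accept with log probability min(0, log_target(proposal) - log_target(current) - fb).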
Example #5
    def fit(self,
            data,
            p_transitions,
            p_emissions,
            p_start,
            start_pseudocounts,
            transition_pseudocounts,
            emission_pseudocounts,
            verbose=True):
        """
        Run the EM algorithm to find the maximum likelihood or maximum a posteriori (if pseudocounts >0) estimates
        of the model parameters
        :param data: Training data. Shape = (number of sequences X length of sequence)
        :param p_transitions: Initial guess for transition probability matrix. Shape = (num_states X num_states)
        :param p_emissions: Initial guess for emission probability matrix. Shape = (num_states X num_emissions)
        :param p_start: Initial guess for first state occupancy probability matrix. Shape = (num_states)
        :param start_pseudocounts: Parameters for Dirichlet prior on first state occupancy. Shape = (num_states)
        :param transition_pseudocounts: Parameters for Dirichlet priors on transition probabilities.
               Shape = (num_states X num_states)
        :param emission_pseudocounts: Parameters for Dirichlet priors on emission probabilities.
               Shape = (num_states X num_emissions)
        :param verbose: Show the improvement in log likelihood/log posterior through training. Default = True
        :return:
        """
        self.p_transitions = p_transitions / np.sum(
            p_transitions, axis=1, keepdims=True)
        self.p_emissions = p_emissions / np.sum(
            p_emissions, axis=1, keepdims=True)
        self.p_start = p_start / np.sum(p_start)
        self.start_pseudocounts = start_pseudocounts
        self.transition_pseudocounts = transition_pseudocounts
        self.emission_pseudocounts = emission_pseudocounts
        self.converged = False

        for iter in range(self.max_iter):
            self.get_state_likelihood(data.astype("int"))
            alpha, beta, scaling, expected_latent_state, expected_latent_state_pair = self.E_step(
            )
            self.M_step(data.astype("int"), expected_latent_state,
                        expected_latent_state_pair)
            current_log_likelihood = np.sum(np.log(scaling))
            current_log_prior = np.sum([dirichlet.logpdf(self.p_transitions[state, :], self.transition_pseudocounts[state, :])
                                        + dirichlet.logpdf(self.p_emissions[state, :], self.emission_pseudocounts[state, :])
                                        for state in range(self.num_states)]) \
                                + dirichlet.logpdf(self.p_start, self.start_pseudocounts)
            self.log_posterior.append(current_log_likelihood +
                                      current_log_prior)

            if iter >= 1:
                improvement = self.log_posterior[-1] - self.log_posterior[-2]
                if verbose:
                    print("Training improvement in log posterior:",
                          improvement)
                if improvement <= self.threshold:
                    self.converged = True
                    break
Example #6
 def test_numpy_rvs_shape_compatibility(self):
     np.random.seed(2846)
     alpha = np.array([1.0, 2.0, 3.0])
     x = np.random.dirichlet(alpha, size=7)
     assert_equal(x.shape, (7, 3))
     assert_raises(ValueError, dirichlet.pdf, x, alpha)
     assert_raises(ValueError, dirichlet.logpdf, x, alpha)
     dirichlet.pdf(x.T, alpha)
     dirichlet.pdf(x.T[:-1], alpha)
     dirichlet.logpdf(x.T, alpha)
     dirichlet.logpdf(x.T[:-1], alpha)
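The assertions above rely on scipy placing the component dimension along the first axis of x, while numpy's sampler returns one sample per row; a small sketch of the required transpose (not part of the test suite):

import numpy as np
from scipy.stats import dirichlet

alpha = np.array([1.0, 2.0, 3.0])
x = np.random.dirichlet(alpha, size=7)  # shape (7, 3): one sample per row
logp = dirichlet.logpdf(x.T, alpha)     # components first after the transpose; returns shape (7,)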
Example #7
 def test_numpy_rvs_shape_compatibility(self):
     np.random.seed(2846)
     alpha = np.array([1.0, 2.0, 3.0])
     x = np.random.dirichlet(alpha, size=7)
     assert_equal(x.shape, (7, 3))
     assert_raises(ValueError, dirichlet.pdf, x, alpha)
     assert_raises(ValueError, dirichlet.logpdf, x, alpha)
     dirichlet.pdf(x.T, alpha)
     dirichlet.pdf(x.T[:-1], alpha)
     dirichlet.logpdf(x.T, alpha)
     dirichlet.logpdf(x.T[:-1], alpha)
Example #8
 def log_likelihood(cls, x, params=None, nat_params=None):
     # Compute P( x | Ѳ; α )
     assert (params is None) ^ (nat_params is None)
     (alpha, ) = params if params is not None else cls.natToStandard(
         *nat_params)
     if (isinstance(x, tuple)):
         assert len(x) == 1
         x, = x
     assert isinstance(x, np.ndarray)
     if (x.ndim == 2):
         return sum([dirichlet.logpdf(_x, alpha=alpha) for _x in x])
     assert isinstance(x, np.ndarray) and x.ndim == 1
     return dirichlet.logpdf(x, alpha=alpha)
Example #9
    def propose(self):
        if len(self.value) == 1:
            return PriorDirichletDistribution(
                value=self.value,
                alpha=self.value), 0.0  # handle singleton rules

        ret = PriorDirichletDistribution(value=numpy.random.dirichlet(
            self.alpha),
                                         alpha=self.alpha)

        fb = dirichlet.logpdf(ret.value, self.alpha) - dirichlet.logpdf(
            self.value, self.alpha)

        return ret, fb
Example #10
 def log_prior(self):
     lp = 0
     for k in range(self.nb_states):
         alpha = self.prior['alpha'] * np.ones(self.nb_states)\
                 + self.prior['kappa'] * (np.arange(self.nb_states) == k)
         lp += dirichlet.logpdf(self.matrix[k], alpha)
     return lp
Example #11
def logprob(phi_c, phi_parent, root, alpha):
  x = mkarray(phi_c.values())
  if phi_parent == root:
    a = np.ones_like(x)
  else:
    a = alpha * mkarray(phi_parent.values())
  return dirichlet.logpdf(x, a)
Example #12
    def calculate_nav_log_joint(self):
        log_joint = 0
        for nav_topic_id in range(self.num_nav_topics):
            log_joint += calculate_multivariate_normal_logpdf(
                self.nav_topic_means[-1][nav_topic_id],
                self.nav_topic_mean_prior_means[nav_topic_id],
                CovarianceMatrix.scaled(
                    self.nav_topic_covariances[-1][nav_topic_id],
                    (1 / self.nav_topic_mean_prior_kappa)))
            log_joint += invwishart.logpdf(
                self.nav_topic_covariances[-1][nav_topic_id].matrix,
                self.nav_topic_covariance_prior_dof,
                self.nav_topic_covariance_prior_scale)

        for article_id in range(len(self.training_data.articles)):
            log_joint += dirichlet.logpdf(
                self.nav_article_proportions[-1][article_id],
                self.nav_article_topic_proportions_prior_alpha)

            for article_nav_id, nav_id in enumerate(
                    self.training_data.article_navs[article_id]):
                nav_topic_assignment = self.nav_article_nav_assignments[-1][
                    article_id][article_nav_id]
                log_joint += np.log(self.nav_article_proportions[-1]
                                    [article_id][nav_topic_assignment])
                log_joint += self.calculate_topic_nav_logprob(
                    nav_topic_assignment, nav_id)

        return log_joint
Example #13
    def propose(self):

        if len(self.value) == 1: return copy(self), 0.0 # handle singleton rules

        v = numpy.random.dirichlet(self.value * self.proposal_scale)

        # add a tiny bit of smoothing away from 0/1
        v = (1.0 - DirichletDistribution.SMOOTHING) * v + DirichletDistribution.SMOOTHING / 2.0
        # and renormalize it (both slightly breaking MCMC)
        v = v / sum(v)

        ret = copy(self)
        ret.set_value(v)

        fb = dirichlet.logpdf(ret.value, self.value * self.proposal_scale) -\
             dirichlet.logpdf(self.value, ret.value * self.proposal_scale)

        return ret, fb
Example #14
 def _log_joint(self, x, Z):
     temp = (dirichlet.logpdf(self.pi, self.alpha) +
             np.sum(np.log(self.pi[Z])) +
             np.sum(norm.logpdf(self.beta, 0, self.sigma)) +
             np.sum(norm.logpdf(self.rho, 0, self.sigma)))
     loc = np.array([
         self.beta[Z[v], :] + self.epsilon_up *
         np.sum(self.rho[Z[v], Z[self.graph[v]], :], axis=0)
         for v in range(self.n_cells)
     ])
     return temp + np.sum(norm.logpdf(x, loc, self.S * np.ones(loc.shape)))
Example #15
def likelihood(position):
    pars = np.copy(position)
    # pars = 10 ** pars
    print(pars)
    try:
        cost = np.sum([dirichlet.logpdf(sample, pars) for sample in data_3states])
        print('cost', cost)
    except ValueError as e:
        print(e)
        cost = -np.inf
    return cost
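data_3states and the imports are assumed to exist at module level; a hypothetical preamble that makes the function runnable (the data here is illustrative, not from the original script):

import numpy as np
from scipy.stats import dirichlet

data_3states = np.random.dirichlet([2.0, 3.0, 4.0], size=100)  # assumed: 100 points on the 3-simplex
# Example call: likelihood(np.array([1.0, 1.0, 1.0]))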
Example #16
    def propose(self):

        if len(self.value) == 1:
            return copy(self), 0.0  # handle singleton rules

        v = numpy.random.dirichlet(self.value * self.proposal_scale)

        # add a tiny bit of smoothing away from 0/1
        v = (1.0 - DirichletDistribution.SMOOTHING
             ) * v + DirichletDistribution.SMOOTHING / 2.0
        # and renormalize it (both slightly breaking MCMC)
        v = v / sum(v)

        ret = copy(self)
        ret.set_value(v)

        fb = dirichlet.logpdf(ret.value, self.value * self.proposal_scale) -\
             dirichlet.logpdf(self.value, ret.value * self.proposal_scale)

        return ret, fb
Example #17
def sampleproposal(B,D,K):
    # sample pis from Dirichlet(0.1, ..., 0.1)
    pi_alpha = 0.1
    pi_pr = np.random.dirichlet(pi_alpha* np.ones(K),size = B)
    logpidensity =dirichlet.logpdf(np.transpose(pi_pr),pi_alpha*np.ones(K))

    #sample mus from normal(0,25)
    mu_std = 5
    mu_pr = mu_std*np.random.randn(B,K,D)
    logmudensity = np.sum(norm.logpdf(mu_pr, scale = mu_std),axis = (1,2))

    #sample sigmas from lognormal(0,1)
    sigma_pr = np.exp(np.random.randn(B,K,D)) 
    logsigmadensity = np.sum(lognorm.logpdf(sigma_pr, s = 1),axis = (1,2))
 
    logpropdensity = logpidensity + logmudensity + logsigmadensity

    logpipriordensity = dirichlet.logpdf(np.transpose(pi_pr),np.ones(K))
    logpriordensity = logpipriordensity  + np.sum(norm.logpdf(mu_pr, scale = 1),axis = (1,2)) +logsigmadensity

    return pi_pr,mu_pr,sigma_pr, logpriordensity, logpropdensity
Example #18
    def semisupervisedEM(self, Xl, Yl, Xu, tol=1e-3):
        # semi-supervised training by EM
        # stack labelled + unlabelled inputs
        X = np.row_stack([Xl, Xu])
        Yl = np.squeeze(Yl)
        # init model (post) with labelled dataset
        self.train_supervised(Xl, Yl)
        self.N = X.shape[0]  # update n labelled from train_supervised
        # init responsibility
        rl = self.r
        ru = self.predict(Xu)  # unlabelled data
        r = np.row_stack([rl, ru])

        self.ll = []  # joint-log-likelihood

        # EM iterations
        while (np.size(self.ll) < 5
               or abs(sum(self.ll[-1] - self.ll[-5:-1])) > tol):
            # M-step
            for k in range(self.K):  # update for each class
                # clusters
                self.base[k].M_step(X, r[:, k])  # update posterior params
                # dirichlet
                nk = np.sum(r[:, k])
                self.pi_map[k] = ((nk + self.alpha[k] - 1) /
                                  (self.N + np.sum(self.alpha) - self.K))

            # E-step
            ru = self.predict(Xu)  # update resp. for unlabelled
            r = np.row_stack([rl, ru])

            # log-lik unlabelled
            ll_ul = np.sum(self.lpx)
            # log-lik of base params
            lpth_D = np.array([
                self.base[k].post_logpdf(self.base[k].mu_map,
                                         self.base[k].Sig_map)
                for k in range(self.K)
            ]).sum()
            # mixing props
            self.alpha_n = np.sum(r, 0) + self.alpha  # posterior alpha
            lpi_D = dirichlet.logpdf(self.pi_map, self.alpha_n)

            # track log-lik of the joint of the model
            self.ll = np.append(self.ll, ll_ul + self.lpX + lpth_D + lpi_D)

            print('log-joint-likelihood:' + '%.4f' % self.ll[-1])

            # store the final responsibility and likelihood
        self.r = r  # resp. for whole semisupervised set
        # for Xu
        self.r_ul = ru
        self.lpx_ul = self.lpx
Example #19
    def calculate_ne_log_joint(self):
        log_joint = 0
        for ne_topic_id in range(self.num_ne_topics):
            log_joint += dirichlet.logpdf(
                self.ne_topic_proportions[-1][ne_topic_id],
                self.ne_topic_vocab_prior_alpha)

        for article_id in range(len(self.training_data.articles)):
            log_joint += dirichlet.logpdf(
                self.ne_article_proportions[-1][article_id],
                self.ne_article_topic_proportions_prior_alpha)

            for article_ne_id, ne_id in enumerate(
                    self.training_data.article_nes[article_id]):
                ne_topic_assignment = self.ne_article_ne_assignments[-1][
                    article_id][article_ne_id]
                log_joint += np.log(self.ne_article_proportions[-1][article_id]
                                    [ne_topic_assignment])
                log_joint += np.log(
                    self.ne_topic_proportions[-1][ne_topic_assignment][ne_id])

        return log_joint
Example #20
def test_frozen_dirichlet():
    np.random.seed(2846)

    n = np.random.randint(1, 32)
    alpha = np.random.uniform(10e-10, 100, n)

    d = dirichlet(alpha)

    assert_equal(d.var(), dirichlet.var(alpha))
    assert_equal(d.mean(), dirichlet.mean(alpha))
    assert_equal(d.entropy(), dirichlet.entropy(alpha))
    num_tests = 10
    for i in range(num_tests):
        x = np.random.uniform(10e-10, 100, n)
        x /= np.sum(x)
        assert_equal(d.pdf(x[:-1]), dirichlet.pdf(x[:-1], alpha))
        assert_equal(d.logpdf(x[:-1]), dirichlet.logpdf(x[:-1], alpha))
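The frozen form fixes alpha once and exposes the same methods; a minimal sketch of the equivalence the test checks:

import numpy as np
from scipy.stats import dirichlet

alpha = np.array([2.0, 3.0, 4.0])
d = dirichlet(alpha)                 # frozen distribution with alpha fixed
x = np.array([0.2, 0.3, 0.5])
assert np.isclose(d.logpdf(x), dirichlet.logpdf(x, alpha))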
Example #21
    def EM(self, X, tol=1e-3):
        # train unsupervised via MAP EM
        self.N = X.shape[0]
        # init mixing props
        self.pi_map = self.alpha / np.sum(self.alpha)
        # init posterior each cluster (offsetting the prior)..
        m = X[np.random.choice(self.N, self.K, replace=False), :]  # data as mu
        [self.base[k].post_init(m[k, :]) for k in range(self.K)]
        # init responsibility
        r = self.predict(X)
        self.lml = []  # log-marg-lik
        self.ll = []  # log-joint-lik

        # EM iterations
        while (np.size(self.lml) < 5
               or abs(sum(self.lml[-1] - self.lml[-5:-1])) > tol):
            # M-step
            for k in range(self.K):  # update for each class
                # clusters
                self.base[k].M_step(X, r[:, k])  # update posterior params.
                # dirichlet
                nk = np.sum(r[:, k])
                self.pi_map[k] = ((nk + self.alpha[k] - 1) /
                                  (self.N + np.sum(self.alpha) - self.K))

            # E-step
            r = self.predict(X)  # update responsibility

            # log-lik of base params
            lpth_D = np.array([
                self.base[k].post_logpdf(self.base[k].mu_map,
                                         self.base[k].Sig_map)
                for k in range(self.K)
            ]).sum()
            # mixing props
            self.alpha_n = np.sum(r, 0) + self.alpha  # posterior alpha
            lpi_D = dirichlet.logpdf(self.pi_map, self.alpha_n)

            # append lml/ll
            self.lml = np.append(self.lml, np.sum(self.lpx))
            self.ll = np.append(self.ll, np.sum(self.lpx) + lpth_D + lpi_D)
            print('log-marginal-likelihood:' + '%.4f' % self.lml[-1])

        # store the final responsibility
        self.r = r
Example #22
    def test_log_pdf_with_broadcast(self, dtype, a, a_is_samples, rv, rv_is_samples, num_samples):
        # Add sample dimension if variable is not samples
        a_mx = mx.nd.array(a, dtype=dtype)
        if not a_is_samples:
            a_mx = add_sample_dimension(mx.nd, a_mx)
        a = a_mx.asnumpy()

        rv_mx = mx.nd.array(rv, dtype=dtype)
        if not rv_is_samples:
            rv_mx = add_sample_dimension(mx.nd, rv_mx)
        rv = rv_mx.asnumpy()

        is_samples_any = a_is_samples or rv_is_samples
        rv_shape = rv.shape[1:]

        n_dim = 1 + len(rv.shape) if is_samples_any and not rv_is_samples else len(rv.shape)
        a_np = np.broadcast_to(a, (num_samples, 3, 2))
        rv_np = numpy_array_reshape(rv, is_samples_any, n_dim)

        # Initialize rand_gen
        rand = np.random.rand(num_samples, *rv_shape)
        rand_gen = MockMXNetRandomGenerator(mx.nd.array(rand.flatten(), dtype=dtype))

        # Calculate correct Dirichlet logpdf
        r = []
        for s in range(len(rv_np)):
            a = []
            for i in range(len(rv_np[s])):
                a.append(scipy_dirichlet.logpdf(rv_np[s][i]/sum(rv_np[s][i]), a_np[s][i]))
            r.append(a)
        log_pdf_np = np.array(r)

        dirichlet = Dirichlet.define_variable(alpha=Variable(), shape=rv_shape, dtype=dtype, rand_gen=rand_gen).factor
        variables = {dirichlet.alpha.uuid: a_mx, dirichlet.random_variable.uuid: rv_mx}
        log_pdf_rt = dirichlet.log_pdf(F=mx.nd, variables=variables)

        assert np.issubdtype(log_pdf_rt.dtype, dtype)
        assert array_has_samples(mx.nd, log_pdf_rt) == is_samples_any
        if is_samples_any:
            assert get_num_samples(mx.nd, log_pdf_rt) == num_samples, (get_num_samples(mx.nd, log_pdf_rt), num_samples)
        assert np.allclose(log_pdf_np, log_pdf_rt.asnumpy())
Example #23
def score_words(train_tokens):

    words = set()
    all_counts = {}
    for label, docs in train_tokens.iteritems():
        counts = defaultdict(int)
        for d in docs:
            for t in d:
                counts[t] += 1
                words.add(t)
        all_counts[label] = counts

    # d filter
    word_score = []
    for w in words:
        get_count = lambda d: d.get(w, 0)

        x = normalize(np.array(map(get_count, all_counts.values())))
        score = dirichlet.logpdf(normalize(x), np.ones(len(all_counts)) * 2)
        word_score.append((w, score))
    return word_score
Example #24
    def _calc_expectation(Mu, P, V, Gamma, A, Alpha, W):
        """Calculates the conditional expectation in the E-step of the
        EM-Algorithm, given the observations and the current estimates of the
        classifier.

        Parameters
        ----------
        Mu : numpy.ndarray, shape (n_samples, n_classes)
            Mu[i,k] contains the probability of a sample X[i] to be of class
            classes_[k] estimated according to the EM-algorithm.
        V : numpy.ndarray, shape (n_samples, n_classes)
            Describes an intermediate result.
        P : numpy.ndarray, shape (n_samples, n_classes)
            P[i,k] contains the probabilities of sample X[i] belonging to class
            classes_[k], as estimated by the classifier
            (i.e., sigmoid(W.T, X[i])).

        Returns
        -------
        expectation : float
            The conditional expectation.
        """
        # Evaluate prior of weight vectors.
        all_zeroes = not np.any(Gamma)
        Gamma = Gamma if all_zeroes else np.linalg.inv(Gamma)
        prior_W = np.sum([
            multi_normal.logpdf(x=W[:, k], cov=Gamma, allow_singular=True)
            for k in range(W.shape[1])
        ])

        # Evaluate prior of alpha matrices.
        prior_Alpha = np.sum([[
            dirichlet.logpdf(x=Alpha[j, k, :], alpha=A[j, k, :])
            for k in range(Alpha.shape[1])
        ] for j in range(Alpha.shape[0])])

        # Evaluate log-likelihood for data.
        log_likelihood = np.sum(Mu * np.log(P * V + np.finfo(float).eps))
        expectation = log_likelihood + prior_W + prior_Alpha
        return expectation
Example #25
def score_words(train_tokens):

    words = set()
    all_counts = {}
    for label, docs in train_tokens.iteritems():
        counts = defaultdict(int)
        for d in docs:
            for t in d:
                counts[t] += 1
                words.add(t)
        all_counts[label] = counts


    # d filter
    word_score = []
    for w in words:
        get_count = lambda d: d.get(w, 0)

        x = normalize(np.array(map(get_count, all_counts.values())))
        score = dirichlet.logpdf(normalize(x), np.ones(len(all_counts)) * 2)
        word_score.append((w, score))
    return word_score
Example #26
    def sample_delta(self, nodepair, eventtime, ite):

        a_delta = 0.1
        b_delta = 0.1
        a_taus = 0.1
        b_taus = 0.1
        delta_old = self.Delta_pis
        delta_new = delta_old + norm.rvs() * ((np.sqrt(ite + 1))**(-1))

        taus_old = self.Taus_kij
        taus_new = taus_old + norm.rvs() * ((np.sqrt(ite + 1))**(-1))

        if delta_new > 0:

            ll_old = 0
            ll_new = 0

            for tt in range(len(eventtime)):
                i_parameter_contribute_from_J_old = np.zeros(self.KK)
                i_parameter_contribute_from_J_new = np.zeros(self.KK)
                if len(self.receiving_j_list[tt]) > 0:

                    for nn in range(len(self.receiving_j_list[tt])):
                        exp_time_current_s_old = np.exp(
                            -delta_old *
                            (eventtime[tt] - self.receiving_j_list[tt][nn][2]))
                        val_1_old = self.betas[
                            self.receiving_j_list[tt][nn][0],
                            (nodepair[tt, 0])] * exp_time_current_s_old * (
                                self.pis_list[self.receiving_j_list[tt][nn][0]]
                                [self.receiving_j_list[tt][nn][1]])
                        i_parameter_contribute_from_J_old += val_1_old

                        exp_time_current_s_new = np.exp(
                            -delta_new *
                            (eventtime[tt] - self.receiving_j_list[tt][nn][2]))
                        val_1_new = self.betas[
                            self.receiving_j_list[tt][nn][0],
                            (nodepair[tt, 0])] * exp_time_current_s_new * (
                                self.pis_list[self.receiving_j_list[tt][nn][0]]
                                [self.receiving_j_list[tt][nn][1]])
                        i_parameter_contribute_from_J_new += val_1_new

                i_parameter_contribute_from_prei = self.betas[
                    (nodepair[tt, 0]), (nodepair[tt, 0])] * (self.pis_list[
                        (nodepair[tt, 0])][self.sender_receiver_num[tt][0]])
                psi_i_s_old = i_parameter_contribute_from_J_old + i_parameter_contribute_from_prei
                psi_i_s_new = i_parameter_contribute_from_J_new + i_parameter_contribute_from_prei

                ll_old += dirichlet.logpdf(
                    self.pis_list[(
                        nodepair[tt, 0])][self.sender_receiver_num[tt][0] + 1],
                    psi_i_s_old)
                ll_new += dirichlet.logpdf(
                    self.pis_list[(
                        nodepair[tt, 0])][self.sender_receiver_num[tt][0] + 1],
                    psi_i_s_new)

            ll_old += gamma.logpdf(delta_old, a=a_delta, scale=b_delta)
            ll_new += gamma.logpdf(delta_new, a=a_delta, scale=b_delta)

            if np.log(np.random.rand()) < (ll_new - ll_old):
                self.Delta_pis = delta_new

        if taus_new > 0:
            ll_old = 0
            ll_new = 0

            judge = np.where(self.b_ij > 0)[0]
            b_nonzero = self.b_ij[judge]
            receiving_nozero1 = [
                self.mutually_exciting_pair[judge_i] for judge_i in judge
            ]

            receiving_time = [
                eventtime[receiving_nozero1[it][b_nonzero[it] - 1]]
                for it in range(len(b_nonzero))
            ]

            ll_old += np.sum(-taus_old * (eventtime[judge] - receiving_time))
            ll_new += np.sum(-taus_new * (eventtime[judge] - receiving_time))

            ll_old -= self.alpha * np.sum(
                (taus_old**(-1)) * (1 - np.exp(-taus_old *
                                               (eventtime[-1] - eventtime))))
            ll_new -= self.alpha * np.sum(
                (taus_new**(-1)) * (1 - np.exp(-taus_new *
                                               (eventtime[-1] - eventtime))))

            ll_old += gamma.logpdf(taus_old, a=a_taus, scale=b_taus)
            ll_new += gamma.logpdf(taus_new, a=a_taus, scale=b_taus)

            if np.log(np.random.rand()) < (ll_new - ll_old):
                self.Taus_kij = taus_new
Example #27
def _learn_global_mixture_weights(alpha, multinomials, val_data, num_em_iter=100, tol=0.001):
    """
    Learning the mixing weights for a mixture of multinomials. Each observation is considered as a data point
    and the mixing weights (\pi) are learned using all the points.

    NOTE: In order for the algorithm to work, there can be no location that can get 0 probability by both the mem_mult
    and the mf_mult. In my runs, I use MPE to estimate the mf_mult while using MLE for the mem_mult. That way the mf_mult
    has no 0 values.


     INPUT:
    -------
        1. alpha:       <float / (num_comp, ) ndarray>   Dirichlet prior for the pi learning. If <float> is given it is
                                                  treated as a flat prior. Has to be bigger than 1.
        2. multinomials: list[<(U, C) ndarray>]    one multinomial parameter matrix per mixture component
        3. val_data:    <(N, 3) ndarray>    each row is [ind_id, loc_id, counts]
        4. num_em_iter: <int>               number of em iterations
        5. tol:         <float>             convergence threshold

     OUTPUT:
    --------
        1. pi:  <(num_comp, ) ndarray>     mixing weights.
        2. log likelihood reached.

     RAISE:
    -------
        1. ValueError:
                a. alphas are not bigger than 1
                b. the multinomial's rows don't sum to 1
                c. _There is a location with both mults 0 (see NOTE)

    """
    num_comp = len(multinomials)
    if np.any(alpha <= 1):
        raise ValueError('alpha values have to be bigger than 1')

    for i, mult in enumerate(multinomials):
        if np.any(np.abs(np.sum(mult, axis=1) - 1) > 0.001):
            raise ValueError('component %d param is not a proper multinomial -- all rows must sum to 1' % i)

    if type(alpha) == float or type(alpha) == int:
        alpha = np.ones(num_comp) * alpha * 1.

    # Creating responsibility matrix and initializing it hard assignment on random
    log_like_tracker = [-np.inf]
    pi = np.ones(num_comp) / num_comp
    start = time.time()
    em_iter = 0
    for em_iter in xrange(1, num_em_iter + 1):
        # Every 2 iterations we compute the posterior log probability to check for convergence.
        if em_iter % 2 == 0:

            event_prob = _data_prob(pi, multinomials, val_data)
            event_prob = np.sum(event_prob, axis=0)  # prob

            # The data likelihood was computed for each location, but it should be in the power of the number
            # of observations there, or a product in the log space.
            data_likelihood = np.log(np.array(event_prob)) * val_data[:, 2]

            prior_probability = dirichlet.logpdf(pi, alpha=alpha)
            log_likelihood = np.sum(data_likelihood + prior_probability) / np.sum(val_data[:, 2])

            if np.abs(log_likelihood - log_like_tracker[-1]) < tol:
                log.debug('[iter %d] [Reached convergence.]' % em_iter)
                break

            log.debug('[iter %d] [Likelihood: [%.4f -> %.4f]]' % (em_iter, log_like_tracker[-1], log_likelihood))
            log_like_tracker.append(log_likelihood)

        # E-Step

        resp = _data_prob(pi, multinomials, val_data)

        if np.all(resp == 0):
            raise ValueError('0 mix probability')

        resp = np.array(resp).T
        resp = normalize(resp, 'l1', axis=1)

        resp = np.multiply(resp, val_data[:, 2][:, np.newaxis])
        pi = np.sum(resp, axis=0)
        pi += alpha - 1
        pi /= np.sum(pi)

    total_time = time.time() - start
    log.debug('Finished EM. Total time = %d secs -- %.3f per iteration' % (total_time, total_time / em_iter))

    data_log_like = _data_prob(pi, multinomials, val_data)
    data_log_like = np.sum(data_log_like, axis=0)
    ll = np.sum(np.log(np.array(data_log_like)) * val_data[:, 2]) / np.sum(val_data[:, 2])
    return pi, ll
Example #28
def lda_inference(doc, lda_model, adagrad=True):
    S = 10  # samples
    converged = 100.0
    rho = 1e-4  # learning rate
    if adagrad:
        epsilon = 1e-6  # fudge factor
        g_phi = np.zeros([doc.length, lda_model.num_topics])
        g_var_gamma = np.zeros([lda_model.num_topics])

    # variational parameters

    phi = np.ones([doc.length, lda_model.num_topics]) \
            / lda_model.num_topics  # N * k matrix
    var_gamma = np.ones([lda_model.num_topics]) * lda_model.alpha \
             + doc.total / float(lda_model.num_topics)

    likelihood_old = 0

    var_ite = 0
    while (converged > 1e-3 and var_ite < 1e3):
        var_ite += 1

        # sample S theta
        sample_theta = np.random.dirichlet(var_gamma, S)

        # sample S z for each word n
        sample_zs = np.zeros([doc.length, S], dtype=np.int32)
        for n in range(doc.length):
            # sample S z for each word
            sample_z = np.random.multinomial(1, phi[n, :], S)  # S * k matrix
            which_j = np.argmax(sample_z, 1)  # S length vector
            sample_zs[n, :] = which_j

        # compute gamma gradient

        dig = digamma(var_gamma)
        var_gamma_sum = np.sum(var_gamma)
        digsum = digamma(var_gamma_sum)

        ln_theta = np.log(sample_theta)  # S * k matrix

        dqdg = ln_theta - dig + digsum  # S * k matrix

        ln_p_theta = dirichlet.logpdf(np.transpose(sample_theta), \
                                [lda_model.alpha] * lda_model.num_topics)
        # S length vector
        ln_q_theta = dirichlet.logpdf(np.transpose(sample_theta), var_gamma)
        # S length vector

        # explicitly evaluate expectation
        # E_p_z = np.sum(ln_theta * np.sum(phi, 0), 1) # S length vector

        # monte-carlo estimated expectation
        E_p_z = np.zeros(S)  # S length vector
        for sample_id in range(S):
            cur_ln_theta = ln_theta[sample_id, :]
            sampled_ln_theta = []
            for n in range(doc.length):
                which_j = sample_zs[n, :]
                sampled_ln_theta += list(
                    cur_ln_theta[which_j]
                )  # (doc.counts[n] * list(cur_ln_theta[which_j]))
            E_p_z[sample_id] = np.average(sampled_ln_theta)

        grad_gamma = np.average(
            dqdg * np.reshape(ln_p_theta - ln_q_theta + E_p_z, (S, 1)), 0)

        # update
        if adagrad:
            g_var_gamma += grad_gamma**2
            grad_gamma = grad_gamma / (np.sqrt(g_var_gamma) + epsilon)
        var_gamma = var_gamma + rho * grad_gamma

        # for phi

        # for explicit evaluation of expectation
        # dig = digamma(var_gamma)
        # var_gamma_sum = np.sum(var_gamma)
        # digsum = digamma(var_gamma_sum)

        # resample from updated gamma
        sample_theta = np.random.dirichlet(var_gamma, S)
        ln_theta = np.log(sample_theta)  # S * k matrix

        for n in range(doc.length):

            # compute phi gradient
            which_j = sample_zs[n, :]

            dqdphi = 1 / phi[n][which_j]  # S length vector

            ln_p_w = lda_model.log_prob_w[which_j][:, doc.words[n]]  # S length vector

            ln_q_phi = np.log(phi[n][which_j])  # S length vector

            # explicitly evaluate expectation
            # E_p_z_theta = dig[which_j] - digsum # S length vector

            # monte-carlo estimated expectation
            E_p_z_theta = np.zeros(S)  # S length vector
            for sample_id in range(S):
                cur_ln_theta = ln_theta[sample_id, :]
                E_p_z_theta += cur_ln_theta[which_j]
            E_p_z_theta = E_p_z_theta / S

            # print( dqdphi.shape, ln_p_w.shape, ln_q_phi.shape, E_p_z_theta.shape)
            # print (lda_model.log_prob_w[which_j][:,doc.words[n]])
            # print ln_p_w,ln_q_phi,E_p_z_theta
            grad_phi = doc.counts[n] * dqdphi * (ln_p_w - ln_q_phi +
                                                 E_p_z_theta)

            # update phi

            for i, j in enumerate(which_j):
                if adagrad:
                    g_phi[n][j] += grad_phi[i]**2
                    grad_phi[i] = grad_phi[i] / (np.sqrt(g_phi[n][j]) +
                                                 epsilon)
                # print grad_phi[i]
                phi[n][j] = phi[n][j] + rho * grad_phi[i]
                if phi[n][j] < 0:  # bound phi
                    phi[n][j] = 0
                phi[n] /= np.sum(phi[n])  # normalization

        # compute likelihood

        likelihood = compute_likelihood(doc, lda_model, phi, var_gamma)
        assert (not isnan(likelihood))
        converged = abs((likelihood_old - likelihood) / likelihood_old)
        likelihood_old = likelihood
        # print likelihood, converged
    return likelihood
Example #29
 def compute_prior(self):
     return dirichlet.logpdf(self.value, self.alpha)
Example #30
 def compute_prior(self):
     return dirichlet.logpdf(self.value, self.alpha)
Example #31
 def test_alpha_correct_depth(self):
     alpha = np.array([1.0, 2.0, 3.0])
     x = np.ones((3, 7)) / 3
     dirichlet.pdf(x, alpha)
     dirichlet.logpdf(x, alpha)
Example #32
def _learn_mix_mult(alpha, mem_mult, mf_mult, val_data, num_em_iter=100, tol=0.00001):
    """
    Learning the mixing weights for mixture of two multinomials. Each observation is considered as a data point
    and the mixing weights (\pi) are learned using all the points.

    NOTE: In order for the algorithm to work, there can be no location that can get 0 probability by both the mem_mult
    and the mf_mult. In my runs, I use MPE to estimate the mf_mult while using MLE for the mem_mult. That way the mf_mult
    has no 0 values.


     INPUT:
    -------
        1. alpha:       <float / (2, ) ndarray>   Dirichlet prior for the pi learning. If <float> is given it is treated
                                                  as a flat prior. Has to be bigger than 1.
        2. mem_mult:    <(I, L) ndarray>    each row is the multinomial parameter according to the "self" data
        3. mf_mult:     <(I, L) ndarray>    each row is the multinomial parameter according to the matrix factorization
        4. val_data:    <(N, 3) ndarray>    each row is [ind_id, loc_id, counts]
        5. num_em_iter: <int>               number of em iterations
        6. tol:         <float>             convergence threshold

     OUTPUT:
    --------
        1. pi:  <(2, ) ndarray>     mixing weights.

     RAISE:
    -------
        1. ValueError:
                a. alphas are not bigger than 1
                b. the multinomial's rows don't sum to 1
                c. There is a location with both mults 0 (see NOTE)

    """
    if np.any(alpha <= 1):
        raise ValueError('alpha values have to be bigger than 1')

    if np.any(np.abs(np.sum(mem_mult, axis=1) - 1) > 0.001):
        raise ValueError('mem_mult param is not a multinomial -- all rows must sum to 1')

    if np.any(np.abs(np.sum(mf_mult, axis=1) - 1) > 0.001):
        raise ValueError('mf_mult param is not a multinomial -- all rows must sum to 1')

    if type(alpha) == float or type(alpha) == int:
        alpha = np.array([alpha, alpha])

    # Creating responsibility matrix and initializing it hard assignment on random
    log_like_tracker = [-np.inf]
    pi = np.array([0.5, 0.5])
    start = time.time()
    for em_iter in range(1, num_em_iter + 1):
        # Every 5 iterations we compute the posterior log probability to check for convergence.
        if em_iter % 5 == 0:
            data_log_like = pi[0] * mem_mult[val_data[:, 0].astype(int), val_data[:, 1].astype(int)] + \
                            pi[1] * mf_mult[val_data[:, 0].astype(int), val_data[:, 1].astype(int)]

            # The data likelihood was computed for each location, but it should be in the power of the number
            # of observations there, or a product in the log space.
            data_likelihood = np.log(data_log_like) * val_data[:, 2]

            prior_probability = dirch.logpdf(pi, alpha=alpha)
            log_likelihood = np.mean(data_likelihood + prior_probability)

            if np.abs(log_likelihood - log_like_tracker[-1]) < tol:
                break


            log_like_tracker.append(log_likelihood)

        # E-Step
        resp = [pi[0] * mem_mult[val_data[:, 0].astype(int), val_data[:, 1].astype(int)],
                pi[1] * mf_mult[val_data[:, 0].astype(int), val_data[:, 1].astype(int)]]

        if np.all(resp == 0):
            raise ValueError('0 mix probability')

        resp = np.array(resp).T
        resp = normalize_mat_row(resp)

        # M-Step. Only on the \pi with Dirichlet prior alpha > 1
        pi = np.sum(resp * col_vector(val_data[:, 2]), axis=0)
        pi += alpha - 1
        pi /= np.sum(pi)

    total_time = time.time() - start
    log.debug('Finished EM. Total time = %d secs -- %.3f per iteration' % (total_time, total_time / em_iter))

    return pi
Example #33
 def test_alpha_correct_depth(self):
     alpha = np.array([1.0, 2.0, 3.0])
     x = np.ones((3, 7)) / 3
     dirichlet.pdf(x, alpha)
     dirichlet.logpdf(x, alpha)
Example #34
def compute_marg_likelihood_and_NSE_galaxies(y, iters, init, hypers):
    ''' Compute the marginal likelihood from the Gibbs Sampler output according to Chib (1995)
    y : (array-like) endogeneous variables
    iters: (int) length of the MCMC
    init: (array-like) initialisation parameters
    hypers: (array-like) hyper-parameters
    
    returns: (float) the marginal likelihood/normalizing constant 
    '''

    # Initialisation
    d = init['d']
    mu_params, sigma_square_params, q_params = init['mu_params'], init[
        'sigma_square_params'], init['q_params']

    mu, sigma_square, q, mu_hat, B, n_for_estim_sigma, delta, n_for_estim_q = GibbsSampler_galaxies(
        y, iters, init, hypers)

    mu_star = np.array(mu).mean(axis=0)
    sigma_square_star = np.array(sigma_square).mean(axis=0)
    q_star = np.array(q).mean(axis=0)

    ## Marginal likelihood computation P7, right column
    # First term:
    y_given_mu_and_sigma2_stars_pdf = np.stack([
        norm.pdf(x=y, loc=mu_star[i], scale=sigma_square_star[i])
        for i in range(d)
    ])[:, :, 0].T
    log_like = np.log(
        (q_star * y_given_mu_and_sigma2_stars_pdf).sum(axis=1)).sum()

    # Second term
    mu_prior = multivariate_normal.logpdf(
        x=mu_star, mean=mu_params[0], cov=mu_params[1]).sum(
        )  # Sum because of the use of logpdf instead of pdf
    sigma_square_prior = invgamma.logpdf(x=sigma_square_star,
                                         a=sigma_square_params[0],
                                         scale=np.sqrt(
                                             sigma_square_params[1])).sum()
    q_square_prior = dirichlet.logpdf(x=q_star, alpha=q_params).sum()

    log_prior = mu_prior + sigma_square_prior + q_square_prior

    # Third term
    conditional_densities_mu = np.array([
        np.prod(multivariate_normal.pdf(x=mu_star, mean=mu_hat[i], cov=B[i]))
        for i in range(iters)
    ])

    conditional_densities_sigma = np.array([np.prod(invgamma.pdf(x=sigma_square_star, a=(sigma_square_params[0]+n_for_estim_sigma[i])/2,\
                                                         scale=(sigma_square_params[1]+delta[i])/2)) for i in range(iters)])

    conditional_densities_q = np.array([
        dirichlet.pdf(x=q_star, alpha=q_params + n_for_estim_q[i])
        for i in range(iters)
    ])

    conditional_densities = conditional_densities_mu * conditional_densities_sigma * conditional_densities_q

    log_posterior = np.log(conditional_densities.mean())

    log_marg_likelihood = log_like + log_prior - log_posterior

    #Numerical Standard Error Computation
    h = np.array([
        conditional_densities_mu, conditional_densities_sigma,
        conditional_densities_q
    ])

    h_hat = np.array([
        np.mean(conditional_densities_mu),
        np.mean(conditional_densities_sigma),
        np.mean(conditional_densities_q)
    ])

    var = compute_var_h_hat(h, h_hat)

    NSE = np.dot(np.dot((1 / h_hat).reshape(1, -1), var),
                 (1 / h_hat).reshape(-1, 1))[0, 0]

    return log_marg_likelihood, NSE
Example #35
def prior_probs(param, val):
    if param == "pi":
        return dirichlet.logpdf(val, alpha=prior_pi)
    elif param == "rates":
        return dirichlet.logpdf(val, alpha=prior_er)
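prior_pi and prior_er are module-level Dirichlet concentration vectors assumed by the function above; a hypothetical preamble (the sizes are chosen only for illustration):

import numpy as np
from scipy.stats import dirichlet

prior_pi = np.ones(4)  # assumed flat prior over the pi parameters
prior_er = np.ones(6)  # assumed flat prior over the rate parameters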