def _compute_log_p_data_no_split(self, y, prior):
        """Return the log-score for leaving the data in ``y`` unsplit.

        The score is the sum of two terms:

        * ``log(1 - partition_prior ** (1 + level))``, with both values read
          from ``self``;
        * ``multivariate_betaln(posterior) - multivariate_betaln(prior)``,
          where ``posterior`` is the value returned by
          ``self._compute_posterior(y, prior)``.

        NOTE(review): this reads like a "no split" prior combined with a
        Dirichlet-multinomial marginal likelihood -- confirm against the
        definitions of ``_compute_posterior`` and ``multivariate_betaln``.
        """
        updated_prior = self._compute_posterior(y, prior)

        depth_exponent = 1 + self.level
        no_split_term = np.log(1 - self.partition_prior ** depth_exponent)

        betaln_after = multivariate_betaln(updated_prior)
        betaln_before = multivariate_betaln(prior)
        evidence_term = betaln_after - betaln_before

        return no_split_term + evidence_term
Beispiel #2
0
    def _compute_log_p_data_no_split(self, y):
        """Return the log-score for leaving the data in ``y`` unsplit.

        Uses ``self.prior`` as the prior parameters and
        ``self._compute_posterior(y)`` as the posterior parameters, and
        returns::

            log(1 - partition_prior ** (1 + level))
            + multivariate_betaln(posterior) - multivariate_betaln(prior)

        NOTE(review): presumably the first term is the prior probability of
        not splitting at this tree level and the second the marginal
        likelihood of ``y`` -- confirm against the enclosing class.
        """
        prior_params = self.prior
        posterior_params = self._compute_posterior(y)

        depth_exponent = 1 + self.level
        no_split_term = np.log(1 - self.partition_prior ** depth_exponent)

        betaln_after = multivariate_betaln(posterior_params)
        betaln_before = multivariate_betaln(prior_params)
        evidence_term = betaln_after - betaln_before

        return no_split_term + evidence_term
    def _compute_log_p_data_split(self, y, prior, split_indices):
        """Return the log-score of splitting ``y`` at each candidate index.

        For every class index ``c`` in ``range(len(prior))`` the running count
        of ``y == c`` is taken; the count at position ``split_indices - 1``
        becomes the first-part count and the remainder (total minus that
        count) becomes the second-part count.  Both count collections are
        scored with ``self._compute_log_p_data`` and added to the split prior
        ``log(partition_prior ** (1 + level) / (n_splits * n_dim))``.

        Parameters
        ----------
        y : class labels; must support ``y == c`` returning an object with a
            ``cumsum()`` method (assumes a NumPy array -- TODO confirm).
        prior : prior parameters, one entry per class.
        split_indices : candidate split positions; must support ``len()`` and
            ``split_indices - 1`` (assumes an integer NumPy array -- TODO
            confirm).

        Returns
        -------
        Sum of the split prior term and the two data terms.
        """
        n_classes = len(prior)

        # Object arrays: each slot holds the per-split counts of one class.
        first_part = np.empty(n_classes, dtype=object)
        second_part = np.empty(n_classes, dtype=object)
        for label in range(n_classes):
            running_count = (y == label).cumsum()
            class_total = running_count[-1]
            first_part[label] = running_count[split_indices - 1]
            second_part[label] = class_total - first_part[label]

        # NOTE(review): the dimension count used in the divisor is the number
        # of entries in ``prior`` -- verify that this is the intended value.
        n_dim = n_classes
        n_candidates = len(split_indices)
        split_weight = self.partition_prior ** (1 + self.level)
        log_split_prior = np.log(split_weight / (n_candidates * n_dim))

        # Shared by both parts, so evaluate it only once.
        prior_betaln = multivariate_betaln(prior)
        log_first = self._compute_log_p_data(first_part, prior, prior_betaln)
        log_second = self._compute_log_p_data(second_part, prior, prior_betaln)

        return log_split_prior + log_first + log_second
Beispiel #4
0
    def _compute_log_p_data_split(self, y, split_indices, n_dim):
        """Return the log-score of splitting ``y`` at each candidate index.

        For every class index ``c`` in ``range(len(self.prior))`` the running
        count of ``y == c`` is taken; the count at position
        ``split_indices - 1`` becomes the first-part count and the remainder
        (total minus that count) becomes the second-part count.  Both count
        collections are scored with ``self._compute_log_p_data`` and added to
        the split prior
        ``log(partition_prior ** (1 + level) / (n_splits * n_dim))``.

        Parameters
        ----------
        y : class labels; must support ``y == c`` returning an object with a
            ``cumsum()`` method (assumes a NumPy array -- TODO confirm).
        split_indices : candidate split positions; must support ``len()`` and
            ``split_indices - 1`` (assumes an integer NumPy array -- TODO
            confirm).
        n_dim : factor multiplied with the number of candidate splits in the
            divisor of the split prior (presumably the number of feature
            dimensions -- verify against the caller).

        Returns
        -------
        Sum of the split prior term and the two data terms.
        """
        prior_params = self.prior
        n_classes = len(prior_params)

        # Object arrays: each slot holds the per-split counts of one class.
        first_part = np.array([None] * n_classes)
        second_part = np.array([None] * n_classes)
        for label in range(n_classes):
            running_count = (y == label).cumsum()
            class_total = running_count[-1]
            first_part[label] = running_count[split_indices - 1]
            second_part[label] = class_total - first_part[label]

        n_candidates = len(split_indices)
        split_weight = self.partition_prior ** (1 + self.level)
        log_split_prior = np.log(split_weight / (n_candidates * n_dim))

        # Shared by both parts, so evaluate it only once.
        prior_betaln = multivariate_betaln(prior_params)
        log_first = self._compute_log_p_data(first_part, prior_betaln)
        log_second = self._compute_log_p_data(second_part, prior_betaln)

        return log_split_prior + log_first + log_second
 def _compute_log_p_data(self, k, prior, betaln_prior):
     """Return ``multivariate_betaln(prior + k) - betaln_prior``.

     See equation (42) of
     https://www.cs.ubc.ca/~murphyk/Teaching/CS340-Fall06/reading/bernoulli.pdf,
     which can be expressed as a fraction of beta functions.

     Parameters
     ----------
     k : counts that are added to ``prior``.
     prior : prior parameters.
     betaln_prior : value subtracted from the result; presumably
         ``multivariate_betaln(prior)`` precomputed by the caller -- verify
         against the call sites.
     """
     updated_params = prior + k
     betaln_updated = multivariate_betaln(updated_params)
     return betaln_updated - betaln_prior