def _compute_log_p_data_no_split(self, y, prior):
    """Return the log score for keeping the data ``y`` unsplit.

    The result is the sum of two terms:

    * ``log(1 - partition_prior ** (1 + level))``, built from
      ``self.partition_prior`` and ``self.level``;
    * ``multivariate_betaln(posterior) - multivariate_betaln(prior)``,
      where ``posterior`` is ``self._compute_posterior(y, prior)``.

    Parameters
    ----------
    y
        Target values; only forwarded to ``self._compute_posterior``.
    prior
        Prior parameters; forwarded to ``self._compute_posterior`` and
        to ``multivariate_betaln``.

    Returns
    -------
    The sum of the two log terms described above.
    """
    updated_params = self._compute_posterior(y, prior)

    # Log prior term for "no split"; the exponent grows with self.level.
    level_exponent = 1 + self.level
    log_no_split_prior = np.log(1 - self.partition_prior**level_exponent)

    # Log data term: difference of log multivariate beta functions.
    betaln_updated = multivariate_betaln(updated_params)
    log_data_term = betaln_updated - multivariate_betaln(prior)

    return log_no_split_prior + log_data_term
def _compute_log_p_data_no_split(self, y):
    """Return the log score for keeping the data ``y`` unsplit.

    Uses ``self.prior`` as the prior parameters. The result is the sum of:

    * ``log(1 - partition_prior ** (1 + level))``, built from
      ``self.partition_prior`` and ``self.level``;
    * ``multivariate_betaln(posterior) - multivariate_betaln(self.prior)``,
      where ``posterior`` is ``self._compute_posterior(y)``.

    Parameters
    ----------
    y
        Target values; only forwarded to ``self._compute_posterior``.

    Returns
    -------
    The sum of the two log terms described above.
    """
    prior_params = self.prior
    posterior_params = self._compute_posterior(y)

    # Log prior term for "no split"; the exponent grows with self.level.
    level_exponent = 1 + self.level
    log_no_split_prior = np.log(1 - self.partition_prior**level_exponent)

    # Log data term: difference of log multivariate beta functions.
    betaln_posterior = multivariate_betaln(posterior_params)
    log_data_term = betaln_posterior - multivariate_betaln(prior_params)

    return log_no_split_prior + log_data_term
def _compute_log_p_data_split(self, y, prior, split_indices):
    """Return the log score of splitting ``y`` at each of ``split_indices``.

    For every class index ``c`` in ``range(len(prior))`` the occurrences of
    ``c`` in ``y`` are counted cumulatively. The count at position
    ``split_indices - 1`` is the number of ``c`` entries among the first
    ``split_indices`` elements (first part); the remainder of the total
    forms the second part.

    The result is the sum of:

    * ``log(partition_prior ** (1 + level) / (n_splits * n_dim))``;
    * ``self._compute_log_p_data`` evaluated on the first-part counts;
    * ``self._compute_log_p_data`` evaluated on the second-part counts.

    Parameters
    ----------
    y
        Labels compared element-wise against each class index; the
        comparison result must provide ``.cumsum()`` and indexing.
        Assumes a 1-D NumPy array of integer class labels - TODO confirm.
    prior
        Prior parameters; ``len(prior)`` is used as the number of classes.
    split_indices
        Candidate split positions; must support ``len()`` and
        ``split_indices - 1`` must be usable as an index.

    Returns
    -------
    ``log_p_prior + log_p_data1 + log_p_data2`` as described above.
    """
    n_classes = len(prior)

    # Object arrays whose entry c holds the per-split counts of class c.
    counts_first = np.empty(n_classes, dtype=object)
    counts_second = np.empty(n_classes, dtype=object)
    for class_idx in range(n_classes):
        running_count = (y == class_idx).cumsum()
        class_total = running_count[-1]
        first_part = running_count[split_indices - 1]
        counts_first[class_idx] = first_part
        counts_second[class_idx] = class_total - first_part

    n_splits = len(split_indices)
    # NOTE(review): n_dim is taken from len(prior), i.e. it equals the
    # number of classes here - confirm this is the intended divisor.
    n_dim = n_classes
    split_weight = self.partition_prior**(1 + self.level)
    log_split_prior = np.log(split_weight / (n_splits * n_dim))

    # Computed once and shared by both parts.
    prior_betaln = multivariate_betaln(prior)
    log_data_first = self._compute_log_p_data(counts_first, prior, prior_betaln)
    log_data_second = self._compute_log_p_data(counts_second, prior, prior_betaln)

    return log_split_prior + log_data_first + log_data_second
def _compute_log_p_data_split(self, y, split_indices, n_dim):
    """Return the log score of splitting ``y`` at each of ``split_indices``.

    Uses ``self.prior`` as the prior parameters. For every class index
    ``c`` in ``range(len(self.prior))`` the occurrences of ``c`` in ``y``
    are counted cumulatively. The count at position ``split_indices - 1``
    is the number of ``c`` entries among the first ``split_indices``
    elements (first part); the remainder of the total forms the second
    part.

    The result is the sum of:

    * ``log(partition_prior ** (1 + level) / (n_splits * n_dim))``;
    * ``self._compute_log_p_data`` evaluated on the first-part counts;
    * ``self._compute_log_p_data`` evaluated on the second-part counts.

    Parameters
    ----------
    y
        Labels compared element-wise against each class index; the
        comparison result must provide ``.cumsum()`` and indexing.
        Assumes a 1-D NumPy array of integer class labels - TODO confirm.
    split_indices
        Candidate split positions; must support ``len()`` and
        ``split_indices - 1`` must be usable as an index.
    n_dim
        Divisor in the log prior term, multiplied by the number of
        splits. Presumably the number of feature dimensions - confirm
        against the caller.

    Returns
    -------
    ``log_p_prior + log_p_data1 + log_p_data2`` as described above.
    """
    prior_params = self.prior
    n_classes = len(prior_params)

    # Object arrays whose entry c holds the per-split counts of class c.
    counts_first = np.array([None] * n_classes)
    counts_second = np.array([None] * n_classes)
    for class_idx in range(n_classes):
        running_count = (y == class_idx).cumsum()
        class_total = running_count[-1]
        first_part = running_count[split_indices - 1]
        counts_first[class_idx] = first_part
        counts_second[class_idx] = class_total - first_part

    n_splits = len(split_indices)
    split_weight = self.partition_prior**(1 + self.level)
    log_split_prior = np.log(split_weight / (n_splits * n_dim))

    # Computed once and shared by both parts.
    prior_betaln = multivariate_betaln(prior_params)
    log_data_first = self._compute_log_p_data(counts_first, prior_betaln)
    log_data_second = self._compute_log_p_data(counts_second, prior_betaln)

    return log_split_prior + log_data_first + log_data_second
def _compute_log_p_data(self, k, prior, betaln_prior):
    """Return ``multivariate_betaln(prior + k) - betaln_prior``.

    Parameters
    ----------
    k
        Counts added to ``prior`` before evaluating the log
        multivariate beta function.
    prior
        Prior parameters.
    betaln_prior
        Value subtracted from the result; callers are expected to pass
        a precomputed ``multivariate_betaln(prior)``.

    Returns
    -------
    The difference of the two log multivariate beta values.
    """
    # See equation (42) of
    # https://www.cs.ubc.ca/~murphyk/Teaching/CS340-Fall06/reading/bernoulli.pdf,
    # which can be expressed as a fraction of beta functions.
    betaln_posterior = multivariate_betaln(prior + k)
    return betaln_posterior - betaln_prior