def compute_log_inv_acc_p(self, node_id, param, len_both_children_terminal, loglik, grow_nodes,
        cache, settings, data):
    """Return the log of the inverse acceptance term for a PRUNE move at ``node_id``.

    The value is the sum of a prior part and a likelihood part:

    * prior part: ``log(psplit(node_id)) - log(pnosplit(node_id))
      - log(len_both_children_terminal) + log(len(grow_nodes))`` plus the
      log no-split probabilities of those children for which a valid split
      still exists;
    * likelihood part: ``loglik - self.loglik[node_id]``.

    Parameters:
        node_id: id of the node whose two children are looked up with
            ``get_children_id``.
        param: forwarded unchanged to ``self.compute_psplit`` and
            ``self.compute_pnosplit``.
        len_both_children_terminal: count whose log is subtracted from the
            prior part (must be positive for a finite result).
        loglik: log-likelihood value compared against ``self.loglik[node_id]``.
        grow_nodes: sized collection; only ``len(grow_nodes)`` is used.
        cache, settings, data: forwarded to ``no_valid_split_exists``;
            ``settings.verbose >= 2`` additionally enables a debug print.

    Returns:
        The log inverse acceptance value (prior part + likelihood part).

    Raises:
        AssertionError: re-raised from ``check_if_zero`` when the recomputed
            children log-prior disagrees with the stored ``self.logprior``
            entries, or raised when the result is ``-inf``.
    """
    # 1/acc for PRUNE is acc for GROW except for corrections to both_children_terminal
    # and grow_nodes list
    logprior_children = 0.0
    left, right = get_children_id(node_id)
    # A child contributes its no-split log-probability only if it could still be split.
    if not no_valid_split_exists(data, cache, self.train_ids[left], settings):
        logprior_children += np.log(self.compute_pnosplit(left, param))
    if not no_valid_split_exists(data, cache, self.train_ids[right], settings):
        logprior_children += np.log(self.compute_pnosplit(right, param))
    # Consistency check: the recomputed value should match the stored log-priors.
    try:
        check_if_zero(logprior_children - self.logprior[left] - self.logprior[right])
    except AssertionError:
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print('oh oh ... looks like a bug in compute_log_inv_acc_p')
        print('term 1 = %s' % logprior_children)
        print('term 2 = %s, 2a = %s, 2b = %s' % (self.logprior[left]+self.logprior[right],
                self.logprior[left], self.logprior[right]))
        print('node_id = %s, left = %s, right = %s, logprior = %s'
                % (node_id, left, right, self.logprior))
        # Bare raise keeps the original exception (message and traceback) intact.
        raise
    log_inv_acc_prior = np.log(self.compute_psplit(node_id, param)) \
            - np.log(self.compute_pnosplit(node_id, param)) \
            - np.log(len_both_children_terminal) + np.log(len(grow_nodes)) \
            + logprior_children
    log_inv_acc_loglik = (loglik - self.loglik[node_id])
    log_inv_acc = log_inv_acc_loglik + log_inv_acc_prior
    if settings.verbose >= 2:
        # Signs are flipped so the printed values are the (non-inverted) log acceptance terms.
        print('compute_log_inv_acc_p: log_acc_loglik = %s, log_acc_prior = %s'
                % (-log_inv_acc_loglik, -log_inv_acc_prior))
    assert log_inv_acc > -np.inf
    return log_inv_acc
def compute_log_acc_g(self, node_id, param, len_both_children_terminal, loglik,
        train_ids_left, train_ids_right, cache, settings, data, grow_nodes):
    """Compute the log acceptance term for a GROW move at ``node_id``.

    The result is a prior term plus a likelihood term.  The prior term is
    ``log(psplit(node_id)) - log(pnosplit(node_id))
    - log(len_both_children_terminal) + log(len(grow_nodes))`` plus the log
    no-split probability of each child for which a valid split exists.  The
    likelihood term is ``loglik - self.loglik[node_id]``.

    ``cache``, ``settings`` and ``data`` are forwarded to
    ``no_valid_split_exists``; ``settings.verbose >= 2`` enables a debug
    print.  When ``loglik`` is ``-inf`` the function returns ``-inf``.
    """
    # effect of do_not_split does not matter for node_id since it has children
    left, right = get_children_id(node_id)
    children_logprior = 0.0
    # Visit left then right so helper calls happen in a fixed order.
    for child_id, child_train_ids in ((left, train_ids_left), (right, train_ids_right)):
        if no_valid_split_exists(data, cache, child_train_ids, settings):
            continue
        children_logprior += np.log(self.compute_pnosplit(child_id, param))

    log_psplit = np.log(self.compute_psplit(node_id, param))
    log_pnosplit = np.log(self.compute_pnosplit(node_id, param))
    log_n_both_terminal = np.log(len_both_children_terminal)
    log_n_grow = np.log(len(grow_nodes))
    # Keep the left-to-right evaluation order so the float result is unchanged.
    prior_term = log_psplit - log_pnosplit - log_n_both_terminal + log_n_grow \
            + children_logprior
    loglik_term = loglik - self.loglik[node_id]
    total = prior_term + loglik_term

    if settings.verbose >= 2:
        print('compute_log_acc_g: log_acc_loglik = %s, log_acc_prior = %s'
                % (loglik_term, prior_term))

    # just need to ensure that an invalid split is not grown
    return -np.inf if loglik == -np.inf else total
def compute_log_acc_g(self, node_id, param, len_both_children_terminal, loglik,
        train_ids_left, train_ids_right, cache, settings, data, grow_nodes):
    """Return the log acceptance term for a GROW move at ``node_id``.

    Parameters:
        node_id: id of the node being grown; its children ids come from
            ``get_children_id``.
        param: forwarded unchanged to ``self.compute_psplit`` and
            ``self.compute_pnosplit``.
        len_both_children_terminal: count whose log is subtracted from the
            prior term.
        loglik: log-likelihood value compared against ``self.loglik[node_id]``;
            ``-inf`` forces the result to ``-inf``.
        train_ids_left, train_ids_right: passed to ``no_valid_split_exists``
            for the left and right child respectively.
        cache, settings, data: forwarded to ``no_valid_split_exists``;
            ``settings.verbose >= 2`` additionally enables a debug print.
        grow_nodes: sized collection; only ``len(grow_nodes)`` is used.

    Returns:
        ``log_acc_prior + log_acc_loglik``, or ``-inf`` when ``loglik`` is
        ``-inf``.
    """
    # NOTE(review): a definition with the same name and signature appears
    # directly before this one; if both are in the same scope this later
    # definition is the one that takes effect -- consider removing the duplicate.
    # effect of do_not_split does not matter for node_id since it has children
    logprior_children = 0.0
    left, right = get_children_id(node_id)
    # A child contributes its no-split log-probability only if it could still be split.
    if not no_valid_split_exists(data, cache, train_ids_left, settings):
        logprior_children += np.log(self.compute_pnosplit(left, param))
    if not no_valid_split_exists(data, cache, train_ids_right, settings):
        logprior_children += np.log(self.compute_pnosplit(right, param))
    log_acc_prior = np.log(self.compute_psplit(node_id, param)) \
            - np.log(self.compute_pnosplit(node_id, param)) \
            - np.log(len_both_children_terminal) + np.log(len(grow_nodes)) \
            + logprior_children
    log_acc_loglik = (loglik - self.loglik[node_id])
    log_acc = log_acc_prior + log_acc_loglik
    if settings.verbose >= 2:
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print('compute_log_acc_g: log_acc_loglik = %s, log_acc_prior = %s'
                % (log_acc_loglik, log_acc_prior))
    if loglik == -np.inf:   # just need to ensure that an invalid split is not grown
        log_acc = -np.inf
    return log_acc