Esempio n. 1
0
 def compute_log_inv_acc_p(self, node_id, param, len_both_children_terminal, loglik, grow_nodes,
         cache, settings, data):
     """Return the log of the inverse acceptance ratio for a PRUNE at ``node_id``.

     1/acc for PRUNE is acc for GROW except for corrections to the
     both_children_terminal and grow_nodes lists.

     The returned value is the sum of two parts:
       * likelihood part: ``loglik - self.loglik[node_id]``
       * prior part: ``log(psplit) - log(pnosplit)`` at ``node_id``,
         minus ``log(len_both_children_terminal)``, plus
         ``log(len(grow_nodes))``, plus the log no-split probability of each
         child for which ``no_valid_split_exists`` is false.

     Parameters:
       node_id -- id of the node whose two children (from
           ``get_children_id``) are considered.
       param -- forwarded to ``compute_psplit`` / ``compute_pnosplit``.
       len_both_children_terminal -- number whose log is subtracted from the
           prior part.
       loglik -- value compared against ``self.loglik[node_id]``.
       grow_nodes -- sized collection; only its length is used.
       cache, settings, data -- forwarded to ``no_valid_split_exists``;
           ``settings.verbose >= 2`` enables a diagnostic print.

     Raises:
       AssertionError -- if the children's prior recomputed here does not
           match ``self.logprior[left] + self.logprior[right]`` (as judged by
           ``check_if_zero``), or if the result is not greater than ``-inf``.
     """
     left, right = get_children_id(node_id)
     # A child contributes its no-split log-probability only when it could
     # still be split; otherwise its contribution is 0.
     logprior_children = 0.0
     for child in (left, right):
         if not no_valid_split_exists(data, cache, self.train_ids[child], settings):
             logprior_children += np.log(self.compute_pnosplit(child, param))
     # Consistency check against the cached per-node log-priors.
     try:
         check_if_zero(logprior_children - self.logprior[left] - self.logprior[right])
     except AssertionError:
         print('oh oh ... looks like a bug in compute_log_inv_acc_p')
         print('term 1 = %s' % logprior_children)
         print('term 2 = %s, 2a = %s, 2b = %s' % (self.logprior[left]+self.logprior[right],
                  self.logprior[left], self.logprior[right]))
         print('node_id = %s, left = %s, right = %s, logprior = %s' % (node_id, left, right, self.logprior))
         # re-raise the original failure so its traceback is preserved
         raise
     log_inv_acc_prior = np.log(self.compute_psplit(node_id, param)) \
             - np.log(self.compute_pnosplit(node_id, param)) \
             - np.log(len_both_children_terminal) + np.log(len(grow_nodes)) \
             + logprior_children
     log_inv_acc_loglik = (loglik - self.loglik[node_id])
     log_inv_acc = log_inv_acc_loglik + log_inv_acc_prior
     if settings.verbose >= 2:
         # printed with flipped sign, i.e. as the log acceptance terms
         print('compute_log_inv_acc_p: log_acc_loglik = %s, log_acc_prior = %s'
                 % (-log_inv_acc_loglik, -log_inv_acc_prior))
     assert(log_inv_acc > -np.inf)
     return log_inv_acc
Esempio n. 2
0
 def compute_log_inv_acc_p(self, node_id, param, len_both_children_terminal, loglik, grow_nodes,
         cache, settings, data):
     """Return the log of the inverse acceptance ratio for a PRUNE at ``node_id``.

     1/acc for PRUNE is acc for GROW except for corrections to the
     both_children_terminal and grow_nodes lists.

     The returned value is the sum of two parts:
       * likelihood part: ``loglik - self.loglik[node_id]``
       * prior part: ``log(psplit) - log(pnosplit)`` at ``node_id``,
         minus ``log(len_both_children_terminal)``, plus
         ``log(len(grow_nodes))``, plus the log no-split probability of each
         child for which ``no_valid_split_exists`` is false.

     Parameters:
       node_id -- id of the node whose two children (from
           ``get_children_id``) are considered.
       param -- forwarded to ``compute_psplit`` / ``compute_pnosplit``.
       len_both_children_terminal -- number whose log is subtracted from the
           prior part.
       loglik -- value compared against ``self.loglik[node_id]``.
       grow_nodes -- sized collection; only its length is used.
       cache, settings, data -- forwarded to ``no_valid_split_exists``;
           ``settings.verbose >= 2`` enables a diagnostic print.

     Raises:
       AssertionError -- if the children's prior recomputed here does not
           match ``self.logprior[left] + self.logprior[right]`` (as judged by
           ``check_if_zero``), or if the result is not greater than ``-inf``.
     """
     left, right = get_children_id(node_id)
     # A child contributes its no-split log-probability only when it could
     # still be split; otherwise its contribution is 0.
     logprior_children = 0.0
     for child in (left, right):
         if not no_valid_split_exists(data, cache, self.train_ids[child], settings):
             logprior_children += np.log(self.compute_pnosplit(child, param))
     # Consistency check against the cached per-node log-priors.
     try:
         check_if_zero(logprior_children - self.logprior[left] - self.logprior[right])
     except AssertionError:
         print('oh oh ... looks like a bug in compute_log_inv_acc_p')
         print('term 1 = %s' % logprior_children)
         print('term 2 = %s, 2a = %s, 2b = %s' % (self.logprior[left]+self.logprior[right],
                  self.logprior[left], self.logprior[right]))
         print('node_id = %s, left = %s, right = %s, logprior = %s' % (node_id, left, right, self.logprior))
         # re-raise the original failure so its traceback is preserved
         raise
     log_inv_acc_prior = np.log(self.compute_psplit(node_id, param)) \
             - np.log(self.compute_pnosplit(node_id, param)) \
             - np.log(len_both_children_terminal) + np.log(len(grow_nodes)) \
             + logprior_children
     log_inv_acc_loglik = (loglik - self.loglik[node_id])
     log_inv_acc = log_inv_acc_loglik + log_inv_acc_prior
     if settings.verbose >= 2:
         # printed with flipped sign, i.e. as the log acceptance terms
         print('compute_log_inv_acc_p: log_acc_loglik = %s, log_acc_prior = %s'
                 % (-log_inv_acc_loglik, -log_inv_acc_prior))
     assert(log_inv_acc > -np.inf)
     return log_inv_acc
Esempio n. 3
0
 def check_if_same(self, log_acc, loglik_diff, logprior_diff):
     """Cross-check ``log_acc`` against a value recomputed from the stored tables.

     change/swap operations should depend only on what happens in the current
     subtree, so the supplied ``log_acc`` must agree with the difference
     between the ``*_new`` and current tables:
       * log-likelihood difference summed over ``self.leaf_nodes``
       * log-prior difference summed over every entry of
         ``self.logprior_new`` and ``self.logprior`` respectively

     Parameters:
       log_acc -- value to verify.
       loglik_diff, logprior_diff -- only used in the diagnostic output.

     Raises:
       AssertionError -- if ``check_if_zero`` rejects the difference, unless
           both values are ``-inf`` (their difference is then not a number,
           but the two values do agree).
     """
     loglik_diff_2 = sum(self.loglik_new[node] for node in self.leaf_nodes) \
                     - sum(self.loglik[node] for node in self.leaf_nodes)
     logprior_diff_2 = sum(self.logprior_new[node] for node in self.logprior_new) \
                      - sum(self.logprior[node] for node in self.logprior)
     log_acc_2 = loglik_diff_2 + logprior_diff_2
     try:
         check_if_zero(log_acc - log_acc_2)
     except AssertionError:
         both_minus_inf = (log_acc == -np.inf) and (log_acc_2 == -np.inf)
         if not both_minus_inf:
             print('check if terms match:')
             print('loglik_diff = %s, loglik_diff_2 = %s' % (loglik_diff, loglik_diff_2))
             print('logprior_diff = %s, logprior_diff_2 = %s' % (logprior_diff, logprior_diff_2))
             # re-raise the original failure so its traceback is preserved
             raise
Esempio n. 4
0
 def check_if_same(self, log_acc, loglik_diff, logprior_diff):
     """Cross-check ``log_acc`` against a value recomputed from the stored tables.

     change/swap operations should depend only on what happens in the current
     subtree, so the supplied ``log_acc`` must agree with the difference
     between the ``*_new`` and current tables:
       * log-likelihood difference summed over ``self.leaf_nodes``
       * log-prior difference summed over every entry of
         ``self.logprior_new`` and ``self.logprior`` respectively

     Parameters:
       log_acc -- value to verify.
       loglik_diff, logprior_diff -- only used in the diagnostic output.

     Raises:
       AssertionError -- if ``check_if_zero`` rejects the difference, unless
           both values are ``-inf`` (their difference is then not a number,
           but the two values do agree).
     """
     loglik_diff_2 = sum(self.loglik_new[node] for node in self.leaf_nodes) \
                     - sum(self.loglik[node] for node in self.leaf_nodes)
     logprior_diff_2 = sum(self.logprior_new[node] for node in self.logprior_new) \
                      - sum(self.logprior[node] for node in self.logprior)
     log_acc_2 = loglik_diff_2 + logprior_diff_2
     try:
         check_if_zero(log_acc - log_acc_2)
     except AssertionError:
         both_minus_inf = (log_acc == -np.inf) and (log_acc_2 == -np.inf)
         if not both_minus_inf:
             print('check if terms match:')
             print('loglik_diff = %s, loglik_diff_2 = %s' % (loglik_diff, loglik_diff_2))
             print('logprior_diff = %s, logprior_diff_2 = %s' % (logprior_diff, logprior_diff_2))
             # re-raise the original failure so its traceback is preserved
             raise
Esempio n. 5
0
 def evaluate_new_subtree(self, data, node_id_start, param, nodes_subtree,
                          cache, settings):
     """Recompute the ``*_new`` statistics of a subtree under its new splits.

     Every training point listed in ``self.train_ids[node_id_start]`` is
     routed downwards using ``self.node_info_new`` until it lands in a node
     of ``self.leaf_nodes``. Each node visited on the way has the point added
     to its ``sum_y_new``, ``sum_y2_new``, ``n_points_new`` and
     ``train_ids_new`` entries. Afterwards ``loglik_new`` and the prior terms
     are refreshed for every node in ``nodes_subtree``.

     NOTE(review): the ``*_new`` accumulators are only added to here, so
     they are presumably reset by the caller beforehand -- confirm.

     With ``settings.debug == 1`` the log-likelihood at ``node_id_start`` is
     required to be unchanged; an AssertionError is raised otherwise.
     """
     # Pass 1: push each training point down the re-split subtree.
     for point in self.train_ids[node_id_start]:
         features = data['x_train'][point, :]
         target = data['y_train'][point]
         current = copy(node_id_start)
         at_leaf = False
         while not at_leaf:
             self.sum_y_new[current] += target
             self.sum_y2_new[current] += target ** 2
             self.n_points_new[current] += 1
             self.train_ids_new[current] = np.append(
                 self.train_ids_new[current], point)
             at_leaf = current in self.leaf_nodes
             if not at_leaf:
                 left, right = get_children_id(current)
                 # route using the new split criterion
                 feat_id, split, _ = self.node_info_new[current]
                 current = left if features[feat_id] <= split else right

     # Pass 2: refresh the likelihood and prior terms of every subtree node.
     for nid in nodes_subtree:
         # stays at -inf for a node that received no points
         self.loglik_new[nid] = -np.inf
         count = self.n_points_new[nid]
         if count > 0:
             normalizer = compute_normal_normalizer(
                 self.sum_y_new[nid], self.sum_y2_new[nid], count,
                 param, cache, settings)
             self.loglik_new[nid], self.param_n_new[nid] = normalizer
         if nid not in self.leaf_nodes:
             # split probability might have changed if train_ids have changed
             self.recompute_prob_split(data, param, settings, cache, nid)
             continue
         if stop_split(self.train_ids_new[nid], settings, data, cache):
             # if leaf is empty, logprior_new[nid] = 0.0 is incorrect; however
             #      loglik_new[nid] = -np.inf will reject move to a tree with empty leaves
             self.logprior_new[nid] = 0.0
         else:
             # node with just 1 data point earlier could have more data points now
             self.logprior_new[nid] = np.log(self.compute_pnosplit(nid, param))

     if settings.debug != 1:
         return
     # Debug only: the root of the subtree holds the same points as before,
     # so its log-likelihood is expected to be unchanged.
     try:
         check_if_zero(self.loglik[node_id_start] -
                       self.loglik_new[node_id_start])
     except AssertionError:
         report = 'train_ids[node_id_start] = %s, train_ids_new[node_id_start] = %s' \
                 % (self.train_ids[node_id_start], self.train_ids_new[node_id_start])
         print(report)
         raise AssertionError
Esempio n. 6
0
 def update_p(self, particles, log_weights, log_pd, settings):
     """Sample one particle according to ``log_weights`` and store it as ``self.p``.

     An index ``k`` is drawn with ``sample_multinomial(softmax(log_weights))``;
     ``particles[k]`` becomes ``self.p`` and ``log_pd`` is stored as
     ``self.log_pd``.

     When a previous particle exists (``self.p.node_info`` was readable), two
     sanity checks are applied:
       * ``k == 0``: the sampled tree must equal the previous tree.
       * ``k != 0`` with an unchanged tree: ``log_weights[k]`` and
         ``log_weights[0]`` must agree according to ``check_if_zero``.

     Parameters:
       particles -- indexable collection of particles exposing ``node_info``,
           ``log_sis_ratio_d``, ``ancestry``, ``print_tree`` and
           ``check_depth``.
       log_weights -- indexable log-weights, one per particle.
       log_pd -- stored unchanged on ``self.log_pd``.
       settings -- ``settings.verbose`` controls diagnostic printing.

     Returns:
       True if the ``node_info`` of the sampled particle differs from that of
       the previous particle (an empty dict is used when there was none).

     Raises:
       AssertionError -- if one of the sanity checks above fails.
     """
     node_info_old = {}
     first_iter = False
     p_old = None
     if settings.verbose >= 2:
         print('log_weights = %s' % log_weights)
     k = sample_multinomial(softmax(log_weights))
     try:
         # Keep a handle on the previous particle before self.p is replaced;
         # the diagnostics below need its log_sis_ratio_d.
         p_old = self.p
         node_info_old = p_old.node_info
     except AttributeError:
         # first iteration probably: self.p would not be present
         first_iter = True
     same_tree = node_info_old == particles[k].node_info
     self.p = particles[k]
     self.log_pd = log_pd
     if settings.verbose >= 2:
         print('pid_sampled = %s' % k)
         print('new tree:')
         self.p.print_tree()
     if not same_tree and settings.verbose >= 1:
         print('non-identical trees')
     if k == 0 and not first_iter:
         assert same_tree
     elif same_tree and not first_iter:  # particles from pmcmc during init might be different
         if settings.verbose >= 1:
             print('identical tree without k == 0')
         try:
             check_if_zero(log_weights[k] - log_weights[0])
         except AssertionError:
             print('node_info_old = %s' % node_info_old)
             print('same_tree = %s' % same_tree)
             print('k = %s, particles[k].node_info = %s' %
                   (k, particles[k].node_info))
             print('log_weights[0] = %s, log_weights[k] = %s' % \
                     (log_weights[0], log_weights[k]))
             if not first_iter:
                 print(p_old.log_sis_ratio_d)
             print(particles[0].log_sis_ratio_d)
             print(particles[k].log_sis_ratio_d)
             # re-raise the original failure so its traceback is preserved
             raise
     self.p.check_depth()
     if settings.verbose >= 2:
         print('sampled particle = %5d, ancestry = %s' %
               (k, self.p.ancestry))
     return not same_tree
Esempio n. 7
0
 def update_p(self, particles, log_weights, log_pd, settings):
     """Sample one particle according to ``log_weights`` and store it as ``self.p``.

     An index ``k`` is drawn with ``sample_multinomial(softmax(log_weights))``;
     ``particles[k]`` becomes ``self.p`` and ``log_pd`` is stored as
     ``self.log_pd``.

     When a previous particle exists (``self.p.node_info`` was readable), two
     sanity checks are applied:
       * ``k == 0``: the sampled tree must equal the previous tree.
       * ``k != 0`` with an unchanged tree: ``log_weights[k]`` and
         ``log_weights[0]`` must agree according to ``check_if_zero``.

     Parameters:
       particles -- indexable collection of particles exposing ``node_info``,
           ``log_sis_ratio_d``, ``ancestry``, ``print_tree`` and
           ``check_depth``.
       log_weights -- indexable log-weights, one per particle.
       log_pd -- stored unchanged on ``self.log_pd``.
       settings -- ``settings.verbose`` controls diagnostic printing.

     Returns:
       True if the ``node_info`` of the sampled particle differs from that of
       the previous particle (an empty dict is used when there was none).

     Raises:
       AssertionError -- if one of the sanity checks above fails.
     """
     node_info_old = {}
     first_iter = False
     p_old = None
     if settings.verbose >= 2:
         print('log_weights = %s' % log_weights)
     k = sample_multinomial(softmax(log_weights))
     try:
         # Keep a handle on the previous particle before self.p is replaced;
         # the diagnostics below need its log_sis_ratio_d.
         p_old = self.p
         node_info_old = p_old.node_info
     except AttributeError:
         # first iteration probably: self.p would not be present
         first_iter = True
     same_tree = node_info_old == particles[k].node_info
     self.p = particles[k]
     self.log_pd = log_pd
     if settings.verbose >= 2:
         print('pid_sampled = %s' % k)
         print('new tree:')
         self.p.print_tree()
     if not same_tree and settings.verbose >= 1:
         print('non-identical trees')
     if k == 0 and not first_iter:
         assert same_tree
     elif same_tree and not first_iter:  # particles from pmcmc during init might be different
         if settings.verbose >= 1:
             print('identical tree without k == 0')
         try:
             check_if_zero(log_weights[k] - log_weights[0])
         except AssertionError:
             print('node_info_old = %s' % node_info_old)
             print('same_tree = %s' % same_tree)
             print('k = %s, particles[k].node_info = %s' % (k, particles[k].node_info))
             print('log_weights[0] = %s, log_weights[k] = %s' % \
                     (log_weights[0], log_weights[k]))
             if not first_iter:
                 print(p_old.log_sis_ratio_d)
             print(particles[0].log_sis_ratio_d)
             print(particles[k].log_sis_ratio_d)
             # re-raise the original failure so its traceback is preserved
             raise
     self.p.check_depth()
     if settings.verbose >= 2:
         print('sampled particle = %5d, ancestry = %s' % (k, self.p.ancestry))
     return not same_tree
Esempio n. 8
0
 def evaluate_new_subtree(self, data, node_id_start, param, nodes_subtree, cache, settings):
     """Recompute the ``*_new`` statistics of a subtree under its new splits.

     Every training point listed in ``self.train_ids[node_id_start]`` is
     routed downwards using ``self.node_info_new`` until it lands in a node
     of ``self.leaf_nodes``. Each node visited on the way has the point added
     to its ``sum_y_new``, ``sum_y2_new``, ``n_points_new`` and
     ``train_ids_new`` entries. Afterwards ``loglik_new`` and the prior terms
     are refreshed for every node in ``nodes_subtree``.

     NOTE(review): the ``*_new`` accumulators are only added to here, so
     they are presumably reset by the caller beforehand -- confirm.

     Parameters:
       data -- mapping with ``'x_train'`` (indexed as ``[i, :]``) and
           ``'y_train'`` (indexed as ``[i]``).
       node_id_start -- id of the node at the top of the subtree.
       param, cache, settings -- forwarded to the helper routines;
           ``settings.debug == 1`` enables the final consistency check.
       nodes_subtree -- iterable of node ids whose terms are refreshed.

     Raises:
       AssertionError -- in debug mode, if the log-likelihood at
           ``node_id_start`` changed according to ``check_if_zero``.
     """
     for i in self.train_ids[node_id_start]:
         x_, y_ = data['x_train'][i, :], data['y_train'][i]
         node_id = copy(node_id_start)
         while True:
             self.sum_y_new[node_id] += y_
             self.sum_y2_new[node_id] += y_ ** 2
             self.n_points_new[node_id] += 1
             self.train_ids_new[node_id] = np.append(self.train_ids_new[node_id], i)
             if node_id in self.leaf_nodes:
                 break
             left, right = get_children_id(node_id)
             feat_id, split, idx_split_global = self.node_info_new[node_id]   # splitting on new criteria
             node_id = left if x_[feat_id] <= split else right
     for node_id in nodes_subtree:
         # stays at -inf for a node that received no points
         self.loglik_new[node_id] = -np.inf
         if self.n_points_new[node_id] > 0:
             self.loglik_new[node_id], self.param_n_new[node_id] = \
                     compute_normal_normalizer(self.sum_y_new[node_id], self.sum_y2_new[node_id],
                             self.n_points_new[node_id], param, cache, settings)
         if node_id in self.leaf_nodes:
             if stop_split(self.train_ids_new[node_id], settings, data, cache):
                 # if leaf is empty, logprior_new[node_id] = 0.0 is incorrect; however
                 #      loglik_new[node_id] = -np.inf will reject move to a tree with empty leaves
                 self.logprior_new[node_id] = 0.0
             else:
                 # node with just 1 data point earlier could have more data points now
                 self.logprior_new[node_id] = np.log(self.compute_pnosplit(node_id, param))
         else:
             # split probability might have changed if train_ids have changed
             self.recompute_prob_split(data, param, settings, cache, node_id)
     if settings.debug == 1:
         # The top of the subtree holds the same points as before, so its
         # log-likelihood is expected to be unchanged.
         try:
             check_if_zero(self.loglik[node_id_start] - self.loglik_new[node_id_start])
         except AssertionError:
             print('train_ids[node_id_start] = %s, train_ids_new[node_id_start] = %s'
                     % (self.train_ids[node_id_start], self.train_ids_new[node_id_start]))
             # re-raise the original failure so its traceback is preserved
             raise