if __name__ == "__main__":
    # Manual visual check: run the rwm cut and the analytical (tcf) cut on
    # the same generated points and draw the two partitions side by side.
    # doctest.testmod()
    from experiment import data_seletor

    # Alternative inputs kept for manual experiments:
    # points, labels = data_seletor('hand_write_digits')
    # points, label = utl.gaussian_data_generator(dim=2, cls=2)
    # points = np.array([
    #     [0, 0], [0, 1], [0, 2], [0, 3], [0, -1], [0, -2],
    #     [25, -3], [25, -2], [25, -4], [25, -6], [25, -5],
    #     [-5, 0], [-5, 1], [30, -4]
    # ])
    points, label = utl.normal_data_generator(dim=2, cls=2)

    import rwm as rwm

    # Both cut functions return a 5-tuple; only the two clusters are drawn.
    c1, c2, in_boundary, in_n_boundary, coeff = rwm.rwm_cut(points)
    a, b, in_boundary, in_n_boundary, coeff = tcf_cut(points)
    # _tcf additionally yields two points (oa, ob) that are drawn in green.
    coeff, oa, ob = _tcf(points)

    fig, axs = plt.subplots(1, 2)

    # One panel per method: first cluster in red, second cluster in blue.
    panels = (
        ("rwm", c1, c2),
        ("analytical", a, b),
    )
    for panel_idx, (panel_title, red_pts, blue_pts) in enumerate(panels):
        panel = axs[panel_idx]
        panel.set_title(panel_title)
        panel.plot(red_pts[:, 0], red_pts[:, 1], "ro")
        panel.plot(blue_pts[:, 0], blue_pts[:, 1], "bo")

    # Overlay the two points returned by _tcf on the analytical panel.
    for green_pt in (oa, ob):
        axs[1].plot(green_pt[0], green_pt[1], "go")
def split(self, method):
    """Split this node's datapoints into a left and a right child.

    The node is left unsplit (and the method returns early) when it is
    inactive, when ``2 ** self.level`` reaches ``Tree.MAX_LEAVES``, when
    ``self.bad_cut`` reaches ``Tree.BAD_CUT_TOLERANCE`` or when
    ``self.size`` is below 30.  Except for the inactive case, each of
    these marks a node as grounded and increments ``Tree.total_leaves``.

    Otherwise the datapoints are cut with the selected method, two child
    ``Tree`` nodes are attached as ``self.left`` / ``self.right``, the cut
    is classified as good or bad, this node's boundary records are handed
    down to the children, and ``Tree.total_cut`` is incremented.

    Args:
        method: ``"rwm"`` to cut with ``rwm.rwm_cut`` or ``"tcf"`` to cut
            with ``analytical.tcf_cut``.

    Returns:
        None.  All results are stored on the node, its children and the
        ``Tree`` class counters.

    Raises:
        ValueError: If ``method`` is neither ``"rwm"`` nor ``"tcf"``.
    """
    print("in split func")
    if method not in ["rwm", "tcf"]:
        msg = "'method' must be 'rwm' or 'tcf'"
        raise ValueError(msg)
    if self.active is False:
        return

    print("self node = {}".format(self))
    print("self.bad_cut = {}, self.level = {}".format(self.bad_cut, self.level))

    # Stop: a full tree of this depth would already hold MAX_LEAVES leaves.
    if 2 ** (self.level) >= Tree.MAX_LEAVES:
        self.grounded = True
        Tree.total_leaves += 1
        print("[> MAX_LEAVES] set grounded: {}".format(self))
        return

    # Stop: BAD_CUT_TOLERANCE bad cuts in a row.  Withdraw those cuts by
    # deactivating the children on the way up, then ground the topmost
    # ancestor reached.
    # NOTE(review): the original indentation was lost; the deactivation is
    # assumed to happen at every level of the climb -- confirm.
    if self.bad_cut >= Tree.BAD_CUT_TOLERANCE:
        node = self
        for _ in range(Tree.BAD_CUT_TOLERANCE):
            node = node.parent
            node.left.active = False
            node.right.active = False
            print("set false: {}, {}".format(node.left, node.right))
        node.grounded = True
        Tree.total_leaves += 1
        print("set grounded: {}".format(node))
        return

    # Stop: nodes with fewer than 30 points are not split any further.
    if self.size < 30:
        self.grounded = True
        Tree.total_leaves += 1
        print("[node.size < 30] set grounded: {}".format(self))
        return

    if method == 'rwm':
        clusterL, clusterR, in_boundary, in_n_boundary, coeff = rwm.rwm_cut(self.datapoints)
    else:  # method == 'tcf', guaranteed by the validation above
        print('\nsplit shape = ', self.datapoints.shape)
        clusterL, clusterR, in_boundary, in_n_boundary, coeff = analytical.tcf_cut(self.datapoints)
        print('\nsplit shape = ', self.datapoints.shape)

    print("1 of split shape = ", clusterL.shape)
    print("1 of split shape = ", clusterR.shape)

    cluster_size_L = len(clusterL)
    cluster_size_R = len(clusterR)
    # NOTE(review): index 0 is treated as the left side and index 1 as the
    # right side here; confirm against the tuple order the cut functions
    # actually return.
    bound_size_L = len(in_boundary[0])
    bound_size_R = len(in_boundary[1])
    n_bound_size_L = len(in_n_boundary[0])
    n_bound_size_R = len(in_n_boundary[1])

    # Create the child nodes from the split result.
    self.right = Tree(self, clusterR)
    self.right.size = cluster_size_R
    self.left = Tree(self, clusterL)
    self.left.size = cluster_size_L

    # Experiment notes carried over from the original source (the meaning
    # of the entries is not documented there):
    # ==================================================
    # 0.5 and/or 0.5 [64: b/1, 32: b/1, 16: n/1, 8: g/n]
    # 0.3 and/or 0.3 [64: , 32: , 16: , 8: ]
    # 0.1 and/or 0.1 [64: , 32: , 16: , 8: ]
    if bound_size_L > 0 and bound_size_R > 0:
        # A side can have boundary points but an empty in_n_boundary set.
        # Dividing would raise ZeroDivisionError, so that side's rate is
        # treated as infinite, which still counts as exceeding the limit.
        rate_L = bound_size_L / n_bound_size_L if n_bound_size_L else float("inf")
        rate_R = bound_size_R / n_bound_size_R if n_bound_size_R else float("inf")
        print("[F**K] bound_size_L = {}, n_bound_size_L = {}, rate = {}".format(bound_size_L, n_bound_size_L, rate_L))
        print("[F**K] bound_size_R = {}, n_bound_size_R = {}, rate = {}".format(bound_size_R, n_bound_size_R, rate_R))

        # A cut is bad when both sides exceed a 0.5 rate.  (An earlier,
        # disabled variant compared against the cluster size with 0.1.)
        if rate_L > 0.5 and rate_R > 0.5:
            self.left.bad_cut = self.bad_cut + 1
            self.right.bad_cut = self.bad_cut + 1
            print("cut id ", Tree.total_cut, " is bad cut")
            # NOTE(review): the original indentation was lost; recording
            # the boundary points only for bad cuts is an assumption.
            self.left.in_bound_record.append((Tree.total_cut, 'L', in_boundary[0]))
            self.right.in_bound_record.append((Tree.total_cut, 'R', in_boundary[1]))

    # Hand this node's boundary records down to the children by cutting
    # each recorded point set with the coefficients of the current cut.
    if self.in_bound_record:
        print("self.in_bound_record = ", len(self.in_bound_record))
        print("node addr = ", self)
        cut_by_coeff = rwm.cut_by_coeff if method == 'rwm' else analytical.cut_by_coeff
        for rec_cut_id, rec_color, rec_points in self.in_bound_record:
            b_left, b_right = cut_by_coeff(rec_points, coeff)
            # Only non-empty parts are recorded on the children.
            if b_left.shape[0]:
                self.left.in_bound_record.append((rec_cut_id, rec_color, b_left))
            if b_right.shape[0]:
                self.right.in_bound_record.append((rec_cut_id, rec_color, b_right))
            # NOTE(review): assumed to be printed once per record -- confirm.
            print("cur cid = {}, rec_cut_id = {}".format(Tree.total_cut, rec_cut_id))

    print("after split, child.bad_cut = {}".format(self.left.bad_cut))
    Tree.total_cut += 1
    print("end of split shape = ", clusterL.shape)
    print("end of split shape = ", clusterR.shape)
    return
return c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff if __name__ == '__main__': # doctest.testmod() from experiment import data_seletor # points, labels = data_seletor('hand_write_digits') # points, label = utl.gaussian_data_generator(dim=2, cls=2) points, label = utl.normal_data_generator(dim=2, cls=2) # points = np.array([ [0, 0], [0, 1], [0, 2], [0, 3], [0, -1], [0, -2], [25, -3], [25, -2], [25, -4], [25, -6], [25, -5], [-5, 0], [-5, 1], [30, -4] ]) import rwm as rwm c1, c2, in_boundary, in_n_boundary, coeff = rwm.rwm_cut(points) a, b, in_boundary, in_n_boundary, coeff = tcf_cut(points) coeff, oa, ob = _tcf(points) fig, axs = plt.subplots(1, 2) axs[0].set_title('rwm') axs[0].plot(c1[:, 0], c1[:, 1], 'ro') axs[0].plot(c2[:, 0], c2[:, 1], 'bo') axs[1].set_title('analytical') axs[1].plot(a[:, 0], a[:, 1], 'ro') axs[1].plot(b[:, 0], b[:, 1], 'bo') axs[1].plot(oa[0], oa[1], 'go') axs[1].plot(ob[0], ob[1], 'go')