예제 #1
0

if __name__ == "__main__":
    # Demo entry point: cut one generated sample with both the rwm cut and
    # the tcf cut, then scatter-plot the two partitions next to each other.
    # To run the doctests instead, call: doctest.testmod()

    from experiment import data_seletor

    # Alternative inputs that can be swapped in for the generated sample:
    #   points, labels = data_seletor('hand_write_digits')
    #   points, label = utl.gaussian_data_generator(dim=2, cls=2)
    #   points = np.array([ [0, 0], [0, 1], [0, 2], [0, 3], [0, -1], [0, -2], [25, -3], [25, -2], [25, -4], [25, -6], [25, -5], [-5, 0], [-5, 1], [30, -4] ])
    points, label = utl.normal_data_generator(dim=2, cls=2)

    import rwm

    # Each cut returns (cluster, cluster, in_boundary, in_n_boundary, coeff);
    # only the two clusters are drawn below, the remaining names are simply
    # rebound by every call.
    c1, c2, in_boundary, in_n_boundary, coeff = rwm.rwm_cut(points)
    a, b, in_boundary, in_n_boundary, coeff = tcf_cut(points)
    coeff, oa, ob = _tcf(points)

    fig, axs = plt.subplots(1, 2)

    # One panel per method: first cluster in red, second cluster in blue.
    panels = (("rwm", c1, c2), ("analytical", a, b))
    for panel_idx, (panel_title, red_pts, blue_pts) in enumerate(panels):
        axs[panel_idx].set_title(panel_title)
        axs[panel_idx].plot(red_pts[:, 0], red_pts[:, 1], "ro")
        axs[panel_idx].plot(blue_pts[:, 0], blue_pts[:, 1], "bo")

    # The two extra points returned by _tcf go on the analytical panel in green.
    for extra_pt in (oa, ob):
        axs[1].plot(extra_pt[0], extra_pt[1], "go")
예제 #2
0
  def split(self, method):
    """Split this node's datapoints into a left and a right child node.

    Nothing is split when the node is inactive or when one of the stop
    conditions below fires; each stop condition marks a node as grounded
    and increments ``Tree.total_leaves``:

    * ``2 ** self.level`` has reached ``Tree.MAX_LEAVES``;
    * ``self.bad_cut`` has reached ``Tree.BAD_CUT_TOLERANCE``: the children
      of the last ``BAD_CUT_TOLERANCE`` ancestors are deactivated and the
      topmost of those ancestors is grounded (not this node);
    * ``self.size`` is below 30.

    Otherwise the datapoints are cut, ``self.left`` / ``self.right`` are
    created from the two clusters, and the cut is flagged as "bad" when
    both sides contain boundary points and, on both sides, the ratio of
    boundary points to non-boundary points exceeds 0.5. A side that has
    boundary points but no non-boundary points is treated as having an
    infinite ratio. Boundary records already attached to this node are
    re-cut with the new coefficients and handed down to the children.
    ``Tree.total_cut`` is incremented after every performed cut.

    Args:
      method: ``'rwm'`` to cut with ``rwm.rwm_cut`` or ``'tcf'`` to cut
        with ``analytical.tcf_cut``.

    Returns:
      None.

    Raises:
      ValueError: if ``method`` is neither ``'rwm'`` nor ``'tcf'``.
    """
    print("in split func")

    if method not in ["rwm", "tcf"]:
      msg = "'method' must be 'rwm' or 'tcf'"
      raise ValueError(msg)

    if self.active is False:
      return

    # Before splitting, check whether BAD_CUT_TOLERANCE bad cuts happened in
    # a row; if so, withdraw those cuts and ground the first parent instead.
    print("self node = {}".format(self))
    print("self.bad_cut = {}, self.level = {}".format(self.bad_cut, self.level))

    if 2 ** (self.level) >= Tree.MAX_LEAVES:
      self.grounded = True
      Tree.total_leaves += 1
      print("[> MAX_LEAVES] set grounded: {}".format(self))
      return

    if self.bad_cut >= Tree.BAD_CUT_TOLERANCE:
      # Walk up BAD_CUT_TOLERANCE levels, deactivating both children at
      # every level; the last ancestor reached becomes the grounded node.
      node = self
      for _ in range(Tree.BAD_CUT_TOLERANCE):
        node = node.parent
        node.left.active = False
        node.right.active = False
        print("set false: {}, {}".format(node.left, node.right))
      node.grounded = True
      Tree.total_leaves += 1
      print("set grounded: {}".format(node))
      return

    if self.size < 30:
      self.grounded = True
      Tree.total_leaves += 1
      print("[node.size < 30] set grounded: {}".format(self))
      return

    # `method` was validated above, so anything that is not 'rwm' is 'tcf'.
    if method == 'rwm':
      cut_module = rwm
      clusterL, clusterR, in_boundary, in_n_boundary, coeff = rwm.rwm_cut(self.datapoints)
    else:
      cut_module = analytical
      print('\nsplit shape = ', self.datapoints.shape)
      clusterL, clusterR, in_boundary, in_n_boundary, coeff = analytical.tcf_cut(self.datapoints)
      print('\nsplit shape = ', self.datapoints.shape)
    print("1 of split shape = ", clusterL.shape)
    print("1 of split shape = ", clusterR.shape)

    cluster_size_L = len(clusterL)
    cluster_size_R = len(clusterR)
    # NOTE(review): index 0 is treated as the left side and index 1 as the
    # right side -- confirm this matches the return order of the cut functions.
    bound_size_L = len(in_boundary[0])
    bound_size_R = len(in_boundary[1])
    n_bound_size_L = len(in_n_boundary[0])
    n_bound_size_R = len(in_n_boundary[1])

    # create node by split result
    self.right = Tree(self, clusterR)
    self.right.size = cluster_size_R
    self.left = Tree(self, clusterL)
    self.left.size = cluster_size_L

    # Tuning notes kept from the original author (column meaning is not
    # documented in this file):
    # ==================================================
    # 0.5 and/or 0.5 [64: b/1, 32: b/1, 16: n/1, 8: g/n]
    # 0.3 and/or 0.3 [64: , 32: , 16: , 8: ]
    # 0.1 and/or 0.1 [64: , 32: , 16: , 8: ]

    if bound_size_L > 0 and bound_size_R > 0:
      # A side made only of boundary points has no finite ratio; treat it as
      # infinite (i.e. above any threshold) instead of dividing by zero.
      rate_L = bound_size_L / n_bound_size_L if n_bound_size_L else float("inf")
      rate_R = bound_size_R / n_bound_size_R if n_bound_size_R else float("inf")
      print("[F**K] bound_size_L = {}, n_bound_size_L = {}, rate = {}".format(bound_size_L, n_bound_size_L, rate_L))
      print("[F**K] bound_size_R = {}, n_bound_size_R = {}, rate = {}".format(bound_size_R, n_bound_size_R, rate_R))
      # if (bound_size_L / cluster_size_L) > 0.1 and (bound_size_R / cluster_size_R) > 0.1:
      if rate_L > 0.5 and rate_R > 0.5:
        self.left.bad_cut = self.bad_cut + 1
        self.right.bad_cut = self.bad_cut + 1
        print("cut id ", Tree.total_cut, " is bad cut")
        self.left.in_bound_record.append((Tree.total_cut, 'L', in_boundary[0]))
        self.right.in_bound_record.append((Tree.total_cut, 'R', in_boundary[1]))

    if self.in_bound_record:
      print("self.in_bound_record = ", len(self.in_bound_record))
      print("node addr = ", self)
    # Re-cut every boundary record inherited by this node with the new
    # coefficients and pass each non-empty half down to the matching child.
    for rec_cut_id, rec_color, rec_points in self.in_bound_record:
      b_left, b_right = cut_module.cut_by_coeff(rec_points, coeff)

      if b_left.shape[0]:
        self.left.in_bound_record.append((rec_cut_id, rec_color, b_left))
      if b_right.shape[0]:
        self.right.in_bound_record.append((rec_cut_id, rec_color, b_right))
      print("cur cid = {}, rec_cut_id = {}".format(Tree.total_cut, rec_cut_id))

    print("after split, child.bad_cut = {}".format(self.left.bad_cut))
    Tree.total_cut += 1

    print("end of split shape = ", clusterL.shape)
    print("end of split shape = ", clusterR.shape)

    return
예제 #3
0
    return c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff


if __name__ == '__main__':
    # Script mode: compare the rwm cut with the tcf cut on a single generated
    # sample by plotting both partitions in one figure.
    # To run the doctests instead, call: doctest.testmod()

    from experiment import data_seletor

    # Other data sources that were used with this demo:
    #   points, labels = data_seletor('hand_write_digits')
    #   points, label = utl.gaussian_data_generator(dim=2, cls=2)
    #   points = np.array([ [0, 0], [0, 1], [0, 2], [0, 3], [0, -1], [0, -2], [25, -3], [25, -2], [25, -4], [25, -6], [25, -5], [-5, 0], [-5, 1], [30, -4] ])
    points, label = utl.normal_data_generator(dim=2, cls=2)

    import rwm

    # Only the two clusters of each cut are plotted; in_boundary,
    # in_n_boundary and coeff are rebound by each successive call.
    c1, c2, in_boundary, in_n_boundary, coeff = rwm.rwm_cut(points)
    a, b, in_boundary, in_n_boundary, coeff = tcf_cut(points)
    coeff, oa, ob = _tcf(points)

    fig, axs = plt.subplots(1, 2)
    rwm_ax, tcf_ax = axs[0], axs[1]

    # Left panel: clusters from the rwm cut (red, then blue).
    rwm_ax.set_title('rwm')
    for cluster, marker in ((c1, 'ro'), (c2, 'bo')):
        rwm_ax.plot(cluster[:, 0], cluster[:, 1], marker)

    # Right panel: clusters from the tcf cut (red, then blue).
    tcf_ax.set_title('analytical')
    for cluster, marker in ((a, 'ro'), (b, 'bo')):
        tcf_ax.plot(cluster[:, 0], cluster[:, 1], marker)

    # The two points returned by _tcf are overlaid in green on the right panel.
    for extra_pt in (oa, ob):
        tcf_ax.plot(extra_pt[0], extra_pt[1], 'go')