def tcf_cut(orig_datapoints, boundary_width=0.1, n=2):
    """Split points into two clusters using the boundary returned by ``_tcf``.

    The boundary is fitted on a centralized and normalized deep copy of the
    input, and distances are measured on that copy; the points placed in the
    returned arrays are the corresponding original points.

    Args:
        orig_datapoints: iterable of points. It is deep-copied before any
            preprocessing.
        boundary_width: a point whose absolute distance to the boundary is
            at most this value is reported as a boundary point.
        n: multiplier applied to ``boundary_width`` to obtain the wider
            "near boundary" band.

    Returns:
        A 5-tuple ``(c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff)``:

        * ``c_left`` / ``c_right``: points with negative / non-negative
          signed distance to the boundary.
        * ``r_bp`` / ``l_bp``: right / left points within ``boundary_width``.
        * ``r_nbp`` / ``l_nbp``: right / left points within
          ``boundary_width * n``.
        * ``coeff``: the coefficient vector produced by ``_tcf``; its last
          entry is used as the offset term and the rest as the weights.

        All point collections are float NumPy arrays.
    """
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)

    # Only the boundary coefficients are needed; the two extra values
    # returned by _tcf are not used here.
    coeff, _, _ = _tcf(datapoints)

    c_left, c_right = [], []
    r_bp, l_bp = [], []
    r_nbp, l_nbp = [], []

    # The norm of the weight vector does not depend on the point, so it is
    # computed once instead of on every iteration.
    unit_len = sum(coeff[:-1] ** 2) ** 0.5
    near_width = boundary_width * n

    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        # Signed distance from the preprocessed point to the boundary.
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len
        on_right = p2b_dist >= 0

        if abs(p2b_dist) <= near_width:
            (r_nbp if on_right else l_nbp).append(orig_point)
        if abs(p2b_dist) <= boundary_width:
            (r_bp if on_right else l_bp).append(orig_point)
        (c_right if on_right else c_left).append(orig_point)

    # The ``np.float`` alias was removed from NumPy; builtin ``float`` is the
    # type it referred to.
    c_left = np.array(c_left, float)
    c_right = np.array(c_right, float)
    r_bp = np.array(r_bp, float)
    l_bp = np.array(l_bp, float)
    r_nbp = np.array(r_nbp, float)
    l_nbp = np.array(l_nbp, float)

    # left, right, boundary points, near-boundary points, coeff
    return c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff
def tcf_cut(orig_datapoints, boundary_width=0.1, n=2):
    """Partition points into a left and a right cluster via ``_tcf``.

    A deep copy of the input is centralized and normalized, ``_tcf`` is run
    on that copy to obtain the boundary coefficients, and every point is
    then classified by its signed distance to the boundary. The returned
    arrays hold the original (unprocessed) points.

    Args:
        orig_datapoints: iterable of points; deep-copied before
            preprocessing.
        boundary_width: maximum absolute distance for a point to count as a
            boundary point.
        n: factor by which ``boundary_width`` is widened for the
            "near boundary" band.

    Returns:
        ``(c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff)`` where
        ``c_left``/``c_right`` are the points with negative/non-negative
        signed distance, ``r_bp``/``l_bp`` are the right/left points within
        ``boundary_width``, ``r_nbp``/``l_nbp`` are the right/left points
        within ``boundary_width * n``, and ``coeff`` is the coefficient
        vector from ``_tcf`` (last entry used as the offset). The point
        collections are float NumPy arrays.
    """
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)

    # _tcf returns three values; only the coefficients are used below.
    coeff, _, _ = _tcf(datapoints)
    weights = coeff[:-1]
    offset = coeff[-1]

    # Loop-invariant: length of the weight vector.
    unit_len = sum(weights ** 2) ** 0.5

    c_left = []
    c_right = []
    r_bp = []
    l_bp = []
    r_nbp = []
    l_nbp = []

    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        # calc distance from point to boundary
        p2b_dist = (sum(copy_point * weights) + offset) / unit_len

        if abs(p2b_dist) <= boundary_width * n:
            if p2b_dist >= 0:
                r_nbp.append(orig_point)
            else:
                l_nbp.append(orig_point)

        if abs(p2b_dist) <= boundary_width:
            if p2b_dist >= 0:
                r_bp.append(orig_point)
            else:
                l_bp.append(orig_point)

        if p2b_dist >= 0:
            c_right.append(orig_point)
        else:
            c_left.append(orig_point)

    # ``np.float`` no longer exists in NumPy; builtin ``float`` is the type
    # that alias pointed to, so the resulting dtype is unchanged.
    c_left = np.array(c_left, float)
    c_right = np.array(c_right, float)
    r_bp = np.array(r_bp, float)
    l_bp = np.array(l_bp, float)
    r_nbp = np.array(r_nbp, float)
    l_nbp = np.array(l_nbp, float)

    # left, right, boundary points, near-boundary points, coeff
    return c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff
def rwm_cut(orig_datapoints, boundary_width=0.1, n=2):
    """Split points into two clusters using the boundary returned by ``_rwm``.

    The boundary is fitted on a centralized and normalized deep copy of the
    input, and distances are measured on that copy; the points placed in the
    returned arrays are the corresponding original points.

    Args:
        orig_datapoints: iterable of points. It is deep-copied before any
            preprocessing.
        boundary_width: a point whose absolute distance to the boundary is
            at most this value is reported as a boundary point.
        n: multiplier applied to ``boundary_width`` to obtain the wider
            "near boundary" band.

    Returns:
        A 5-tuple ``(c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff)``:

        * ``c_left`` / ``c_right``: points with negative / non-negative
          signed distance to the boundary.
        * ``r_bp`` / ``l_bp``: right / left points within ``boundary_width``.
        * ``r_nbp`` / ``l_nbp``: right / left points within
          ``boundary_width * n``.
        * ``coeff``: the coefficient vector produced by ``_rwm``; its last
          entry is used as the offset term and the rest as the weights.

        All point collections are float NumPy arrays.
    """
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)

    coeff = _rwm(datapoints)

    c_left, c_right = [], []
    r_bp, l_bp = [], []
    r_nbp, l_nbp = [], []

    # The norm of the weight vector is the same for every point, so compute
    # it once rather than inside the loop.
    unit_len = sum(coeff[:-1] ** 2) ** 0.5
    near_width = boundary_width * n

    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        # Signed distance from the preprocessed point to the boundary.
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len
        on_right = p2b_dist >= 0

        if abs(p2b_dist) <= near_width:
            (r_nbp if on_right else l_nbp).append(orig_point)
        if abs(p2b_dist) <= boundary_width:
            (r_bp if on_right else l_bp).append(orig_point)
        (c_right if on_right else c_left).append(orig_point)

    # The ``np.float`` alias was removed from NumPy; builtin ``float`` is the
    # type it referred to.
    c_left = np.array(c_left, float)
    c_right = np.array(c_right, float)
    r_bp = np.array(r_bp, float)
    l_bp = np.array(l_bp, float)
    r_nbp = np.array(r_nbp, float)
    l_nbp = np.array(l_nbp, float)

    # left, right, boundary points, near-boundary points, coeff
    return c_left, c_right, (r_bp, l_bp), (r_nbp, l_nbp), coeff
def cut_by_coeff(orig_datapoints, coeff):
    """Split points into two clusters using given boundary coefficients.

    Distances are measured on a centralized and normalized deep copy of the
    input; the points placed in the returned arrays are the corresponding
    original points.

    Args:
        orig_datapoints: iterable of points. It is deep-copied before any
            preprocessing.
        coeff: boundary coefficients. The last entry is used as the offset
            and the remaining entries as the weights; it must support
            slicing and element-wise ``** 2`` (e.g. a NumPy array).

    Returns:
        ``(c_left, c_right)``: float NumPy arrays holding the points with
        negative and non-negative signed distance to the boundary,
        respectively.
    """
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)

    c_left = []
    c_right = []
    unit_len = sum(coeff[:-1] ** 2) ** 0.5

    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        # Signed distance from the preprocessed point to the boundary.
        p2b_dist = (sum(copy_point * coeff[:-1]) + coeff[-1]) / unit_len
        target = c_right if p2b_dist >= 0 else c_left
        target.append(orig_point)

    # The ``np.float`` alias was removed from NumPy; builtin ``float`` is the
    # type it referred to.
    c_left = np.array(c_left, float)
    c_right = np.array(c_right, float)
    return (c_left, c_right)
def cut_by_coeff(orig_datapoints, coeff):
    """Partition points into left/right clusters for a known boundary.

    A deep copy of the input is centralized and normalized, and each point
    is classified by the sign of its distance to the boundary described by
    ``coeff``. The returned arrays hold the original (unprocessed) points.

    Args:
        orig_datapoints: iterable of points; deep-copied before
            preprocessing.
        coeff: boundary coefficients whose last entry is used as the offset
            and whose other entries are used as the weights. It must support
            slicing and element-wise ``** 2`` (e.g. a NumPy array).

    Returns:
        A tuple ``(c_left, c_right)`` of float NumPy arrays: points with
        negative signed distance, then points with non-negative signed
        distance.
    """
    datapoints = deepcopy(orig_datapoints)
    datapoints = utl.centralize_data(datapoints)
    datapoints = utl.normalize_data(datapoints)

    weights = coeff[:-1]
    offset = coeff[-1]
    unit_len = sum(weights ** 2) ** 0.5

    c_left = []
    c_right = []
    for orig_point, copy_point in zip(orig_datapoints, datapoints):
        p2b_dist = (sum(copy_point * weights) + offset) / unit_len
        if p2b_dist >= 0:
            c_right.append(orig_point)
        else:
            c_left.append(orig_point)

    # ``np.float`` no longer exists in NumPy; builtin ``float`` is the type
    # that alias pointed to, so the resulting dtype is unchanged.
    c_left = np.array(c_left, float)
    c_right = np.array(c_right, float)
    return (c_left, c_right)
for cls in tmp_clusters: res_cls.append(cls) print("#cls {} -> {}".format(len(clusters), len(res_cls))) print(calc_num_point(res_cls)) return res_cls if __name__ == '__main__': doctest.testmod() points, label = utl.read_from_text('2d5c_noncycle') points = utl.centralize_data(points) points = utl.normalize_data(points) # points, label = utl.read_from_text('2d5c_cov') # points, label = utl.read_from_text('hand_write_digit_2d') # seleted = datasets.load_digits() # points = seleted.data # label = seleted.target # ms_tree = ms2c(points) # paint_tree(ms_tree, ms_tree) final_nodes = ms_tree.merge() grounded_nodes = ms_tree.grounded_nodes grounded_cls = [x.datapoints for x in grounded_nodes] final_cls = [x.datapoints for x in final_nodes]