def spam_maker(args):
    if args.json:
        spam = shuffle_from_file(args.json)
    elif args.text:
        spam = utils.read_from_text(args.text)
    else:
        # No input file given: synthesize 2000 documents of 400 random words each.
        spam = [
            " ".join(get_random_word() for _ in range(400))
            for _ in range(2000)
        ]
    return spam
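# Illustration only: a minimal driver for spam_maker. The demo name, the
# argparse.Namespace wiring, and the flag values below are assumptions, not
# part of the original script, which presumably builds `args` with its own
# command-line parser.
import argparse

def _spam_maker_demo():
    # spam_maker only reads args.json and args.text, so a bare Namespace is
    # enough to exercise it without a full command line.
    args = argparse.Namespace(json=None, text=None)
    spam = spam_maker(args)  # neither flag set -> 2000 random-word documents
    print("generated {} spam documents".format(len(spam)))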
def data_seletor(dataset_name):
    if dataset_name == 'hand_write_digits':
        '''
        Classes             10
        Samples per class   ~180
        Samples total       1797
        Dimensionality      64
        Features            integers 0-16
        '''
        selected = datasets.load_digits(n_class=5)
        points = selected.data
        label = selected.target
    elif dataset_name == '5d5c':
        '''
        Classes             5
        Samples per class   ?
        Samples total       ?
        Dimensionality      25
        '''
        points, label = utl.read_from_text('5d5c_std')
    elif dataset_name == "letter-recognition":
        points, label = utl.read_from_text('letter-recognition')
    elif dataset_name == "lung-cancer":
        points, label = utl.read_from_text('lung-cancer')
    elif dataset_name == "image_seg":
        points, label = utl.read_from_text('imgseg')
    elif dataset_name == '20d6c':
        points, label = utl.read_from_text('20d6c_std')
    elif dataset_name == '50d6c':
        points, label = utl.read_from_text('50d6c_std')
    else:
        # assert("not found") always passes (non-empty string is truthy); raise instead.
        raise ValueError("dataset '{}' not found".format(dataset_name))
    return points, label
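# Illustration only: a quick check of what data_seletor returns. The helper
# name below is an assumption; it relies on the same `datasets`/`utl` imports
# that data_seletor itself uses.
def _dataset_demo():
    points, label = data_seletor('hand_write_digits')
    # For the scikit-learn digits branch, points is an (n_samples, 64) array
    # and label holds one class id per row.
    print("loaded {} samples x {} features".format(points.shape[0], points.shape[1]))
    print("classes present:", sorted(set(label)))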
    res_cls.append(cur_cls)
    for cls in tmp_clusters:
        res_cls.append(cls)
    print("#cls {} -> {}".format(len(clusters), len(res_cls)))
    print(calc_num_point(res_cls))
    return res_cls


if __name__ == '__main__':
    doctest.testmod()

    points, label = utl.read_from_text('2d5c_noncycle')
    points = utl.centralize_data(points)
    points = utl.normalize_data(points)
    # points, label = utl.read_from_text('2d5c_cov')
    # points, label = utl.read_from_text('hand_write_digit_2d')
    # seleted = datasets.load_digits()
    # points = seleted.data
    # label = seleted.target

    # ms_tree must be built before merge() is called; leaving this line
    # commented out would raise a NameError below.
    ms_tree = ms2c(points)
    # paint_tree(ms_tree, ms_tree)

    final_nodes = ms_tree.merge()
    grounded_nodes = ms_tree.grounded_nodes
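    # Illustration only (assumption): final_nodes and grounded_nodes behave as
    # plain Python sequences of cluster nodes, so comparing their sizes gives a
    # quick sanity check on the merge step.
    print("#final nodes:", len(final_nodes))
    print("#grounded nodes:", len(grounded_nodes))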