def test_color_mean_gray():
    """Mean aggregation with gray color_type yields the expected hex codes."""
    points = np.array([
        [0.0, 0.0], [0.1, 0.1], [0.2, 0.2],
        [0.2, 0.8], [0.1, 0.9], [0.0, 1.0],
        [0.8, 0.8], [0.9, 0.9], [1.0, 1.0],
    ])
    labels = np.array([[0], [0], [0], [1], [1.1], [0.9], [2], [2], [2]])
    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric=None, lens=None)
    topo.map(resolution=2, overlap=0.3, eps=0.2, min_samples=3)
    topo.color(labels, color_method="mean", color_type="gray", normalize=True)
    assert topo.hex_colors == ['#dcdcdc', '#787878', '#141414']
def test_load_data_ndarray_data():
    """An ndarray passed to load_data is stored unchanged in number_data."""
    samples = np.array([[0.0, 0.0], [1.0, 1.0]])
    topo = Topology()
    topo.load_data(samples)
    assert_array_equal(topo.number_data, samples)
def test_load_data_text_data_columns_text_data_diff_columns():
    """A column-name list shorter than text_data's width raises ValueError."""
    samples = np.array([[0.0, 0.0], [1.0, 1.0]])
    texts = np.array([["data1-1", "data1-2"], ["data2-1", "data2-2"]])
    topo = Topology()
    with pytest.raises(ValueError):
        topo.load_data(samples, text_data=texts, text_data_columns=["columns1"])
def test_load_data_number_data_columns_ndarray():
    """Numeric column names given as an ndarray are stored as-is."""
    samples = np.array([[0.0, 0.0], [1.0, 1.0]])
    names = np.array(["data1", "data2"])
    topo = Topology()
    topo.load_data(samples, number_data_columns=names)
    assert_array_equal(topo.number_data_columns, names)
def test_load_data_text_data_ndarray():
    """Text data passed as an ndarray is stored unchanged."""
    samples = np.array([[0.0, 0.0], [1.0, 1.0]])
    texts = np.array([["data1"], ["data2"]])
    topo = Topology()
    topo.load_data(samples, text_data=texts)
    assert_array_equal(topo.text_data, texts)
def test_load_data_text_data_columns_text_data_is_none():
    """Text column names without accompanying text_data raise ValueError."""
    samples = np.array([[0.0, 0.0], [1.0, 1.0]])
    topo = Topology()
    with pytest.raises(ValueError):
        topo.load_data(samples, text_data=None,
                       text_data_columns=["columns1", "columns2"])
def test_load_data_standardize_true():
    """standardize=True applies a StandardScaler before storing the data."""
    samples = np.array([[0.0, 0.0], [1.0, 1.0]])
    topo = Topology()
    topo.load_data(samples, standardize=True)
    expected = preprocessing.StandardScaler().fit_transform(samples)
    assert_array_equal(topo.std_number_data, expected)
def test_load_data_text_data_columns():
    """Text column names passed as a list are stored as an ndarray."""
    samples = np.array([[0.0, 0.0], [1.0, 1.0]])
    texts = np.array([["data1-1", "data1-2"], ["data2-1", "data2-2"]])
    names = ["columns1", "columns2"]
    topo = Topology()
    topo.load_data(samples, text_data=texts, text_data_columns=names)
    assert_array_equal(topo.text_data_columns, np.array(names))
def test_map():
    """map builds four cluster nodes and a fully connected edge set."""
    points = np.array([
        [0.0, 0.0], [0.25, 0.25], [0.5, 0.5],
        [0.75, 0.75], [1.0, 1.0], [1.0, 0.0],
        [0.25, 0.75], [0.75, 0.25], [0.0, 1.0],
    ])
    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric=None, lens=None)
    topo.map(resolution=2, overlap=0.3, eps=0.3, min_samples=3)
    expected_nodes = np.array([[0.25, 0.25], [0.25, 0.75],
                               [0.75, 0.25], [0.75, 0.75]])
    expected_edges = np.array([[0, 1], [0, 2], [0, 3],
                               [1, 2], [1, 3], [2, 3]])
    assert_array_equal(topo.nodes, expected_nodes)
    assert_array_equal(topo.edges, expected_edges)
def test_color_different_size_input():
    """color raises when target length differs from the data length."""
    points = np.array([
        [0.0, 0.0], [0.1, 0.1], [0.2, 0.2],
        [0.2, 0.8], [0.1, 0.9], [0.0, 1.0],
        [0.8, 0.8], [0.9, 0.9], [1.0, 1.0],
    ])
    short_target = np.array([0, 1, 2])
    topo = Topology()
    topo.load_data(points)
    topo.fit_transform()
    topo.map()
    with pytest.raises(Exception):
        topo.color(short_target)
def test_color_ctype():
    """An unknown color_type value raises."""
    points = np.array([
        [0.0, 0.0], [0.1, 0.1], [0.2, 0.2],
        [0.2, 0.8], [0.1, 0.9], [0.0, 1.0],
        [0.8, 0.8], [0.9, 0.9], [1.0, 1.0],
    ])
    labels = np.array([[0], [0], [0], [1], [1], [1], [2], [2], [2]])
    topo = Topology()
    topo.load_data(points)
    topo.fit_transform()
    topo.map(resolution=2, overlap=0.3)
    with pytest.raises(Exception):
        topo.color(labels, color_type="hoge")
def test_map_min_samples_under_zero():
    """A negative min_samples is rejected by map."""
    points = np.array([
        [0.0, 0.0], [0.25, 0.25], [0.5, 0.5],
        [0.75, 0.75], [1.0, 1.0], [1.0, 0.0],
        [0.25, 0.75], [0.75, 0.25], [0.0, 1.0],
    ])
    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric=None, lens=None)
    with pytest.raises(Exception):
        topo.map(min_samples=-1)
def test_transform_none_none():
    """With neither metric nor lens, the point cloud equals the raw data."""
    points = np.array([[0.0, 0.0], [1.0, 1.0]])
    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric=None, lens=None)
    assert_array_equal(topo.point_cloud, np.array([[0.0, 0.0], [1.0, 1.0]]))
def test_transform_multi_lens():
    """Hamming metric with two stacked lenses yields the expected projection."""
    points = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric="hamming",
                       lens=[L1Centrality(), GaussianDensity(h=0.25)])
    expected = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]])
    assert_array_equal(topo.point_cloud, expected)
def test_transform_none_pca():
    """A single-component PCA lens yields a column-vector point cloud."""
    points = np.array([[0.0, 1.0], [1.0, 0.0]])
    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric=None, lens=[PCA(components=[0])])
    expected = np.array([0.0, 1.0]).reshape(-1, 1)
    assert_array_equal(topo.point_cloud, expected)
def reduction():
    """HTTP handler: project a stored CSV file to a 2-D-ish point cloud.

    Reads ``file_id``, ``target_index`` and ``algorithm`` (an index into the
    ``REDUCTIONS`` table) from the request parameters, loads the referenced
    CSV, optionally splits off the target column, runs the chosen reduction
    lens, and returns the resulting point cloud as JSON.

    On any exception a JSON body ``{"error_msg": ...}`` is returned instead.
    """
    try:
        # get request params
        file_id = request.params.file_id
        target_index = request.params.target_index
        algorithm = int(request.params.algorithm)
        # get file name
        file_name = _get_file_name_from_id(file_id)
        file_path = os.path.join(DATA_DIR, file_name)
        # create topology instance
        topology = Topology(verbose=0)
        loader = CSVLoader(file_path)
        topology.load(loader=loader, standardize=True)
        # If a target index was supplied, split that column off so it is not
        # fed into the reduction; otherwise use all data to calculate.
        if target_index != '':
            topology.number_data, target = _split_target(
                topology.number_data, int(target_index))
        # transform & scale data into (0.05, 0.95) so points stay inside the
        # plotting area
        scaler = preprocessing.MinMaxScaler(feature_range=(0.05, 0.95))
        topology.fit_transform(lens=[REDUCTIONS[algorithm]], scaler=scaler)
        body = {
            "point_cloud": _ndarray_to_list(topology.point_cloud),
        }
        r = create_response(body)
        # the projection of an uploaded file is stable, so allow caching
        r.set_header('Cache-Control', 'max-age=86400')
        return r
    except Exception as e:
        body = json.dumps({"error_msg": e.args[0]})
        r = create_response(body)
        return r
def test_transform_data_none():
    """fit_transform before load_data raises."""
    topo = Topology()
    with pytest.raises(Exception):
        topo.fit_transform()
def test_load_data_none():
    """Passing None as the data argument raises ValueError."""
    topo = Topology()
    with pytest.raises(ValueError):
        topo.load_data(None)
def test_search_target_data():
    """Searching on the target column (index -1) recolors only matching nodes."""
    points = np.array([
        [0.0, 0.0], [0.1, 0.1], [0.2, 0.2],
        [0.2, 0.8], [0.1, 0.9], [0.0, 1.0],
        [0.8, 0.8], [0.9, 0.9], [1.0, 1.0],
    ])
    number_columns = np.array(["columns1", "columns2"])
    labels = np.array([[0], [0], [0], [1], [1], [1], [2], [2], [2]])
    texts = np.array([["a"], ["a"], ["a"],
                      ["b"], ["b"], ["b"],
                      ["c"], ["c"], ["c"]])
    text_columns = np.array(["text_columns"])
    topo = Topology()
    topo.load_data(points, number_data_columns=number_columns,
                   text_data=texts, text_data_columns=text_columns)
    topo.fit_transform(metric=None, lens=None)
    topo.map(resolution=2, overlap=0.3)
    topo.color(labels, color_method="mean", color_type="rgb", normalize=True)
    condition = {
        "data_type": "number",
        "operator": "=",
        "column": -1,
        "value": 2,
    }
    topo.search_from_values(search_dicts=[condition], target=labels,
                            search_type="and")
    assert topo.hex_colors == ['#cccccc', '#cccccc', '#b20000']
def test_load_data_not_2d_array():
    """A 3-D nested list is rejected with ValueError."""
    topo = Topology()
    with pytest.raises(ValueError):
        topo.load_data([[[0.0, 0.0], [1.0, 1.0]]])
def create():
    """HTTP handler: build a visualization (scatter / clustering / TDA graph).

    Decodes a JSON payload from ``request.params.data`` containing the file
    id, the analysis ``mode`` (0 scatter, 1 unsupervised clustering,
    2 supervised clustering, 3 TDA mapper), clustering parameters, mapper
    parameters, and a precomputed ``point_cloud``.  Returns JSON with
    hypercubes, nodes, edges, node sizes, per-column colors and the training
    index.  On any exception a JSON body ``{"error_msg": ...}`` is returned.

    Fixes vs. previous revision: removed a leftover ``print("spectral")``
    debug statement and two no-op ``colors = colors`` assignments.
    """
    try:
        # get request params
        data = json.loads(request.params.data)
        file_id = int(data["file_id"])
        file_name = _get_file_name_from_id(file_id)
        file_path = os.path.join(DATA_DIR, file_name)

        # create topology instance and load the referenced CSV
        topology = Topology(verbose=0)
        loader = CSVLoader(file_path)
        topology.load(loader=loader, standardize=True)

        target_index = data["target_index"]
        mode = int(data["mode"])
        clustering_algorithm = int(data["clustering_algorithm"])
        train_size = float(data["train_size"])
        k = int(data["k"])
        eps = float(data["eps"])
        min_samples = int(data["min_samples"])
        resolution = int(data["resolution"])
        overlap = float(data["overlap"])
        # the client supplies the already-projected point cloud
        topology.point_cloud = np.array(data["point_cloud"])
        visualize_mode = int(data["visualize_mode"])

        if mode == 0:
            # scatter plot: one color array per numeric column
            colors = []
            for i in range(topology.number_data.shape[1]):
                t = topology.number_data[:, i]
                scaler = preprocessing.MinMaxScaler()
                t = scaler.fit_transform(t.reshape(-1, 1))
                topology.color_point_cloud(target=t, normalize=False)
                colors.append(topology.point_cloud_hex_colors)
        elif mode == 1:
            # unsupervised clusterings
            # If a target index was supplied, split that column off first;
            # otherwise use all data to calculate.
            if target_index != '':
                topology.number_data, target = _split_target(
                    topology.number_data, int(target_index))
            clusters = [
                cluster.KMeans(n_clusters=k),
                cluster.DBSCAN(eps=eps, min_samples=min_samples)
            ]
            topology.unsupervised_clustering_point_cloud(
                clusterer=clusters[clustering_algorithm])
            if target_index != '':
                topology.number_data = _concat_target(topology.number_data,
                                                      target,
                                                      int(target_index))
            # same cluster coloring repeated for every column
            colors = []
            for i in range(topology.number_data.shape[1]):
                colors.append(topology.point_cloud_hex_colors)
        elif mode == 2:
            # supervised clusterings
            if target_index != '':
                topology.number_data, target = _split_target(
                    topology.number_data, int(target_index))
            clusters = [neighbors.KNeighborsClassifier(n_neighbors=k)]
            topology.supervised_clustering_point_cloud(
                clusterer=clusters[clustering_algorithm],
                target=target, train_size=train_size)
            if target_index != '':
                topology.number_data = _concat_target(topology.number_data,
                                                      target,
                                                      int(target_index))
            colors = []
            for i in range(topology.number_data.shape[1]):
                colors.append(topology.point_cloud_hex_colors)
        elif mode == 3:
            # tda mapper graph
            topology.map(resolution=resolution, overlap=overlap, eps=eps,
                         min_samples=min_samples)
            if visualize_mode == 2:
                # re-position nodes with a spectral layout
                presenter = SpectralPresenter(fig_size=(10, 10), node_size=5,
                                              edge_width=1)
                pos = presenter._get_position(topology.nodes, topology.edges)
                topology.nodes = np.array(list(pos.values()))
            colors = []
            for i in range(topology.number_data.shape[1]):
                t = topology.number_data[:, i]
                scaler = preprocessing.MinMaxScaler()
                t = scaler.fit_transform(t.reshape(-1, 1))
                topology.color(target=t)
                colors.append(topology.hex_colors)

        if mode < 3:
            # point-cloud modes: every point is its own "hypercube", no edges
            hypercubes = _ndarray_to_list(
                np.arange(len(topology.point_cloud)).reshape(-1, 1))
            nodes = _ndarray_to_list(topology.point_cloud)
            edges = []
            node_sizes = [MIN_NODE_SIZE] * len(topology.point_cloud)
        elif mode == 3:
            hypercubes = _convert_hypercubes_to_array(topology.hypercubes)
            nodes = _ndarray_to_list(topology.nodes)
            edges = _ndarray_to_list(topology.edges)
            # rescale raw node sizes into the displayable range
            scaler = preprocessing.MinMaxScaler(feature_range=(MIN_NODE_SIZE,
                                                               MAX_NODE_SIZE))
            node_sizes = _ndarray_to_list(
                scaler.fit_transform(topology.node_sizes))

        body = {
            "hypercubes": hypercubes,
            "nodes": nodes,
            "edges": edges,
            "node_sizes": node_sizes,
            "colors": colors,
            "train_index": _ndarray_to_list(topology.train_index)
        }
        r = create_response(body)
        return r
    except Exception as e:
        body = json.dumps({"error_msg": e.args[0]})
        r = create_response(body)
        return r
def test_search_multiple_values():
    """AND-combining a numeric and a text condition highlights one node."""
    points = np.array([
        [0.0, 0.0], [0.1, 0.1], [0.2, 0.2],
        [0.2, 0.8], [0.1, 0.9], [0.0, 1.0],
        [0.8, 0.8], [0.9, 0.9], [1.0, 1.0],
    ])
    labels = np.array([[0], [0], [0], [1], [1], [1], [2], [2], [2]])
    texts = np.array([["a"], ["a"], ["a"],
                      ["b"], ["b"], ["b"],
                      ["c"], ["c"], ["c"]])
    topo = Topology()
    topo.load_data(points, text_data=texts)
    topo.fit_transform(metric=None, lens=None)
    topo.map(resolution=2, overlap=0.3)
    topo.color(labels, color_method="mean", color_type="rgb", normalize=True)
    conditions = [
        {"data_type": "number", "operator": "<", "column": 0, "value": 0.3},
        {"data_type": "text", "operator": "like", "column": 0, "value": "a"},
    ]
    topo.search_from_values(search_dicts=conditions, target=None,
                            search_type="and")
    assert topo.hex_colors == ['#0000b2', '#cccccc', '#cccccc']
def search():
    """HTTP handler: re-color an existing visualization by search conditions.

    Decodes a JSON payload from ``request.params.data`` carrying the file id,
    analysis ``mode`` (0 scatter, 1 unsupervised clustering, 2 supervised
    clustering, 3 TDA graph), the previously computed point cloud / graph
    state, and a list of search conditions.  Re-runs the coloring for every
    numeric column, applies the search filter, and returns the per-column
    color arrays as JSON.  On any exception a JSON body
    ``{"error_msg": ...}`` is returned.

    Fix vs. previous revision: a redundant second ``Topology(verbose=0)``
    instantiation (the first instance was immediately discarded) is removed.
    """
    try:
        # get request params
        data = json.loads(request.params.data)
        file_id = int(data["file_id"])
        file_name = _get_file_name_from_id(file_id)
        file_path = os.path.join(DATA_DIR, file_name)

        # create topology instance and load the referenced CSV
        topology = Topology(verbose=0)
        loader = CSVLoader(file_path)
        topology.load(loader=loader, standardize=True)

        target_index = data["target_index"]
        mode = int(data["mode"])
        clustering_algorithm = int(data["clustering_algorithm"])
        train_size = float(data["train_size"])
        k = int(data["k"])
        eps = float(data["eps"])
        min_samples = int(data["min_samples"])

        # restore the previously computed state sent back by the client
        topology.point_cloud = np.array(data["point_cloud"])
        hypercubes = data["hypercubes"]
        topology.hypercubes = _convert_array_to_hypercubes(hypercubes)
        topology.nodes = np.array(data["nodes"])
        topology.edges = np.array(data["edges"])

        search_type = data["search_type"]
        # decode the search conditions (values arrive URL-encoded)
        search_dicts = []
        for condition in data["search_conditions"]:
            c = {
                "data_type": condition["data_type"],
                "column": condition["column"],
                "operator": condition["operator"],
                "value": _decode_txt(condition["value"])
            }
            search_dicts.append(c)

        colors = []
        if mode == 0:
            # scatter plot: color per column, then narrow by the search
            for i in range(topology.number_data.shape[1]):
                t = topology.number_data[:, i]
                scaler = preprocessing.MinMaxScaler()
                t = scaler.fit_transform(t.reshape(-1, 1))
                topology.color_point_cloud(target=t)
                if len(search_dicts) > 0:
                    topology.search_point_cloud(search_dicts=search_dicts,
                                                search_type=search_type)
                colors.append(topology.point_cloud_hex_colors)
        elif mode == 1:
            # unsupervised clusterings
            if target_index != '':
                topology.number_data, target = _split_target(
                    topology.number_data, int(target_index))
            clusters = [
                cluster.KMeans(n_clusters=k),
                cluster.DBSCAN(eps=eps, min_samples=min_samples)
            ]
            topology.unsupervised_clustering_point_cloud(
                clusterer=clusters[clustering_algorithm])
            if target_index != '':
                topology.number_data = _concat_target(topology.number_data,
                                                      target,
                                                      int(target_index))
            if len(search_dicts) > 0:
                topology.search_point_cloud(search_dicts=search_dicts,
                                            search_type=search_type)
            # same cluster coloring repeated for every column
            colors = []
            for i in range(topology.number_data.shape[1]):
                colors.append(topology.point_cloud_hex_colors)
        elif mode == 2:
            # supervised clusterings
            if target_index != '':
                topology.number_data, target = _split_target(
                    topology.number_data, int(target_index))
            clusters = [neighbors.KNeighborsClassifier(n_neighbors=k)]
            topology.supervised_clustering_point_cloud(
                clusterer=clusters[clustering_algorithm],
                target=target, train_size=train_size)
            if target_index != '':
                topology.number_data = _concat_target(topology.number_data,
                                                      target,
                                                      int(target_index))
            if len(search_dicts) > 0:
                topology.search_point_cloud(search_dicts=search_dicts,
                                            search_type=search_type)
            colors = []
            for i in range(topology.number_data.shape[1]):
                colors.append(topology.point_cloud_hex_colors)
        elif mode == 3:
            # tda graph: rebuild the graph helper from the restored state
            topology.graph_util = GraphUtil(point_cloud=topology.point_cloud,
                                            hypercubes=topology.hypercubes)
            colors = []
            for i in range(topology.number_data.shape[1]):
                t = topology.number_data[:, i]
                scaler = preprocessing.MinMaxScaler()
                t = scaler.fit_transform(t.reshape(-1, 1))
                topology.color(target=t)
                if len(search_dicts) > 0:
                    topology.search(search_dicts=search_dicts,
                                    search_type=search_type)
                colors.append(topology.hex_colors)

        body = {
            "colors": colors,
        }
        r = create_response(body)
        return r
    except Exception as e:
        body = json.dumps({"error_msg": e.args[0]})
        r = create_response(body)
        return r
def test_map_point_cloud_none():
    """Calling map before fit_transform raises."""
    topo = Topology()
    with pytest.raises(Exception):
        topo.map()