def test_color_mean_gray():
    """Mean-aggregated gray coloring produces the expected node hex codes."""
    points = np.array([
        [0.0, 0.0], [0.1, 0.1], [0.2, 0.2],
        [0.2, 0.8], [0.1, 0.9], [0.0, 1.0],
        [0.8, 0.8], [0.9, 0.9], [1.0, 1.0],
    ])
    # One target value per point; the middle cluster averages to ~1.
    values = np.array([0, 0, 0, 1, 1.1, 0.9, 2, 2, 2]).reshape(-1, 1)

    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric=None, lens=None)
    topo.map(resolution=2, overlap=0.3, eps=0.2, min_samples=3)
    topo.color(values, color_method="mean", color_type="gray", normalize=True)

    assert topo.hex_colors == ['#dcdcdc', '#787878', '#141414']
def test_load_data_ndarray_data():
    """A numeric ndarray passed to load_data is stored unchanged."""
    matrix = np.array([[0.0, 0.0], [1.0, 1.0]])

    topo = Topology()
    topo.load_data(matrix)

    assert_array_equal(topo.number_data, matrix)
def test_load_data_text_data_columns_text_data_diff_columns():
    """A column-count mismatch between text_data and its labels raises."""
    numbers = np.array([[0.0, 0.0], [1.0, 1.0]])
    texts = np.array([["data1-1", "data1-2"], ["data2-1", "data2-2"]])

    topo = Topology()
    with pytest.raises(ValueError):
        # Two text columns, but only one label supplied.
        topo.load_data(numbers, text_data=texts,
                       text_data_columns=["columns1"])
def test_load_data_number_data_columns_ndarray():
    """Column labels passed as an ndarray are stored as-is."""
    numbers = np.array([[0.0, 0.0], [1.0, 1.0]])
    labels = np.array(["data1", "data2"])

    topo = Topology()
    topo.load_data(numbers, number_data_columns=labels)

    assert_array_equal(topo.number_data_columns, labels)
def test_load_data_text_data_ndarray():
    """Text data passed as an ndarray is stored unchanged."""
    numbers = np.array([[0.0, 0.0], [1.0, 1.0]])
    texts = np.array([["data1"], ["data2"]])

    topo = Topology()
    topo.load_data(numbers, text_data=texts)

    assert_array_equal(topo.text_data, texts)
def test_load_data_text_data_columns_text_data_is_none():
    """Supplying text_data_columns without text_data raises ValueError."""
    numbers = np.array([[0.0, 0.0], [1.0, 1.0]])

    topo = Topology()
    with pytest.raises(ValueError):
        topo.load_data(numbers, text_data=None,
                       text_data_columns=["columns1", "columns2"])
def test_load_data_standardize_true():
    """standardize=True stores a StandardScaler-transformed copy."""
    numbers = np.array([[0.0, 0.0], [1.0, 1.0]])

    topo = Topology()
    topo.load_data(numbers, standardize=True)

    expected = preprocessing.StandardScaler().fit_transform(numbers)
    assert_array_equal(topo.std_number_data, expected)
def test_load_data_text_data_columns():
    """text_data_columns given as a list is stored as an ndarray."""
    numbers = np.array([[0.0, 0.0], [1.0, 1.0]])
    texts = np.array([["data1-1", "data1-2"], ["data2-1", "data2-2"]])
    labels = ["columns1", "columns2"]

    topo = Topology()
    topo.load_data(numbers, text_data=texts, text_data_columns=labels)

    assert_array_equal(topo.text_data_columns, np.array(labels))
def test_map():
    """A symmetric cloud maps to a 2x2 grid of fully connected nodes."""
    points = np.array([
        [0.0, 0.0], [0.25, 0.25], [0.5, 0.5],
        [0.75, 0.75], [1.0, 1.0], [1.0, 0.0],
        [0.25, 0.75], [0.75, 0.25], [0.0, 1.0],
    ])

    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric=None, lens=None)
    topo.map(resolution=2, overlap=0.3, eps=0.3, min_samples=3)

    expected_nodes = np.array([[0.25, 0.25], [0.25, 0.75],
                               [0.75, 0.25], [0.75, 0.75]])
    # Complete graph on the four nodes.
    expected_edges = np.array([[0, 1], [0, 2], [0, 3],
                               [1, 2], [1, 3], [2, 3]])

    assert_array_equal(topo.nodes, expected_nodes)
    assert_array_equal(topo.edges, expected_edges)
def test_color_different_size_input():
    """color() rejects a target whose length differs from the data."""
    points = np.array([
        [0.0, 0.0], [0.1, 0.1], [0.2, 0.2],
        [0.2, 0.8], [0.1, 0.9], [0.0, 1.0],
        [0.8, 0.8], [0.9, 0.9], [1.0, 1.0],
    ])
    # Only three values for nine data points.
    short_target = np.array([0, 1, 2])

    topo = Topology()
    topo.load_data(points)
    topo.fit_transform()
    topo.map()

    with pytest.raises(Exception):
        topo.color(short_target)
def test_color_ctype():
    """An unknown color_type value raises."""
    points = np.array([
        [0.0, 0.0], [0.1, 0.1], [0.2, 0.2],
        [0.2, 0.8], [0.1, 0.9], [0.0, 1.0],
        [0.8, 0.8], [0.9, 0.9], [1.0, 1.0],
    ])
    labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]).reshape(-1, 1)

    topo = Topology()
    topo.load_data(points)
    topo.fit_transform()
    topo.map(resolution=2, overlap=0.3)

    with pytest.raises(Exception):
        topo.color(labels, color_type="hoge")
def test_map_min_samples_under_zero():
    """map() rejects a negative min_samples."""
    points = np.array([
        [0.0, 0.0], [0.25, 0.25], [0.5, 0.5],
        [0.75, 0.75], [1.0, 1.0], [1.0, 0.0],
        [0.25, 0.75], [0.75, 0.25], [0.0, 1.0],
    ])

    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric=None, lens=None)

    with pytest.raises(Exception):
        topo.map(min_samples=-1)
def test_transform_none_none():
    """With no metric and no lens, the point cloud equals the input data."""
    points = np.array([[0.0, 0.0], [1.0, 1.0]])

    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric=None, lens=None)

    assert_array_equal(topo.point_cloud, np.array([[0.0, 0.0], [1.0, 1.0]]))
def test_transform_multi_lens():
    """Stacking two lenses yields one point-cloud column per lens."""
    points = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 1.0]])

    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric="hamming",
                       lens=[L1Centrality(), GaussianDensity(h=0.25)])

    expected = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]])
    assert_array_equal(topo.point_cloud, expected)
def test_transform_none_pca():
    """A single-component PCA lens yields a one-column point cloud."""
    points = np.array([[0.0, 1.0], [1.0, 0.0]])

    topo = Topology()
    topo.load_data(points)
    topo.fit_transform(metric=None, lens=[PCA(components=[0])])

    expected = np.array([0.0, 1.0]).reshape(-1, 1)
    assert_array_equal(topo.point_cloud, expected)
# Example #16 (score: 0)
def reduction():
    """Project the loaded dataset to 2-D with the requested reduction
    algorithm and return the resulting point cloud as JSON.

    Request params:
        file_id: id of a previously uploaded CSV file.
        target_index: column index of the target; empty string means
            "use every column".
        algorithm: integer index into the REDUCTIONS lens table.

    Returns a response whose body contains ``{"point_cloud": [...]}``,
    or ``{"error_msg": ...}`` on failure.
    """
    try:
        # get request params
        file_id = request.params.file_id
        target_index = request.params.target_index
        algorithm = int(request.params.algorithm)

        # Resolve the uploaded file from its id.
        file_name = _get_file_name_from_id(file_id)
        file_path = os.path.join(DATA_DIR, file_name)

        # Load and standardize the data.
        topology = Topology(verbose=0)
        loader = CSVLoader(file_path)
        topology.load(loader=loader, standardize=True)

        # If a target index exists, exclude that column from the
        # reduction input; otherwise use all data to calculate.
        if target_index != '':
            topology.number_data, target = _split_target(
                topology.number_data, int(target_index))

        # Transform, then rescale into (0.05, 0.95) so every point stays
        # inside the drawing area.
        scaler = preprocessing.MinMaxScaler(feature_range=(0.05, 0.95))
        topology.fit_transform(lens=[REDUCTIONS[algorithm]], scaler=scaler)

        body = {
            "point_cloud": _ndarray_to_list(topology.point_cloud),
        }
        r = create_response(body)
        # The reduction is deterministic per file, so let clients cache it.
        r.set_header('Cache-Control', 'max-age=86400')
        return r

    except Exception as e:
        # e.args may be empty (e.g. ``raise ValueError()``); avoid a
        # secondary IndexError while reporting the original failure.
        msg = e.args[0] if e.args else str(e)
        body = json.dumps({"error_msg": msg})
        r = create_response(body)
        return r
def test_transform_data_none():
    """fit_transform before any data is loaded raises."""
    topo = Topology()
    with pytest.raises(Exception):
        topo.fit_transform()
def test_load_data_none():
    """load_data(None) raises ValueError."""
    topo = Topology()
    with pytest.raises(ValueError):
        topo.load_data(None)
def test_search_target_data():
    """Searching on the target column (-1) dims non-matching node colors."""
    points = np.array([
        [0.0, 0.0], [0.1, 0.1], [0.2, 0.2],
        [0.2, 0.8], [0.1, 0.9], [0.0, 1.0],
        [0.8, 0.8], [0.9, 0.9], [1.0, 1.0],
    ])
    labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]).reshape(-1, 1)
    texts = np.array([["a"], ["a"], ["a"],
                      ["b"], ["b"], ["b"],
                      ["c"], ["c"], ["c"]])

    topo = Topology()
    topo.load_data(points,
                   number_data_columns=np.array(["columns1", "columns2"]),
                   text_data=texts,
                   text_data_columns=np.array(["text_columns"]))
    topo.fit_transform(metric=None, lens=None)
    topo.map(resolution=2, overlap=0.3)
    topo.color(labels, color_method="mean", color_type="rgb", normalize=True)

    # column == -1 addresses the target array passed to search_from_values.
    conditions = [{
        "data_type": "number",
        "operator": "=",
        "column": -1,
        "value": 2
    }]
    topo.search_from_values(search_dicts=conditions, target=labels,
                            search_type="and")

    assert topo.hex_colors == ['#cccccc', '#cccccc', '#b20000']
def test_load_data_not_2d_array():
    """A 3-D nested list is rejected by load_data."""
    topo = Topology()
    with pytest.raises(ValueError):
        topo.load_data([[[0.0, 0.0], [1.0, 1.0]]])
# Example #21 (score: 0)
def create():
    """Build a visualization for the requested file and return it as JSON.

    Request param ``data`` is a JSON object carrying:
        file_id, target_index, mode (0=scatter plot, 1=unsupervised
        clustering, 2=supervised clustering, 3=TDA mapper graph),
        clustering_algorithm, train_size, k, eps, min_samples,
        resolution, overlap, visualize_mode, and a precomputed
        point_cloud from a prior ``reduction`` call.

    Returns hypercubes, nodes, edges, node sizes, per-column colors, and
    the training index, or ``{"error_msg": ...}`` on failure.
    """
    try:
        # get request params
        data = json.loads(request.params.data)

        file_id = int(data["file_id"])
        file_name = _get_file_name_from_id(file_id)
        file_path = os.path.join(DATA_DIR, file_name)

        # Load and standardize the source CSV; the point cloud itself is
        # restored from the request payload below.
        topology = Topology(verbose=0)
        loader = CSVLoader(file_path)
        topology.load(loader=loader, standardize=True)

        target_index = data["target_index"]
        mode = int(data["mode"])
        clustering_algorithm = int(data["clustering_algorithm"])
        train_size = float(data["train_size"])
        k = int(data["k"])
        eps = float(data["eps"])
        min_samples = int(data["min_samples"])
        resolution = int(data["resolution"])
        overlap = float(data["overlap"])
        topology.point_cloud = np.array(data["point_cloud"])

        visualize_mode = int(data["visualize_mode"])

        if mode == 0:
            # Scatter plot: one color array per min-max-scaled column.
            colors = []
            for i in range(topology.number_data.shape[1]):
                column = topology.number_data[:, i]
                scaler = preprocessing.MinMaxScaler()
                column = scaler.fit_transform(column.reshape(-1, 1))
                topology.color_point_cloud(target=column, normalize=False)
                colors.append(topology.point_cloud_hex_colors)

        elif mode == 1:
            # Unsupervised clustering.
            # If target index isn't exists, use all data to calculate.
            if target_index != '':
                topology.number_data, target = _split_target(
                    topology.number_data, int(target_index))

            clusters = [
                cluster.KMeans(n_clusters=k),
                cluster.DBSCAN(eps=eps, min_samples=min_samples)
            ]
            topology.unsupervised_clustering_point_cloud(
                clusterer=clusters[clustering_algorithm])

            if target_index != '':
                topology.number_data = _concat_target(topology.number_data,
                                                      target,
                                                      int(target_index))

            # Cluster colors are column-independent; repeat the same
            # palette once per column for a uniform response shape.
            colors = []
            for _ in range(topology.number_data.shape[1]):
                colors.append(topology.point_cloud_hex_colors)

        elif mode == 2:
            # Supervised clustering; requires a target column
            # (target_index must not be empty, or `target` is unbound).
            if target_index != '':
                topology.number_data, target = _split_target(
                    topology.number_data, int(target_index))

            clusters = [neighbors.KNeighborsClassifier(n_neighbors=k)]
            topology.supervised_clustering_point_cloud(
                clusterer=clusters[clustering_algorithm],
                target=target,
                train_size=train_size)

            if target_index != '':
                topology.number_data = _concat_target(topology.number_data,
                                                      target,
                                                      int(target_index))

            colors = []
            for _ in range(topology.number_data.shape[1]):
                colors.append(topology.point_cloud_hex_colors)

        elif mode == 3:
            # TDA mapper graph.
            topology.map(resolution=resolution,
                         overlap=overlap,
                         eps=eps,
                         min_samples=min_samples)

            if visualize_mode == 2:
                # Re-layout nodes with a spectral embedding instead of
                # their averaged data positions.
                presenter = SpectralPresenter(fig_size=(10, 10),
                                              node_size=5,
                                              edge_width=1)
                pos = presenter._get_position(topology.nodes, topology.edges)
                topology.nodes = np.array(list(pos.values()))

            colors = []
            for i in range(topology.number_data.shape[1]):
                column = topology.number_data[:, i]
                scaler = preprocessing.MinMaxScaler()
                column = scaler.fit_transform(column.reshape(-1, 1))
                topology.color(target=column)
                colors.append(topology.hex_colors)

        if mode < 3:
            # Point-cloud modes: every point is its own "node".
            hypercubes = _ndarray_to_list(
                np.arange(len(topology.point_cloud)).reshape(-1, 1))
            nodes = _ndarray_to_list(topology.point_cloud)
            edges = []
            node_sizes = [MIN_NODE_SIZE] * len(topology.point_cloud)
        elif mode == 3:
            hypercubes = _convert_hypercubes_to_array(topology.hypercubes)
            nodes = _ndarray_to_list(topology.nodes)
            edges = _ndarray_to_list(topology.edges)
            # Scale node sizes into the drawable range.
            scaler = preprocessing.MinMaxScaler(feature_range=(MIN_NODE_SIZE,
                                                               MAX_NODE_SIZE))
            node_sizes = _ndarray_to_list(
                scaler.fit_transform(topology.node_sizes))

        body = {
            "hypercubes": hypercubes,
            "nodes": nodes,
            "edges": edges,
            "node_sizes": node_sizes,
            "colors": colors,
            # NOTE(review): train_index is presumably only set by the
            # supervised path — confirm it exists for other modes.
            "train_index": _ndarray_to_list(topology.train_index)
        }
        r = create_response(body)
        return r
    except Exception as e:
        # e.args may be empty; avoid a secondary IndexError.
        msg = e.args[0] if e.args else str(e)
        body = json.dumps({"error_msg": msg})
        r = create_response(body)
        return r
def test_search_multiple_values():
    """An AND search over a number and a text condition highlights one node."""
    points = np.array([
        [0.0, 0.0], [0.1, 0.1], [0.2, 0.2],
        [0.2, 0.8], [0.1, 0.9], [0.0, 1.0],
        [0.8, 0.8], [0.9, 0.9], [1.0, 1.0],
    ])
    labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]).reshape(-1, 1)
    texts = np.array([["a"], ["a"], ["a"],
                      ["b"], ["b"], ["b"],
                      ["c"], ["c"], ["c"]])

    topo = Topology()
    topo.load_data(points, text_data=texts)
    topo.fit_transform(metric=None, lens=None)
    topo.map(resolution=2, overlap=0.3)
    topo.color(labels, color_method="mean", color_type="rgb", normalize=True)

    conditions = [
        {"data_type": "number", "operator": "<", "column": 0, "value": 0.3},
        {"data_type": "text", "operator": "like", "column": 0, "value": "a"},
    ]
    topo.search_from_values(search_dicts=conditions, target=None,
                            search_type="and")

    assert topo.hex_colors == ['#0000b2', '#cccccc', '#cccccc']
# Example #23 (score: 0)
def search():
    """Re-color an existing visualization according to search conditions.

    Request param ``data`` carries the state previously returned by
    ``create`` (point cloud, hypercubes, nodes, edges) plus the search
    conditions, so the topology does not have to be recomputed.  Returns
    ``{"colors": [...]}`` — one hex-color list per data column — or
    ``{"error_msg": ...}`` on failure.
    """
    try:
        data = json.loads(request.params.data)

        file_id = int(data["file_id"])
        file_name = _get_file_name_from_id(file_id)
        file_path = os.path.join(DATA_DIR, file_name)

        # create topology instance (original constructed it twice; once
        # is enough) and reload the standardized source data.
        topology = Topology(verbose=0)
        loader = CSVLoader(file_path)
        topology.load(loader=loader, standardize=True)

        target_index = data["target_index"]
        mode = int(data["mode"])
        clustering_algorithm = int(data["clustering_algorithm"])
        train_size = float(data["train_size"])
        k = int(data["k"])
        eps = float(data["eps"])
        min_samples = int(data["min_samples"])
        # Restore the precomputed graph state from the payload.
        topology.point_cloud = np.array(data["point_cloud"])
        hypercubes = data["hypercubes"]
        topology.hypercubes = _convert_array_to_hypercubes(hypercubes)
        topology.nodes = np.array(data["nodes"])
        topology.edges = np.array(data["edges"])
        search_type = data["search_type"]

        # Normalize the search conditions; values arrive URL-encoded.
        search_dicts = []
        for condition in data["search_conditions"]:
            search_dicts.append({
                "data_type": condition["data_type"],
                "column": condition["column"],
                "operator": condition["operator"],
                "value": _decode_txt(condition["value"])
            })

        colors = []
        if mode == 0:
            # Scatter plot: color per column, then dim non-matching points.
            for i in range(topology.number_data.shape[1]):
                column = topology.number_data[:, i]
                scaler = preprocessing.MinMaxScaler()
                column = scaler.fit_transform(column.reshape(-1, 1))
                topology.color_point_cloud(target=column)
                if len(search_dicts) > 0:
                    topology.search_point_cloud(search_dicts=search_dicts,
                                                search_type=search_type)
                colors.append(topology.point_cloud_hex_colors)

        elif mode == 1:
            # Unsupervised clustering.
            if target_index != '':
                topology.number_data, target = _split_target(
                    topology.number_data, int(target_index))

            clusters = [
                cluster.KMeans(n_clusters=k),
                cluster.DBSCAN(eps=eps, min_samples=min_samples)
            ]
            topology.unsupervised_clustering_point_cloud(
                clusterer=clusters[clustering_algorithm])

            if target_index != '':
                topology.number_data = _concat_target(topology.number_data,
                                                      target,
                                                      int(target_index))

            if len(search_dicts) > 0:
                topology.search_point_cloud(search_dicts=search_dicts,
                                            search_type=search_type)

            # Cluster colors are column-independent; repeat the same
            # palette once per column for a uniform response shape.
            colors = []
            for _ in range(topology.number_data.shape[1]):
                colors.append(topology.point_cloud_hex_colors)

        elif mode == 2:
            # Supervised clustering; requires a target column
            # (target_index must not be empty, or `target` is unbound).
            if target_index != '':
                topology.number_data, target = _split_target(
                    topology.number_data, int(target_index))

            clusters = [neighbors.KNeighborsClassifier(n_neighbors=k)]
            topology.supervised_clustering_point_cloud(
                clusterer=clusters[clustering_algorithm],
                target=target,
                train_size=train_size)

            if target_index != '':
                topology.number_data = _concat_target(topology.number_data,
                                                      target,
                                                      int(target_index))

            if len(search_dicts) > 0:
                topology.search_point_cloud(search_dicts=search_dicts,
                                            search_type=search_type)

            colors = []
            for _ in range(topology.number_data.shape[1]):
                colors.append(topology.point_cloud_hex_colors)

        elif mode == 3:
            # TDA graph: rebuild the graph utility from the restored
            # point cloud and hypercubes instead of re-running map().
            topology.graph_util = GraphUtil(point_cloud=topology.point_cloud,
                                            hypercubes=topology.hypercubes)

            colors = []
            for i in range(topology.number_data.shape[1]):
                column = topology.number_data[:, i]
                scaler = preprocessing.MinMaxScaler()
                column = scaler.fit_transform(column.reshape(-1, 1))
                topology.color(target=column)
                if len(search_dicts) > 0:
                    topology.search(search_dicts=search_dicts,
                                    search_type=search_type)
                colors.append(topology.hex_colors)

        body = {
            "colors": colors,
        }
        r = create_response(body)
        return r

    except Exception as e:
        # e.args may be empty; avoid a secondary IndexError.
        msg = e.args[0] if e.args else str(e)
        body = json.dumps({"error_msg": msg})
        r = create_response(body)
        return r
def test_map_point_cloud_none():
    """map() before fit_transform (no point cloud) raises."""
    topo = Topology()
    with pytest.raises(Exception):
        topo.map()