예제 #1
0
def test_plot2d_graph():
    """Smoke-test ``plot2d_graph`` on a graph built from eight 2-D points.

    The points are turned into a kNN graph (``knn_graph`` with 3), passed
    through ``pre_part_graph`` with 10, and the result is handed to
    ``plot2d_graph``. Nothing is asserted; the test passes when no call
    raises.
    """
    points = [
        [1, 1],
        [6, 5],
        [6, 6],
        [0, 0],
        [1, 2],
        [2, 1],
        [5, 5],
        [7, 6],
    ]
    df = pd.DataFrame(points)

    plot2d_graph(pre_part_graph(knn_graph(df, 3), 10, df))
예제 #2
0
def test_pre_part_graph():
    """Check the ``cluster`` labels present on ``df`` after ``pre_part_graph``.

    Seven 2-D points are turned into a kNN graph (k=3) and passed to
    ``pre_part_graph`` with 2 as its second argument. The ``cluster`` column
    of ``df`` is then compared element-wise against the expected labels.
    """
    df = pd.DataFrame([[1, 1], [6, 5], [6, 6], [0, 0], [1, 2], [5, 5], [7, 2]])
    k = 3

    pregraph = knn_graph(df, k, verbose=False)
    # The returned graph is not needed here: the check reads df["cluster"],
    # a column that did not exist when df was created, so it is populated as
    # a side effect of the calls above.
    pre_part_graph(pregraph, 2, df, verbose=False)

    # assert_array_equal reports the mismatching values on failure (which
    # replaces the former debug print) and fails with AssertionError on a
    # shape mismatch instead of relying on `==` broadcasting.
    np.testing.assert_array_equal(
        df["cluster"].values,
        np.array([0, 1, 1, 0, 0, 1, 1]),
    )
예제 #3
0
def test_rebuild_labels():
    """Check the sorted ``cluster`` labels returned by ``rebuild_labels``.

    After building the kNN graph (3) and running ``pre_part_graph`` (10) on
    seven 2-D points, ``rebuild_labels(df)`` is expected to yield a
    ``cluster`` column whose sorted values are four 1s followed by three 2s.
    """
    coords = [[1, 1], [6, 5], [6, 6], [0, 0], [1, 2], [5, 5], [7, 2]]
    df = pd.DataFrame(coords)

    knn = knn_graph(df, 3, verbose=False)
    pre_part_graph(knn, 10, df, verbose=False)

    relabelled = rebuild_labels(df)
    labels = list(relabelled["cluster"])
    labels.sort()

    assert labels == [1, 1, 1, 1, 2, 2, 2]
예제 #4
0
def test_len_edges():
    """Check ``len_edges`` for two node groups of the pre-partitioned graph.

    The graph is built from seven 2-D points via ``knn_graph`` (3) and
    ``pre_part_graph`` (10). ``len_edges`` is expected to return 3 for nodes
    ``[0, 3, 4]`` and 6 for nodes ``[1, 2, 5, 6]``.
    """
    df = pd.DataFrame([[1, 1], [6, 5], [6, 6], [0, 0], [1, 2], [5, 5], [7, 2]])

    pregraph = knn_graph(df, 3, verbose=False)
    graph = pre_part_graph(pregraph, 10, df, verbose=False)

    # One assertion per expectation: a failure then names the offending call
    # and shows its actual value, instead of a bare `False` produced by
    # bitwise-and-ing two pre-computed booleans.
    assert len_edges(graph, [0, 3, 4]) == 3
    assert len_edges(graph, [1, 2, 5, 6]) == 6
예제 #5
0
def test_connecting_edges():
    """Check the edges ``connecting_edges`` reports between two node groups.

    The graph is built from seven 2-D points via ``knn_graph`` (3) and
    ``pre_part_graph`` (10). For the groups ``[0, 3, 4]`` and
    ``[1, 2, 5, 6]`` the expected result is the list
    ``[(0, 5), (3, 5), (4, 5)]``, in that order.
    """
    coords = [[1, 1], [6, 5], [6, 6], [0, 0], [1, 2], [5, 5], [7, 2]]
    df = pd.DataFrame(coords)

    knn = knn_graph(df, 3, verbose=False)
    graph = pre_part_graph(knn, 10, df, verbose=False)

    partitions = ([0, 3, 4], [1, 2, 5, 6])
    expected_edges = [(0, 5), (3, 5), (4, 5)]

    assert connecting_edges(partitions, graph) == expected_edges
예제 #6
0
def test_get_cluster():
    """Check the node lists ``get_cluster`` returns for two cluster labels.

    The graph is built from seven 2-D points via ``knn_graph`` (3) and
    ``pre_part_graph`` (``k`` = 3). ``get_cluster`` is expected to return
    ``[0, 3, 4]`` for label list ``[0]`` and ``[1, 2, 5, 6]`` for label list
    ``[k - 1]``.
    """
    df = pd.DataFrame([[1, 1], [6, 5], [6, 6], [0, 0], [1, 2], [5, 5], [7, 2]])
    k = 3

    pregraph = knn_graph(df, 3, verbose=False)
    graph = pre_part_graph(pregraph, k, df, verbose=False)

    # One assertion per expectation: a failure then shows the actual node
    # list returned, instead of a bare `False` produced by bitwise-and-ing
    # two pre-computed comparison results.
    assert get_cluster(graph, [0]) == [0, 3, 4]
    assert get_cluster(graph, [k - 1]) == [1, 2, 5, 6]
예제 #7
0
def test_conn_comp():
    """Check the first two components returned by ``conn_comp``.

    The graph is built from eight 2-D points via ``knn_graph`` (3) and
    ``pre_part_graph`` (10). After sorting, ``res[0]`` is expected to be
    ``[0, 3, 4, 5]`` and ``res[1]`` to be ``[1, 2, 6, 7]``.
    """
    df = pd.DataFrame([[1, 1], [6, 5], [6, 6], [0, 0], [1, 2], [2, 1], [5, 5],
                       [7, 6]])

    pregraph = knn_graph(df, 3)
    graph = pre_part_graph(pregraph, 10, df)

    res = conn_comp(graph)

    # One assertion per component: a failure then shows the actual members,
    # instead of a bare `False` produced by bitwise-and-ing two pre-computed
    # booleans.
    assert sorted(res[0]) == [0, 3, 4, 5]
    assert sorted(res[1]) == [1, 2, 6, 7]
예제 #8
0
def test_plot2d_data():
    """Smoke-test ``plot2d_data`` on the frame returned by ``merge_best``.

    Sixty samples from ``make_blobs`` (``random_state=42``) go through
    ``knn_graph`` (6), ``pre_part_graph`` (10) and one ``merge_best`` call
    (``a=2``, ``k=3``). The returned frame is handed to ``plot2d_data``.
    Nothing is asserted; the test passes when no call raises.
    """
    blobs = make_blobs(60, random_state=42)
    df = pd.DataFrame(blobs[0])
    n_neighbors = 6

    knn = knn_graph(df, n_neighbors, verbose=False)
    graph = pre_part_graph(knn, 10, df, verbose=False)

    merge_result = merge_best(
        graph=graph,
        df=df,
        a=2,
        k=3,
        verbose=False,
        verbose2=False,
    )
    # Only the frame is used; the other two returned values are ignored here.
    df, _score, _cluster_index = merge_result

    plot2d_data(df)
예제 #9
0
def test_merge_best():
    """Check the score and index returned by a single ``merge_best`` call.

    Sixty samples from ``make_blobs`` (``random_state=42``) go through
    ``knn_graph`` (6) and ``pre_part_graph`` (10). One ``merge_best`` call
    with ``a=2`` and ``k=3`` is expected to return a score that rounds to 1
    and a third value equal to 5.
    """
    df = pd.DataFrame(make_blobs(60, random_state=42)[0])
    knn = 6

    pregraph = knn_graph(df, knn, verbose=False)
    graph = pre_part_graph(pregraph, 10, df, verbose=False)
    df, max_score, ci = merge_best(graph=graph,
                                   df=df,
                                   a=2,
                                   k=3,
                                   verbose=False,
                                   verbose2=False)

    # One assertion per expectation: a failure then shows the actual score or
    # index, instead of a bare `False` produced by bitwise-and-ing two
    # pre-computed booleans.
    assert round(max_score) == 1
    assert ci == 5
예제 #10
0
def test_knn_graph():
    """Check the edges and nodes of the graph ``knn_graph`` builds.

    For five 2-D points and ``k = 2`` the graph is expected to expose exactly
    the six edges listed below, in that iteration order, and the nodes
    ``0..4`` in order.
    """
    df = pd.DataFrame([[1, 1], [6, 5], [6, 6], [0, 0], [1, 2]])
    k = 2

    graph = knn_graph(df, k, verbose=False)

    # One assertion per expectation: a failure then shows the actual edge or
    # node list, instead of a bare `False` produced by bitwise-and-ing two
    # pre-computed booleans.
    assert list(graph.edges) == [
        (0, 4),
        (0, 3),
        (1, 2),
        (1, 4),
        (2, 4),
        (3, 4),
    ]
    assert list(graph.nodes) == [0, 1, 2, 3, 4]
예제 #11
0
    def cluster_gui(self, df, k, knn=10, m=30, alpha=2.0, save_plots=None):
        """Run the clustering pipeline while reporting progress to the GUI.

        Steps, in the order executed below:

        1. Build a kNN graph from ``df`` and draw it on the upper canvas.
        2. Pre-partition the graph with ``pre_part_graph_gui``.
        3. Count the distinct ``"cluster"`` labels present on the graph nodes
           and use that count as the effective ``m``.
        4. Call ``merge_best_gui`` at most ``m - k`` times, drawing the data
           on the lower canvas after every merge and recording the merge
           score.

        Args:
            df: Data passed to ``knn_graph``, the partition/merge helpers and
                finally ``rebuild_labels``.
            k: Passed to ``merge_best_gui`` and used to bound the number of
                merge steps (``m - k``). ``None`` is treated as 1.
            knn: Neighbour count passed to ``knn_graph``.
            m: Passed as ``k`` to ``pre_part_graph_gui``; afterwards replaced
                by the number of distinct cluster labels found on the graph.
            alpha: Passed positionally to ``merge_best_gui``.
            save_plots: Forwarded unchanged to the plotting helpers.

        Returns:
            tuple: ``(res, dendr_height)`` where ``res`` is
            ``rebuild_labels(df)`` and ``dendr_height`` is an ``OrderedDict``
            mapping ``m - (i + 1)`` to the score returned by merge step ``i``.
        """
        if k is None:
            k = 1

        self.log.appendPlainText("Building kNN graph (k={})\n...".format(knn))
        self.log.appendPlainText("")
        graph = knn_graph(df, knn, False)

        self.plot2d_graph_gui(
            graph=graph,
            canvas=self.canvas_up,
            ax=self.ax1,
            save_plots=save_plots,
            ind_fig=self.ind_fig,
            print_clust=False,
        )

        graph = self.pre_part_graph_gui(graph=graph,
                                        canvas=self.canvas_up,
                                        ax=self.ax1,
                                        k=m,
                                        df=df,
                                        plotting=True)

        # To account for cases where the requested number of initial clusters
        # is too big, or k is already reached before the merging phase, use
        # the number of clusters actually present on the graph.
        # `graph.nodes` replaces `graph.node` (removed in NetworkX 2.4), and
        # iterating the node view directly avoids rebuilding a node list per
        # element and the assumption that nodes are labelled 0..n-1.
        distinct_clusters = {
            graph.nodes[node]["cluster"] for node in graph.nodes
        }
        m = len(distinct_clusters)
        self.log.appendPlainText("")
        self.log.appendPlainText("actual init_clust: {}".format(m))
        self.log.appendPlainText("")

        dendr_height = OrderedDict()

        for i in range(m - k):

            df, ms, ci = self.merge_best_gui(graph,
                                             df,
                                             alpha,
                                             k,
                                             verbose=False,
                                             verbose2=True)

            # A score of 0 ends the merging phase early.
            if ms == 0:
                break

            # Key is the cluster count remaining after this merge step.
            dendr_height[m - (i + 1)] = ms

            self.plot2d_data_gui(
                df=df,
                col_i=ci,
                canvas=self.canvas_down,
                ax=self.ax,
                save_plots=save_plots,
                ind_fig=self.ind_fig,
            )
            self.ind_fig += 1

        res = rebuild_labels(df)

        return res, dendr_height