예제 #1
0
def point_plot_mod2(X, a, reps, level_txt, level2_txt=None,
                    par_index=None, u=None, u_cl=None, initial_ind=None, last_reps=None,
                    not_sampled=None, not_sampled_ind=None, n_rep_fin=None):
    """
    Scatter-plot of input data points, colored according to the cluster they belong to.

    Every point is first drawn in lime; the points of each merged cluster are then redrawn in
    the cluster color. The last merged cluster (last row of ``a``) is highlighted with a
    red-bordered rectangle when it has at most two points, otherwise it is outlined through
    ``encircle``. The center of mass of that cluster is plotted as a red "X" and the points in
    ``reps`` as red dots. The current distance and number of clusters are annotated on the plot.

    When ``last_reps`` is given, a second figure is produced for the assignment phase of the
    large-dataset variation of CURE: each cluster's representatives are plotted in a nuance of
    red and surrounded by a small circle, the centers of mass are marked with an "X", and an
    arrow is drawn from every not-sampled point to its closest representative.

    :param X: input data array; columns 0 and 1 are used as x and y coordinates.
    :param a: dataframe built by the CURE algorithm. Only its index is read here: every label
              names a cluster as point labels joined by "-", optionally wrapped in parentheses.
    :param reps: list of the coordinates of representative points.
    :param level_txt: distance at which the current merging occurs, shown as ``min_dist``.
    :param level2_txt: optional incremental distance, shown as ``dist_incr`` when given.
    :param par_index: partial index giving, for each row of X, the label used in the index
                      of ``a``; when None the labels are parsed as integer row positions.
    :param u: first cluster to be merged (string; only used when par_index is given).
    :param u_cl: second cluster to be merged (string; only used when par_index is given).
    :param initial_ind: labels written on the points; defaults to the row positions.
    :param last_reps: dictionary of last representative points, one entry per cluster.
    :param not_sampled: coordinates of points that have not been initially sampled, in the
                        large dataset version.
    :param not_sampled_ind: labels of the not_sampled points.
    :param n_rep_fin: number of representatives to show for each cluster in the final assignment
                      phase; None shows all of them.
    :return list_keys_diz: if par_index is not None, the labels of par_index followed by the
                           label of the newly merged cluster; otherwise None.
    :raises ValueError: if no row of ``a`` names a cluster with more than one point.
    """

    # diz maps every label of par_index to its row position in X. It accounts for shuffled
    # data (e.g. the first row is not point 0), as happens in the large dataset version of
    # CURE where points are randomly sampled but keep their initial labels.
    diz = None
    if par_index is not None:
        diz = {label: pos for pos, label in enumerate(par_index)}

    def _rows_of(cluster_name):
        # Translate a cluster name such as "(3)-(7)" into row positions of X.
        labels = cluster_name.replace("(", "").replace(")", "").split("-")
        if diz is not None:
            return [diz[label] for label in labels]
        return [int(label) for label in labels]

    fig, ax = plt.subplots(figsize=(14, 6))

    # points that still need to be processed are plotted in lime color
    ax.scatter(X[:, 0], X[:, 1], s=300, color="lime", edgecolor="black")

    # drops the totally null columns; `axis` is passed by keyword because recent pandas
    # releases no longer accept it positionally
    a = a.dropna(axis=1, how="all")

    colors = {0: "seagreen", 1: 'lightcoral', 2: 'yellow', 3: 'grey',
              4: 'pink', 5: 'turquoise', 6: 'orange', 7: 'purple', 8: 'yellowgreen', 9: 'olive', 10: 'brown',
              11: 'tan', 12: 'plum', 13: 'rosybrown', 14: 'lightblue', 15: "khaki", 16: "gainsboro", 17: "peachpuff"}
    n_colors = len(colors)
    color_dict_rect = convert_colors(colors, alpha=0.3)

    # clusters with more than one element are always appended to `a`, so only the rows from
    # the first multi-point cluster onwards need to be recolored
    names = list(a.index)
    merged_rows = [pos for pos, name in enumerate(names) if len(name.split("-")) > 1]
    if not merged_rows:
        raise ValueError("no merged cluster found in the index of `a`")
    start = merged_rows[0]

    # plot each merged cluster in its own color
    for ind, name in enumerate(names[start:]):
        rows = _rows_of(name)
        ax.scatter(X[rows, 0], X[rows, 1], s=350, color=colors[ind % n_colors])

    # last merged cluster, i.e. the last row of `a`; it keeps the color used in the loop above
    point = _rows_of(names[-1])
    last_color = (len(names) - 1 - start) % n_colors
    cluster_xy = X[point]

    # plotting the center of mass of the newly merged cluster, marked with an X
    com = cluster_xy.mean(axis=0)
    ax.scatter(com[0], com[1], s=400, color="r", marker="X", edgecolor="black")

    # plotting representative points in red
    ax.scatter([rep[0] for rep in reps], [rep[1] for rep in reps], s=360, color="r", edgecolor="black")

    # finding the right measures for the rectangle
    rect_min = cluster_xy.min(axis=0)
    rect_diff = cluster_xy.max(axis=0) - rect_min

    xmin, xmax, ymin, ymax = ax.axis()
    xwidth = xmax - xmin
    ywidth = ymax - ymin

    # small clusters get a padded rectangle (translucent fill, red border); bigger ones are
    # outlined with encircle
    if len(point) <= 2:
        ax.add_patch(Rectangle((rect_min[0] - xwidth * 0.02, rect_min[1] - ywidth * 0.04),
                               rect_diff[0] + xwidth * 0.04, rect_diff[1] + ywidth * 0.08, fill=True,
                               color=color_dict_rect[last_color], linewidth=3,
                               ec="red"))
    else:
        encircle(list(cluster_xy[:, 0]), list(cluster_xy[:, 1]), ax=ax,
                 color=color_dict_rect[last_color], linewidth=3, ec="red")

    # adding labels to points in the plot; only `fontsize` is passed because `size` is an
    # alias of the same text property
    point_labels = initial_ind if initial_ind is not None else range(len(X))
    for i, txt in enumerate(point_labels):
        ax.annotate(txt, (X[i, 0], X[i, 1]), fontsize=10, ha='center', va='center')

    # adding the annotations
    ax.annotate("min_dist: " + str(round(level_txt, 5)), (xmax * 0.75, ymax * 0.9), fontsize=12)

    if level2_txt is not None:
        ax.annotate("dist_incr: " + str(round(level2_txt, 5)), (xmax * 0.75, ymax * 0.8), fontsize=12)

    ax.annotate("n° clust: " + str(len(a)), (xmax * 0.75, ymax * 0.7), fontsize=12)

    plt.show()

    # everything down from here refers to the last phase of the large dataset version, the assignment phase
    if last_reps is not None:

        fig, ax = plt.subplots(figsize=(14, 6))

        # plot all the points in color lime
        ax.scatter(X[:, 0], X[:, 1], s=300, color="lime", edgecolor="black")

        # color every cluster listed in `a` and collect its center of mass
        coms = []
        for ind, name in enumerate(names):
            rows = _rows_of(name)
            ax.scatter(X[rows, 0], X[rows, 1], s=350, color=colors[ind % n_colors])
            coms.append(X[rows].mean(axis=0))

        # variations of red to plot the representative points of the various clusters;
        # the palette is cycled so that more than seven clusters do not overflow it
        colors_reps = ["red", "crimson", "indianred", "lightcoral", "salmon", "darksalmon", "firebrick"]

        reps_per_cluster = list(last_reps.values())
        # flattening the last_reps values
        flat_reps = [rep for cluster_reps in reps_per_cluster for rep in cluster_reps]

        # plotting the representatives, surrounded by small circles, and the centers of mass, marked with X
        for i, cluster_reps in enumerate(reps_per_cluster):
            rep_color = colors_reps[i % len(colors_reps)]
            n_shown = len(cluster_reps) if n_rep_fin is None else min(n_rep_fin, len(cluster_reps))

            x = [cluster_reps[j][0] for j in range(n_shown)]
            y = [cluster_reps[j][1] for j in range(n_shown)]

            ax.scatter(x, y, s=400, color=rep_color, edgecolor="black")
            ax.scatter(coms[i][0], coms[i][1], s=400, color=rep_color, marker="X", edgecolor="black")

            # the circle radius reuses the x-range measured on the first figure
            for x_rep, y_rep in zip(x, y):
                ax.add_artist(plt.Circle((x_rep, y_rep), xwidth * 0.03,
                                         color=rep_color, fill=False, linewidth=3, alpha=0.7))

        if not_sampled is not None:
            # drawn once, after the representatives, instead of once per cluster
            ax.scatter(not_sampled[:, 0], not_sampled[:, 1], s=400, color="lime", edgecolor="black")

            # find the closest representative for not sampled points, and draw an arrow
            # connecting each point to its closest representative
            for sample in not_sampled:
                ind_min = np.argmin([dist1(sample, rep) for rep in flat_reps])
                closest = flat_reps[ind_min]

                ax.arrow(sample[0], sample[1],
                         closest[0] - sample[0], closest[1] - sample[1],
                         length_includes_head=True, head_width=0.03, head_length=0.05)

        # plotting the labels of each point
        for i, txt in enumerate(point_labels):
            ax.annotate(txt, (X[i, 0], X[i, 1]), fontsize=10, ha='center', va='center')

        if not_sampled is not None and not_sampled_ind is not None:
            for i, txt in enumerate(not_sampled_ind):
                ax.annotate(txt, (not_sampled[i, 0], not_sampled[i, 1]), fontsize=10,
                            ha='center', va='center')

        plt.show()

    # if par_index is not None, diz is updated with the last merged cluster and its keys are returned
    if diz is not None:
        diz["(" + u + ")" + "-" + "(" + u_cl + ")"] = len(diz)
        list_keys_diz = list(diz)

        return list_keys_diz
예제 #2
0
    def point_plot_mod_gui(self,
                           a,
                           level_txt,
                           level2_txt=None,
                           save_plots=False,
                           ind_fig=None):
        """
        Scatter plot of data points, colored according to the cluster they belong to.

        The plot is drawn on ``self.ax1`` (cleared first) using the points in ``self.X``. The
        most recently merged cluster (last row of ``a``) is enclosed in a rectangle of the same
        color as its points, with red borders, when it has at most five points; larger clusters
        are outlined through ``encircle``. The number of clusters and the distances are appended
        to ``self.log``.

        :param a: dataframe built by agg_clust/agg_clust_mod. Only its index is read here: every
                  label names a cluster as integer point positions joined by "-", optionally
                  wrapped in parentheses.
        :param level_txt: dist_tot written to the log.
        :param level2_txt: dist_incr written to the log when not None.
        :param save_plots: if True, the produced image is saved.
        :param ind_fig: index of the figure that is saved.
        :raises ValueError: if no row of ``a`` names a cluster with more than one point.
        """
        ax = self.ax1
        ax.clear()
        ax.set_title("{} procedure".format(self.name))
        ax.scatter(self.X[:, 0],
                   self.X[:, 1],
                   s=300,
                   color="lime",
                   edgecolor="black")

        # `axis` is passed by keyword because recent pandas releases no longer accept it
        # positionally
        a = a.dropna(axis=1, how="all")

        colors = {
            0: "seagreen",
            1: 'lightcoral',
            2: 'yellow',
            3: 'grey',
            4: 'pink',
            5: 'navy',
            6: 'orange',
            7: 'purple',
            8: 'salmon',
            9: 'olive',
            10: 'brown',
            11: 'tan',
            12: 'plum',
            13: 'red',
            14: 'lightblue',
            15: "khaki",
            16: "gainsboro",
            17: "peachpuff"
        }
        # cycle over the whole palette, so that its last entry is used as well
        n_colors = len(colors)

        color_dict_rect = convert_colors(colors, alpha=0.3)

        def _rows_of(cluster_name):
            # Translate a cluster name such as "(3)-(7)" into row positions of self.X.
            labels = cluster_name.replace("(", "").replace(")", "").split("-")
            return [int(label) for label in labels]

        # only rows from the first multi-point cluster onwards are recolored
        names = list(a.index)
        merged_rows = [
            pos for pos, name in enumerate(names) if len(name.split("-")) > 1
        ]
        if not merged_rows:
            raise ValueError("no merged cluster found in the index of `a`")
        start = merged_rows[0]

        for ind, name in enumerate(names[start:]):
            rows = _rows_of(name)
            ax.scatter(self.X[rows, 0],
                       self.X[rows, 1],
                       s=350,
                       color=colors[ind % n_colors])

        # last merged cluster; it keeps the color it received in the loop above
        point = _rows_of(names[-1])
        last_color = (len(names) - 1 - start) % n_colors
        cluster_xy = self.X[point]
        rect_min = cluster_xy.min(axis=0)
        rect_diff = cluster_xy.max(axis=0) - rect_min

        xmin, xmax, ymin, ymax = ax.axis()
        xwidth = xmax - xmin
        ywidth = ymax - ymin

        if len(point) <= 5:

            ax.add_patch(
                Rectangle(
                    (rect_min[0] - xwidth * 0.02, rect_min[1] - ywidth * 0.04),
                    rect_diff[0] + xwidth * 0.04,
                    rect_diff[1] + ywidth * 0.08,
                    fill=True,
                    color=color_dict_rect[last_color],
                    linewidth=3,
                    ec="red"))
        else:
            encircle(list(cluster_xy[:, 0]),
                     list(cluster_xy[:, 1]),
                     ax=ax,
                     color=color_dict_rect[last_color],
                     linewidth=3,
                     ec="red",
                     zorder=0)

        # label every point with its row position; only `fontsize` is passed because `size`
        # is an alias of the same text property
        for i in range(len(self.X)):
            ax.annotate(i, (self.X[i, 0], self.X[i, 1]),
                        fontsize=10,
                        ha='center',
                        va='center')

        self.log.appendPlainText("")
        self.log.appendPlainText("n° clust: " + str(len(a)))
        self.log.appendPlainText("dist_tot: " + str(round(level_txt, 5)))
        if level2_txt is not None:
            self.log.appendPlainText("dist_incr: " + str(round(level2_txt, 5)))

        self.canvas_up.draw()

        if save_plots is True:
            self.canvas_up.figure.savefig(
                './Images/{}_{:02}/fig_{:02}.png'.format(
                    self.name, self.ind_run, ind_fig))

        QCoreApplication.processEvents()
예제 #3
0
    def point_plot_mod2_gui(
        self,
        data,
        a,
        reps,
        ax,
        canvas,
        level_txt,
        level2_txt=None,
        par_index=None,
        u=None,
        u_cl=None,
        initial_ind=None,
        last_reps=None,
        not_sampled=None,
        not_sampled_ind=None,
        n_rep_fin=None,
        save_plots=False,
        ind_fig=None,
        ind_fig_bis=None,
    ):
        """
        Scatter-plot of input data points, colored according to the cluster they belong to.

        The plot is drawn on ``ax`` (cleared first) and rendered through ``canvas``. Every point
        is first drawn in lime; the points of each merged cluster are then redrawn in the cluster
        color. The last merged cluster (last row of ``a``) is highlighted with a red-bordered
        rectangle when it has at most five points, otherwise it is outlined through ``encircle``.
        The center of mass of that cluster is plotted as a red "X" and the points in ``reps`` as
        red dots. The current distance and number of clusters are appended to ``self.log``.

        When ``last_reps`` is given, the assignment phase of the large-dataset variation of CURE
        is drawn on top of the same axes: each cluster's representatives are plotted in a nuance
        of red and surrounded by a small circle, the centers of mass are marked with an "X", and
        an arrow is drawn from every not-sampled point to its closest representative.

        :param data: input data array; columns 0 and 1 are used as x and y coordinates.
        :param a: dataframe built by the CURE algorithm. Only its index is read here: every label
                  names a cluster as point labels joined by "-", optionally wrapped in parentheses.
        :param reps: list of the coordinates of representative points.
        :param ax: matplotlib axes to draw on.
        :param canvas: canvas that is redrawn, and whose figure is saved when save_plots is True.
        :param level_txt: distance at which the current merging occurs, logged as ``min_dist``.
        :param level2_txt: optional incremental distance, logged as ``dist_incr`` when given.
        :param par_index: partial index giving, for each row of data, the label used in the index
                          of ``a``; when None the labels are parsed as integer row positions.
        :param u: first cluster to be merged (string; only used when par_index is given).
        :param u_cl: second cluster to be merged (string; only used when par_index is given).
        :param initial_ind: labels written on the points; defaults to the row positions.
        :param last_reps: dictionary of last representative points, one entry per cluster.
        :param not_sampled: coordinates of points that have not been initially sampled, in the
                            large dataset version.
        :param not_sampled_ind: labels of the not_sampled points.
        :param n_rep_fin: number of representatives to show for each cluster in the final
                          assignment phase; None shows all of them.
        :param save_plots: if True, the produced images are saved.
        :param ind_fig: index of the figure, used in the name of the saved file.
        :param ind_fig_bis: index of the partition; when given it appears in the title (plus one)
                            and in the name of the saved file.
        :return list_keys_diz: if par_index is not None, the labels of par_index followed by the
                               label of the newly merged cluster; otherwise None.
        :raises ValueError: if no row of ``a`` names a cluster with more than one point.
        """
        ax.cla()
        if ind_fig_bis is not None:
            ax.set_title("CURE partition {}".format(ind_fig_bis + 1))
        else:
            ax.set_title("CURE final step")

        # diz maps every label of par_index to its row position in data. It accounts for
        # shuffled data (e.g. the first row is not point 0), as happens in the large dataset
        # version of CURE where points are randomly sampled but keep their initial labels.
        diz = None
        if par_index is not None:
            diz = {label: pos for pos, label in enumerate(par_index)}

        def _rows_of(cluster_name):
            # Translate a cluster name such as "(3)-(7)" into row positions of data.
            labels = cluster_name.replace("(", "").replace(")", "").split("-")
            if diz is not None:
                return [diz[label] for label in labels]
            return [int(label) for label in labels]

        # points that still need to be processed are plotted in lime color
        ax.scatter(data[:, 0],
                   data[:, 1],
                   s=300,
                   color="lime",
                   edgecolor="black")

        # drops the totally null columns; `axis` is passed by keyword because recent pandas
        # releases no longer accept it positionally
        a = a.dropna(axis=1, how="all")

        colors = {
            0: "seagreen",
            1: "lightcoral",
            2: "yellow",
            3: "grey",
            4: "pink",
            5: "turquoise",
            6: "orange",
            7: "purple",
            8: "yellowgreen",
            9: "olive",
            10: "brown",
            11: "tan",
            12: "plum",
            13: "rosybrown",
            14: "lightblue",
            15: "khaki",
            16: "gainsboro",
            17: "peachpuff",
        }
        n_colors = len(colors)

        color_dict_rect = convert_colors(colors, alpha=0.3)

        # clusters with more than one element are always appended to `a`, so only the rows from
        # the first multi-point cluster onwards need to be recolored
        names = list(a.index)
        merged_rows = [
            pos for pos, name in enumerate(names) if len(name.split("-")) > 1
        ]
        if not merged_rows:
            raise ValueError("no merged cluster found in the index of `a`")
        start = merged_rows[0]

        # plot each merged cluster in its own color
        for ind, name in enumerate(names[start:]):
            rows = _rows_of(name)
            ax.scatter(data[rows, 0],
                       data[rows, 1],
                       s=350,
                       color=colors[ind % n_colors])

        # last merged cluster, i.e. the last row of `a`; it keeps the color used in the loop above
        point = _rows_of(names[-1])
        last_color = (len(names) - 1 - start) % n_colors
        cluster_xy = data[point]

        # plotting the center of mass of the newly merged cluster, marked with an X
        com = cluster_xy.mean(axis=0)
        ax.scatter(com[0],
                   com[1],
                   s=400,
                   color="r",
                   marker="X",
                   edgecolor="black")

        # plotting representative points in red
        ax.scatter([rep[0] for rep in reps], [rep[1] for rep in reps],
                   s=360,
                   color="r",
                   edgecolor="black")

        # finding the right measures for the rectangle
        rect_min = cluster_xy.min(axis=0)
        rect_diff = cluster_xy.max(axis=0) - rect_min

        xmin, xmax, ymin, ymax = ax.axis()
        xwidth = xmax - xmin
        ywidth = ymax - ymin

        # small clusters get a padded rectangle (translucent fill, red border); bigger ones are
        # outlined with encircle
        if len(point) <= 5:

            ax.add_patch(
                Rectangle(
                    (rect_min[0] - xwidth * 0.02, rect_min[1] - ywidth * 0.04),
                    rect_diff[0] + xwidth * 0.04,
                    rect_diff[1] + ywidth * 0.08,
                    fill=True,
                    color=color_dict_rect[last_color],
                    linewidth=3,
                    ec="red",
                ))
        else:
            encircle(
                list(cluster_xy[:, 0]),
                list(cluster_xy[:, 1]),
                ax=ax,
                color=color_dict_rect[last_color],
                linewidth=3,
                ec="red",
                zorder=0,
            )

        # adding labels to points in the plot; only `fontsize` is passed because `size` is an
        # alias of the same text property
        point_labels = initial_ind if initial_ind is not None else range(
            len(data))
        for i, txt in enumerate(point_labels):
            ax.annotate(
                txt,
                (data[i, 0], data[i, 1]),
                fontsize=10,
                ha="center",
                va="center",
            )

        # logging the annotations
        self.log.appendPlainText("")
        self.log.appendPlainText("min_dist: " + str(round(level_txt, 5)))

        if level2_txt is not None:
            self.log.appendPlainText("dist_incr: " + str(round(level2_txt, 5)))

        self.log.appendPlainText("n° clust: " + str(len(a)))

        canvas.draw()

        if save_plots is True:

            if ind_fig_bis is not None:
                canvas.figure.savefig(
                    appctxt.get_resource("Images/") + "/" +
                    "{}_{:02}/fig_{:02}_{:02}.png".format(
                        self.name, self.ind_run, ind_fig_bis, ind_fig))
            else:
                canvas.figure.savefig(
                    appctxt.get_resource("Images/") + "/" +
                    "{}_{:02}/fig_fin_{:02}.png".format(
                        self.name, self.ind_run, ind_fig))

        QCoreApplication.processEvents()

        # everything down from here refers to the last phase of the large dataset version, the assignment phase
        if last_reps is not None:

            # plot all the points in color lime (the axes are not cleared in between)
            ax.scatter(data[:, 0],
                       data[:, 1],
                       s=300,
                       color="lime",
                       edgecolor="black")

            # color every cluster listed in `a` and collect its center of mass
            coms = []
            for ind, name in enumerate(names):
                rows = _rows_of(name)
                ax.scatter(
                    data[rows, 0],
                    data[rows, 1],
                    s=350,
                    color=colors[ind % n_colors],
                )
                coms.append(data[rows].mean(axis=0))

            # variations of red to plot the representative points of the various clusters
            colors_reps = [
                "red",
                "crimson",
                "indianred",
                "lightcoral",
                "salmon",
                "darksalmon",
                "firebrick",
            ]

            reps_per_cluster = list(last_reps.values())
            # flattening the last_reps values
            flat_reps = [
                rep for cluster_reps in reps_per_cluster
                for rep in cluster_reps
            ]

            # plotting the representatives, surrounded by small circles, and the centers of mass, marked with X
            for i, cluster_reps in enumerate(reps_per_cluster):
                rep_color = colors_reps[i % len(colors_reps)]
                if n_rep_fin is None:
                    n_shown = len(cluster_reps)
                else:
                    n_shown = min(n_rep_fin, len(cluster_reps))

                x = [cluster_reps[j][0] for j in range(n_shown)]
                y = [cluster_reps[j][1] for j in range(n_shown)]

                ax.scatter(x,
                           y,
                           s=400,
                           color=rep_color,
                           edgecolor="black",
                           zorder=10)
                ax.scatter(
                    coms[i][0],
                    coms[i][1],
                    s=400,
                    color=rep_color,
                    marker="X",
                    edgecolor="black",
                )

                for x_rep, y_rep in zip(x, y):
                    ax.add_artist(
                        plt.Circle(
                            (x_rep, y_rep),
                            xwidth * 0.03,
                            color=rep_color,
                            fill=False,
                            linewidth=3,
                            alpha=0.7,
                        ))

            if not_sampled is not None:
                # drawn once, after the representatives, instead of once per cluster
                ax.scatter(
                    not_sampled[:, 0],
                    not_sampled[:, 1],
                    s=400,
                    color="lime",
                    edgecolor="black",
                )

                # find the closest representative for not sampled points, and draw an arrow
                # connecting each point to its closest representative
                for sample in not_sampled:
                    ind_min = np.argmin(
                        [dist1(sample, rep) for rep in flat_reps])
                    closest = flat_reps[ind_min]

                    ax.arrow(
                        sample[0],
                        sample[1],
                        closest[0] - sample[0],
                        closest[1] - sample[1],
                        length_includes_head=True,
                        head_width=0.03,
                        head_length=0.05,
                    )

            # plotting the labels of each point
            for i, txt in enumerate(point_labels):
                ax.annotate(
                    txt,
                    (data[i, 0], data[i, 1]),
                    fontsize=10,
                    ha="center",
                    va="center",
                )

            if not_sampled is not None and not_sampled_ind is not None:
                for i, txt in enumerate(not_sampled_ind):
                    ax.annotate(
                        txt,
                        (not_sampled[i, 0], not_sampled[i, 1]),
                        fontsize=10,
                        ha="center",
                        va="center",
                    )

            canvas.draw()

            if save_plots is True:
                canvas.figure.savefig(
                    appctxt.get_resource("Images/") + "/" +
                    "{}_{:02}/fig_fin_{:02}.png".format(
                        self.name, self.ind_run, ind_fig + 1))

            QCoreApplication.processEvents()

        # if par_index is not None, diz is updated with the last merged cluster and its keys are returned
        if diz is not None:
            diz["(" + u + ")" + "-" + "(" + u_cl + ")"] = len(diz)
            list_keys_diz = list(diz)

            return list_keys_diz
예제 #4
0
def point_plot_mod(X, a, level_txt, level2_txt=None):
    """
    Scatter plot of data points, colored according to the cluster they belong to.

    The most recently merged cluster (last row of ``a``) is enclosed in a rectangle of the same
    color as its points, with red borders, when it has at most two points; larger clusters are
    outlined through ``encircle``. The total distance and the current number of clusters are
    annotated on the plot, together with the increment in distance when it is provided.

    :param X: input data as array; columns 0 and 1 are used as x and y coordinates.
    :param a: dataframe built by agg_clust/agg_clust_mod. Only its index is read here: every
              label names a cluster as integer point positions joined by "-", optionally wrapped
              in parentheses.
    :param level_txt: dist_tot displayed.
    :param level2_txt: dist_incr displayed when not None.
    :raises ValueError: if no row of ``a`` names a cluster with more than one point.
    """

    fig, ax = plt.subplots(figsize=(14, 6))

    ax.scatter(X[:, 0], X[:, 1], s=300, color="lime", edgecolor="black")

    # `axis` is passed by keyword because recent pandas releases no longer accept it
    # positionally
    a = a.dropna(axis=1, how="all")

    color_dict = {
        0: "seagreen",
        1: 'lightcoral',
        2: 'yellow',
        3: 'grey',
        4: 'pink',
        5: 'navy',
        6: 'orange',
        7: 'purple',
        8: 'salmon',
        9: 'olive',
        10: 'brown',
        11: 'tan',
        12: 'plum',
        13: 'red',
        14: 'lightblue',
        15: "khaki",
        16: "gainsboro",
        17: "peachpuff"
    }
    # cycle over the whole palette, so that its last entry is used as well
    n_colors = len(color_dict)

    color_dict_rect = convert_colors(color_dict, alpha=0.3)

    def _rows_of(cluster_name):
        # Translate a cluster name such as "(3)-(7)" into row positions of X.
        labels = cluster_name.replace("(", "").replace(")", "").split("-")
        return [int(label) for label in labels]

    # only rows from the first multi-point cluster onwards are recolored
    names = list(a.index)
    merged_rows = [pos for pos, name in enumerate(names) if len(name.split("-")) > 1]
    if not merged_rows:
        raise ValueError("no merged cluster found in the index of `a`")
    start = merged_rows[0]

    for ind, name in enumerate(names[start:]):
        rows = _rows_of(name)
        ax.scatter(X[rows, 0], X[rows, 1], s=350, color=color_dict[ind % n_colors])

    # last merged cluster; it keeps the color it received in the loop above
    point = _rows_of(names[-1])
    last_color = (len(names) - 1 - start) % n_colors
    cluster_xy = X[point]
    rect_min = cluster_xy.min(axis=0)
    rect_diff = cluster_xy.max(axis=0) - rect_min

    xmin, xmax, ymin, ymax = ax.axis()
    xwidth = xmax - xmin
    ywidth = ymax - ymin

    if len(point) <= 2:

        ax.add_patch(
            Rectangle(
                (rect_min[0] - xwidth * 0.02, rect_min[1] - ywidth * 0.04),
                rect_diff[0] + xwidth * 0.04,
                rect_diff[1] + ywidth * 0.08,
                fill=True,
                color=color_dict_rect[last_color],
                linewidth=3,
                ec="red"))
    else:
        encircle(list(cluster_xy[:, 0]),
                 list(cluster_xy[:, 1]),
                 ax=ax,
                 color=color_dict_rect[last_color],
                 linewidth=3,
                 ec="red")

    # label every point with its row position; only `fontsize` is passed because `size` is an
    # alias of the same text property
    for i in range(len(X)):
        ax.annotate(i, (X[i, 0], X[i, 1]),
                    fontsize=10,
                    ha='center',
                    va='center')

    ax.annotate("dist_tot: " + str(round(level_txt, 5)),
                (xmax * 0.75, ymax * 0.9),
                fontsize=12)

    if level2_txt is not None:
        ax.annotate("dist_incr: " + str(round(level2_txt, 5)),
                    (xmax * 0.75, ymax * 0.8),
                    fontsize=12)

    ax.annotate("n° clust: " + str(len(a)), (xmax * 0.75, ymax * 0.7),
                fontsize=12)

    plt.show()