def plot2d_data_gui(self,
                        df,
                        canvas,
                        ax,
                        save_plots,
                        ind_fig=None,
                        col_i=None):
        """
        Redraw ax with a scatter plot of df, colored by cluster, and refresh the canvas.

        If self.delay is not 0, execution is paused through pause_execution before drawing.

        :param df: dataframe with a "cluster" column; it is plotted with x=0 and y=1.
        :param canvas: canvas that is redrawn and whose figure is saved when save_plots is True.
        :param ax: axes that are cleared and drawn on.
        :param save_plots: if True, saves the figure under the "Images/" resource folder.
        :param ind_fig: index of the current figure, used in the file name; it must be a number
                        when save_plots is True, because it is formatted with "{:02}".
        :param col_i: if not None, points whose cluster equals col_i are drawn again as bigger
                      black markers with white edges.

        """
        if self.delay != 0:
            pause_execution(self.delay)

        ax.clear()
        ax.set_title(self.name + " Merging")

        colors = {
            0: "seagreen",
            1: "dodgerblue",
            2: "yellow",
            3: "grey",
            4: "pink",
            5: "turquoise",
            6: "orange",
            7: "purple",
            8: "yellowgreen",
            9: "olive",
            10: "brown",
            11: "tan",
            12: "plum",
            13: "rosybrown",
            14: "lightblue",
            15: "khaki",
            16: "gainsboro",
            17: "peachpuff",
            18: "lime",
            19: "peru",
            20: "beige",
            21: "teal",
            22: "royalblue",
            23: "tomato",
            24: "bisque",
            25: "palegreen",
        }

        # cycle through the palette so that labels beyond the last key do not raise KeyError
        color_list = [colors[label % len(colors)] for label in df["cluster"]]

        df.plot(kind="scatter", c=color_list, x=0, y=1, ax=ax, s=100)

        ax.set_xlabel("")
        ax.set_ylabel("")

        if col_i is not None:
            # filter once and reuse the result for both coordinates
            highlighted = df[df.cluster == col_i]
            ax.scatter(
                highlighted.iloc[:, 0],
                highlighted.iloc[:, 1],
                color="black",
                s=140,
                edgecolors="white",
                alpha=0.8,
            )

        canvas.draw()

        if save_plots is True:
            canvas.figure.savefig(
                appctxt.get_resource("Images/") + "/" +
                "{}_{:02}/fig_{:02}.png".format(self.name, self.ind_run,
                                                ind_fig))

        # let the GUI process pending events so the new plot becomes visible
        QCoreApplication.processEvents()
예제 #2
0
    def plot_clust_DB_gui(self,
                          save_plots=False,
                          circle_class=None,
                          noise_circle=False):
        """
        Scatter plot of the data points on self.ax1, colored according to the cluster they belong
        to (noise, labelled -1, is red). Optionally plots circles of radius self.eps around some or
        all non-noise points and around noise points. Every point of self.X is annotated with its
        row index.

        :param save_plots: if True, saves the plot under the "Images/" resource folder.
        :param circle_class: if None or False, no cluster circles are plotted; if True or the string
                             "true", plots circles around every non-noise point; otherwise it is
                             iterated as a collection of cluster labels, e.g. circle_class = [1,2]
                             will plot circles around points belonging to clusters 1 and 2.
        :param noise_circle: if True, plots red circles around noise points.

        """
        colors = {
            -1: "red",
            0: "lightblue",
            1: "lightcoral",
            2: "yellow",
            3: "grey",
            4: "pink",
            5: "navy",
            6: "orange",
            7: "purple",
            8: "salmon",
            9: "olive",
            10: "brown",
            11: "tan",
            12: "lime",
        }

        def color_of(lab):
            # noise keeps its own color; every other label cycles through keys 0..11, so that
            # points and circles of the same cluster always share one color
            return colors[-1] if lab == -1 else colors[lab % 12]

        # coordinates of every point, keyed by the string version of its row index in self.X
        X_dict = {str(i): coords for i, coords in enumerate(self.X)}

        # one row per labelled point, in the order in which the points appear in self.ClustDict
        keys = list(self.ClustDict.keys())
        df = pd.DataFrame(
            dict(
                x=[X_dict[k][0] for k in keys],
                y=[X_dict[k][1] for k in keys],
                label=[self.ClustDict[k] for k in keys],
            ),
            index=keys,
        )

        # labels that actually occur, most frequent first
        lista_lab = list(df.label.value_counts().index)

        # plot points colored according to the cluster they belong to
        for lab in lista_lab:
            df_sub = df[df.label == lab]
            self.ax1.scatter(
                df_sub.x,
                df_sub.y,
                color=color_of(lab),
                s=300,
                edgecolor="black",
                label=lab,
            )

        # plot red circles around noise points
        if noise_circle:
            df_noise = df[df.label == -1]

            for x_noise, y_noise in zip(df_noise["x"], df_noise["y"]):
                self.ax1.add_artist(
                    plt.Circle(
                        (x_noise, y_noise),
                        self.eps,
                        color="r",
                        fill=False,
                        linewidth=3,
                        alpha=0.7,
                    ))

        # plot circles around points, colored according to the cluster they belong to
        if circle_class is not None and circle_class is not False:
            # around every point or only around the specified clusters; both the boolean True
            # and the string "true" select every cluster
            every_cluster = circle_class is True or (
                isinstance(circle_class, str) and circle_class == "true")
            labels_to_circle = lista_lab if every_cluster else circle_class

            for lab in labels_to_circle:
                # noise circles are controlled by noise_circle only
                if lab == -1:
                    continue

                df_temp = df[df.label == lab]

                for x_pt, y_pt in zip(df_temp["x"], df_temp["y"]):
                    self.ax1.add_artist(
                        plt.Circle(
                            (x_pt, y_pt),
                            self.eps,
                            color=color_of(lab),
                            fill=False,
                            linewidth=3,
                            alpha=0.7,
                        ))

        self.ax1.set_title("DBSCAN Cluster Plot")
        self.ax1.set_xlabel("")
        self.ax1.set_ylabel("")

        # write the row index of every point at its center; `size` is an alias of `fontsize`,
        # so only one of the two is passed
        for i in range(len(self.X)):
            self.ax1.annotate(
                i,
                (self.X[:, 0][i], self.X[:, 1][i]),
                fontsize=10,
                ha="center",
                va="center",
            )

        # self.ax1.set_aspect('equal')
        self.ax1.legend()
        self.canvas_up.draw()

        if save_plots is True:
            self.canvas_up.figure.savefig(
                appctxt.get_resource("Images/") +
                "/" + "{}_{:02}/fig_fin_{:02}.png".format(
                    self.name, self.ind_run, self.ind_extr_fig))

        # let the GUI process pending events so the new plot becomes visible
        QCoreApplication.processEvents()
    def plot2d_graph_gui(self,
                         graph,
                         canvas,
                         ax,
                         save_plots,
                         ind_fig=None,
                         print_clust=True):
        """
        Redraw ax with the graph, coloring the nodes by their "cluster" attribute, and refresh
        the canvas.

        If self.delay is not 0, execution is paused through pause_execution before drawing.

        :param graph: graph whose nodes carry a "pos" attribute and, possibly, a "cluster" attribute.
        :param canvas: canvas that is redrawn and whose figure is saved when save_plots is True.
        :param ax: axes that are cleared and drawn on.
        :param save_plots: if True, saves the figure under the "Images/" resource folder.
        :param ind_fig: index of the current figure, used in the file name.
        :param print_clust: if True, the cluster labels with their counts are appended to self.log.

        """
        if self.delay != 0:
            pause_execution(self.delay)

        ax.clear()
        ax.set_title(self.name + " Graph Clustering")

        node_positions = nx.get_node_attributes(graph, "pos")

        # palette indexed by position; labels beyond its length wrap around
        palette = dict(
            enumerate([
                "seagreen",
                "dodgerblue",
                "yellow",
                "grey",
                "pink",
                "turquoise",
                "orange",
                "purple",
                "yellowgreen",
                "olive",
                "brown",
                "tan",
                "plum",
                "rosybrown",
                "lightblue",
                "khaki",
                "gainsboro",
                "peachpuff",
                "lime",
                "peru",
                "beige",
                "teal",
                "royalblue",
                "tomato",
                "bisque",
                "palegreen",
            ]))

        node_clusters = nx.get_node_attributes(graph, "cluster").values()
        cluster_counts = Counter(node_clusters).most_common()
        node_colors = [
            palette[cluster % len(palette)] for cluster in node_clusters
        ]

        if print_clust is True:
            self.log.appendPlainText("clusters: {}".format(cluster_counts))

        # node colors are passed only when at least one node has a cluster attribute
        draw_options = {"node_size": 60, "edgecolors": "black", "ax": ax}
        if len(node_clusters) != 0:
            draw_options["node_color"] = node_colors
        nx.draw(graph, node_positions, **draw_options)

        canvas.draw()

        if save_plots is True:
            target = (appctxt.get_resource("Images/") + "/" +
                      "{}_{:02}/fig_{:02}.png".format(self.name, self.ind_run,
                                                      ind_fig))
            canvas.figure.savefig(target)

        # let the GUI process pending events so the new plot becomes visible
        QCoreApplication.processEvents()
예제 #4
0
    def point_plot_mod2_gui(
        self,
        data,
        a,
        reps,
        ax,
        canvas,
        level_txt,
        level2_txt=None,
        par_index=None,
        u=None,
        u_cl=None,
        initial_ind=None,
        last_reps=None,
        not_sampled=None,
        not_sampled_ind=None,
        n_rep_fin=None,
        save_plots=False,
        ind_fig=None,
        ind_fig_bis=None,
    ):
        """
        Scatter-plot of input data points, colored according to the cluster they belong to.
        The last merged cluster is highlighted with a red-bordered rectangle if it has at most 5
        points, otherwise through the encircle helper; representative points are plotted in red,
        along with the center of mass of the last merged cluster, plotted as a red cross. The
        current distance and the current number of clusters are appended to self.log.
        In the last phase of CURE algorithm variation for large datasets (last_reps is not None),
        arrows are displayed from every not sampled point to its closest representative point;
        moreover, representative points are surrounded by small circles, to make them more visible.
        Representative points of different clusters are plotted in different nuances of red.

        :param data: array of the coordinates of the points, one row per point; columns 0 and 1
                     are plotted.
        :param a: input dataframe built by CURE algorithm; its index holds the cluster names, where
                  the points of a cluster are separated by "-" and may be wrapped in parentheses.
                  At least one cluster must contain more than one point.
        :param reps: list of the coordinates of representative points.
        :param ax: axes that are cleared and drawn on.
        :param canvas: canvas that is redrawn and whose figure is saved when save_plots is True.
        :param level_txt: distance at which current merging occurs, appended to self.log.
        :param level2_txt: incremental distance, appended to self.log if not None.
        :param par_index: partial index to take the shuffling of indexes into account.
        :param u: first cluster to be merged; required when par_index is not None.
        :param u_cl: second cluster to be merged; required when par_index is not None.
        :param initial_ind: labels written on the points; if None, the row numbers of data are used.
        :param last_reps: dictionary of last representative points.
        :param not_sampled: coordinates of points that have not been initially sampled, in the large
                            dataset version; required when last_reps is not None.
        :param not_sampled_ind: indexes of not_sampled point_indices.
        :param n_rep_fin: number of representatives to use for each cluster in the final assignment
                          phase in the large dataset version; required when last_reps is not None.
        :param save_plots: if True, saves the plots under the "Images/" resource folder.
        :param ind_fig: index of the current figure, used in the file name.
        :param ind_fig_bis: index of the current partition; if not None it is shown in the title
                            and used in the file name.
        :return list_keys_diz: if par_index is not None, returns the new indexes of par_index;
                               otherwise nothing is returned.

        """
        ax.cla()
        if ind_fig_bis is not None:
            ax.set_title("CURE partition {}".format(ind_fig_bis + 1))
        else:
            ax.set_title("CURE final step")

        # diz is used to take the shuffling of data into account, e.g. if the first row doesn't
        # correspond to point 0: this is useful for the large dataset version of CURE, where data
        # points are randomly sampled, but the initial indices are kept to be plotted.
        if par_index is not None:
            diz = dict(zip(par_index, range(len(par_index))))

        def member_rows(cluster_name):
            """Return the rows of data of the points listed in a cluster name like "(3)-(7)"."""
            members = cluster_name.replace("(", "").replace(")", "").split("-")
            if par_index is not None:
                return [diz[member] for member in members]
            return [int(member) for member in members]

        def annotate_points(labels, coords):
            """Write each label at the center of the corresponding row of coords."""
            for i, txt in enumerate(labels):
                # `size` is an alias of `fontsize`, so only one of the two is passed
                ax.annotate(
                    txt,
                    (coords[:, 0][i], coords[:, 1][i]),
                    fontsize=10,
                    ha="center",
                    va="center",
                )

        # labels written on the points of data, shared by both plotting phases
        point_labels = initial_ind if initial_ind is not None else range(
            len(data))

        # points that still need to be processed are plotted in lime color
        ax.scatter(data[:, 0],
                   data[:, 1],
                   s=300,
                   color="lime",
                   edgecolor="black")

        # drops the totally null columns, so that the number of columns goes to
        # 2*(cardinality of biggest cluster); the axis is passed by keyword because recent
        # pandas versions do not accept it positionally
        a = a.dropna(axis=1, how="all")

        colors = {
            0: "seagreen",
            1: "lightcoral",
            2: "yellow",
            3: "grey",
            4: "pink",
            5: "turquoise",
            6: "orange",
            7: "purple",
            8: "yellowgreen",
            9: "olive",
            10: "brown",
            11: "tan",
            12: "plum",
            13: "rosybrown",
            14: "lightblue",
            15: "khaki",
            16: "gainsboro",
            17: "peachpuff",
        }
        n_colors = len(colors)

        color_dict_rect = convert_colors(colors, alpha=0.3)

        # to speed things up, this splits all points inside the clusters' names, and start gives
        # the starting index that shows where clusters with more than 1 element start (because
        # they are always appended to a)
        len_ind = [len(name.split("-")) for name in a.index]
        start = np.min([i for i, size in enumerate(len_ind) if size > 1])

        # for each cluster, take the single points composing it and plot them in the appropriate
        # color, if necessary taking the labels of par_index into account
        for ind, i in enumerate(range(start, len(a))):
            rows = member_rows(a.index[i])
            X_clust = [data[row, 0] for row in rows]
            Y_clust = [data[row, 1] for row in rows]

            ax.scatter(X_clust, Y_clust, s=350, color=colors[ind % n_colors])

        # last merged cluster, so the last element of matrix a; after the loop above, ind,
        # X_clust and Y_clust also refer to this cluster
        point = member_rows(a.index[-1])
        # finding the new center of mass of the newly merged cluster
        com = data[point].mean(axis=0)

        # plotting the center of mass, marked with an X
        ax.scatter(com[0],
                   com[1],
                   s=400,
                   color="r",
                   marker="X",
                   edgecolor="black")

        # plotting representative points in red
        x_reps = [rep[0] for rep in reps]
        y_reps = [rep[1] for rep in reps]
        ax.scatter(x_reps, y_reps, s=360, color="r", edgecolor="black")

        # finding the right measures for the rectangle
        rect_min = data[point].min(axis=0)
        rect_diff = data[point].max(axis=0) - rect_min

        # current extent of the axes, used to size the margins of the rectangle and the circles
        x_low, x_high, y_low, y_high = ax.axis()
        xwidth = x_high - x_low
        ywidth = y_high - y_low

        # highlighting the last merged cluster: a filled rectangle with a red border for small
        # clusters, the encircle helper for bigger ones
        if len(point) <= 5:

            ax.add_patch(
                Rectangle(
                    (rect_min[0] - xwidth * 0.02, rect_min[1] - ywidth * 0.04),
                    rect_diff[0] + xwidth * 0.04,
                    rect_diff[1] + ywidth * 0.08,
                    fill=True,
                    color=color_dict_rect[ind % n_colors],
                    linewidth=3,
                    ec="red",
                ))
        else:
            encircle(
                X_clust,
                Y_clust,
                ax=ax,
                color=color_dict_rect[ind % n_colors],
                linewidth=3,
                ec="red",
                zorder=0,
            )

        # adding labels to points in the plot
        annotate_points(point_labels, data)

        # logging the current distance(s) and number of clusters
        self.log.appendPlainText("")
        self.log.appendPlainText("min_dist: " + str(round(level_txt, 5)))

        if level2_txt is not None:
            self.log.appendPlainText("dist_incr: " + str(round(level2_txt, 5)))

        self.log.appendPlainText("n° clust: " + str(len(a)))

        canvas.draw()

        if save_plots is True:

            if ind_fig_bis is not None:
                canvas.figure.savefig(
                    appctxt.get_resource("Images/") + "/" +
                    "{}_{:02}/fig_{:02}_{:02}.png".format(
                        self.name, self.ind_run, ind_fig_bis, ind_fig))
            else:
                canvas.figure.savefig(
                    appctxt.get_resource("Images/") + "/" +
                    "{}_{:02}/fig_fin_{:02}.png".format(
                        self.name, self.ind_run, ind_fig))

        QCoreApplication.processEvents()

        # everything down from here refers to the last phase of the large dataset version, the
        # assignment phase
        if last_reps is not None:

            # plot all the points in color lime
            ax.scatter(data[:, 0],
                       data[:, 1],
                       s=300,
                       color="lime",
                       edgecolor="black")

            # plot every cluster in its color and find the centers of mass of the clusters, using
            # the matrix a to find which points belong to which cluster
            coms = []
            for ind, cluster_name in enumerate(a.index):
                rows = member_rows(cluster_name)
                ax.scatter(
                    data[rows, 0],
                    data[rows, 1],
                    s=350,
                    color=colors[ind % n_colors],
                )
                coms.append(data[rows].mean(axis=0))

            # variations of red to plot the representative points of the various clusters
            colors_reps = [
                "red",
                "crimson",
                "indianred",
                "lightcoral",
                "salmon",
                "darksalmon",
                "firebrick",
            ]
            n_colors_reps = len(colors_reps)

            rep_groups = list(last_reps.values())

            # flattening the last_reps values
            flat_reps = [rep for group in rep_groups for rep in group]

            # plotting the representatives, surrounded by small circles, and the centers of mass,
            # marked with X
            # NOTE(review): coms[i] assumes last_reps lists the clusters in the same order as the
            # rows of a - confirm against the caller
            for i, group in enumerate(rep_groups):
                n_shown = min(n_rep_fin, len(group))
                color_rep = colors_reps[i % n_colors_reps]

                x = [group[j][0] for j in range(n_shown)]
                y = [group[j][1] for j in range(n_shown)]

                ax.scatter(x,
                           y,
                           s=400,
                           color=color_rep,
                           edgecolor="black",
                           zorder=10)
                ax.scatter(
                    coms[i][0],
                    coms[i][1],
                    s=400,
                    color=color_rep,
                    marker="X",
                    edgecolor="black",
                )

                for x_rep, y_rep in zip(x, y):
                    ax.add_artist(
                        plt.Circle(
                            (x_rep, y_rep),
                            xwidth * 0.03,
                            color=color_rep,
                            fill=False,
                            linewidth=3,
                            alpha=0.7,
                        ))

            # not sampled points are plotted once, after every cluster has been drawn, instead of
            # being redrawn identically at each pass of the loop above
            ax.scatter(
                not_sampled[:, 0],
                not_sampled[:, 1],
                s=400,
                color="lime",
                edgecolor="black",
            )

            # find the closest representative for not sampled points, and draw an arrow
            # connecting the points to its closest representative
            for sample in not_sampled:
                dist_int = [dist1(sample, rep) for rep in flat_reps]
                ind_min = np.argmin(dist_int)

                ax.arrow(
                    sample[0],
                    sample[1],
                    flat_reps[ind_min][0] - sample[0],
                    flat_reps[ind_min][1] - sample[1],
                    length_includes_head=True,
                    head_width=0.03,
                    head_length=0.05,
                )

            # plotting the indexes for each point
            annotate_points(point_labels, data)

            if not_sampled_ind is not None:
                annotate_points(not_sampled_ind, not_sampled)

            canvas.draw()

            if save_plots is True:
                canvas.figure.savefig(
                    appctxt.get_resource("Images/") + "/" +
                    "{}_{:02}/fig_fin_{:02}.png".format(
                        self.name, self.ind_run, ind_fig + 1))

            QCoreApplication.processEvents()

        # if par_index is not None, diz is updated with the last merged cluster and its keys are
        # returned
        if par_index is not None:
            diz["(" + u + ")" + "-" + "(" + u_cl + ")"] = len(diz)
            list_keys_diz = list(diz.keys())

            return list_keys_diz
예제 #5
0
    def point_plot_mod_gui(self,
                           X_dict,
                           point,
                           save_plots=False,
                           ind_fig=None):
        """
        Plots a scatter plot of points on self.ax1, where the currently inspected point is light
        black and surrounded by a red circle of radius eps, where already processed point are
        plotted according to ClustDict and without edgecolor, whereas still-to-process points are
        green with black edgecolor. Every point of self.X is annotated with its row index.

        :param X_dict: input dictionary version of self.X, mapping each key to the coordinates
                       of a point.
        :param point: key of X_dict of the point that is currently inspected.
        :param save_plots: if True, saves the plot under the "Images/" resource folder.
        :param ind_fig: index of the current plot, used in the file name.

        """

        colors = {
            -1: "red",
            0: "lightblue",
            1: "lightcoral",
            2: "yellow",
            3: "grey",
            4: "pink",
            5: "navy",
            6: "orange",
            7: "purple",
            8: "salmon",
            9: "olive",
            10: "brown",
            11: "tan",
            12: "lime",
        }

        self.ax1.cla()
        self.ax1.set_title("DBSCAN procedure")

        # plot scatter points in color lime
        self.ax1.scatter(self.X[:, 0],
                         self.X[:, 1],
                         s=300,
                         color="lime",
                         edgecolor="black")

        # plot colors according to clusters: noise (-1) keeps its own color, every other label
        # cycles through keys 0..11
        for key, lab in self.ClustDict.items():
            self.ax1.scatter(
                X_dict[key][0],
                X_dict[key][1],
                color=colors[lab % 12] if lab != -1 else colors[-1],
                s=300,
            )

        # plot the last added point bigger and black, with a red circle surrounding it
        x_point, y_point = X_dict[point][0], X_dict[point][1]
        self.ax1.scatter(x=x_point,
                         y=y_point,
                         s=400,
                         color="black",
                         alpha=0.4)

        circle1 = plt.Circle(
            (x_point, y_point),
            self.eps,
            color="r",
            fill=False,
            linewidth=3,
            alpha=0.7,
        )
        self.ax1.add_artist(circle1)

        # write the row index of every point at its center; `size` is an alias of `fontsize`,
        # so only one of the two is passed
        for i in range(len(self.X)):
            self.ax1.annotate(
                i,
                (self.X[:, 0][i], self.X[:, 1][i]),
                fontsize=10,
                ha="center",
                va="center",
            )

        # self.ax1.set_aspect('equal')
        # self.ax1.legend(fontsize=8)

        self.canvas_up.draw()
        if save_plots is True:
            self.canvas_up.figure.savefig(
                appctxt.get_resource("Images/") + "/" +
                "{}_{:02}/fig_{:02}.png".format(self.name, self.ind_run,
                                                ind_fig))

        # let the GUI process pending events so the new plot becomes visible
        QCoreApplication.processEvents()
예제 #6
0
def plot_tree_fin_gui(tree,
                      log,
                      ind_run,
                      ind_fig,
                      label_graphviz,
                      save_plots=False,
                      info=True):
    """
    Plot the final CFtree built by BIRCH. Every tree node displays the number of points of its
    feature; below every leaf a colored node lists the number of points of each of its entries.
    The graph is rendered to a png file with graphviz and shown in label_graphviz.

    :param tree: tree built during BIRCH algorithm execution.
    :param log: widget to which the messages are appended through appendPlainText.
    :param ind_run: index of the current run, used in the name of the output folder.
    :param ind_fig: index of the current figure, used in the name of the output file.
    :param label_graphviz: label on which the rendered graph is set as a scaled pixmap.
    :param save_plots: if False, the output folder of this run is removed after the graph has
                       been loaded.
    :param info: if True, tree height, number of nodes, leaves and entries are logged.
    """
    height = tree.height

    if info is True:
        log.appendPlainText("Tree height is {0}".format(height))
        log.appendPlainText("Number of nodes: {0}".format(tree.amount_nodes))
        log.appendPlainText("Number of leaves: {0}".format(len(tree.leafes)))
        log.appendPlainText("Number of entries: {0}".format(
            tree.amount_entries))

    if tree.amount_nodes > 2704:
        log.appendPlainText("Too many nodes, limit is 2704")

        return

    colors = {
        0: "seagreen",
        1: "forestgreen",
        2: "yellow",
        3: "grey",
        4: "pink",
        5: "turquoise",
        6: "orange",
        7: "purple",
        8: "yellowgreen",
        9: "red",
        10: "cyan",
        11: "tan",
        12: "plum",
        13: "rosybrown",
        14: "lightblue",
        15: "khaki",
        16: "gainsboro",
        17: "peachpuff",
    }

    def feat_create(level_nodes):
        """
        Auxiliary function that returns for each node level the number of points
        and the number of successors of its nodes
        """
        features_num = []
        successors_num = []
        for el in level_nodes:
            features_num.append(el.feature.number_points)
            try:
                successors_num.append(len(el.successors))
            except (AttributeError, TypeError):
                # nodes without a usable `successors` attribute get no outgoing tree edges;
                # they are skipped rather than treated as an error
                pass

        return features_num, successors_num

    # collecting data for each tree level
    feat_num = []
    succ_num = []
    for lev in range(height):
        level_feat_num, level_succ_num = feat_create(tree.get_level_nodes(lev))
        feat_num.append(level_feat_num)
        succ_num.append(level_succ_num)

    # collect data of leaves: for every leaf, the number of points of each of its entries
    single_entries = [[entry.number_points for entry in leaf.entries]
                      for leaf in tree.get_level_nodes(height - 1)]

    # creating names for nodes: every ordered pair of ASCII letters, 52 * 52 = 2704 names
    prov = list("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
    lett = [first + second for first in prov for second in prov]

    # the graph holds the root, the nodes of the other tree levels and one extra node per leaf,
    # so the number of names can be exceeded even when the tree itself is below the limit
    n_graph_nodes = (1 + sum(len(feat_num[level])
                             for level in range(1, height)) +
                     len(single_entries))
    if n_graph_nodes > len(lett):
        log.appendPlainText("Too many nodes, limit is 2704")

        return

    # creating the tree
    dot = graphviz.Digraph(comment="Clustering", format="png")
    # root
    dot.node(lett[0], str(feat_num[0][0]))

    # number of names already used after the root
    placeholder = 0

    # all tree levels below the root
    for level in range(1, height):
        for q, n_points in enumerate(feat_num[level], start=1):
            dot.node(lett[placeholder + q], str(n_points))
        placeholder += len(feat_num[level])

    # one colored node per leaf, listing the sizes of its entries; colors cycle through the
    # whole palette
    for q, entry_sizes in enumerate(single_entries, start=1):
        dot.node(
            lett[placeholder + q],
            str(entry_sizes),
            color=colors[(q - 1) % len(colors)],
            style="filled",
        )

    # adding edges between nodes: a is the index of the current parent, b the index of the
    # last node that already has a parent
    a = 0
    b = 0
    # for all nodes except leaves
    for level in range(0, height):
        for num_succs in succ_num[level]:
            for el in range(num_succs):
                dot.edge(lett[a], lett[b + el + 1])
            a += 1
            b += num_succs
    # for leaves: every leaf is linked to the node listing its entries
    for i in range(len(single_entries)):
        dot.edge(lett[a], lett[b + i + 1])
        a += 1

    # graph = graphviz.Source(dot)
    # graph.view()

    graph_path = dot.render(filename=appctxt.get_resource("Images/") + "/" +
                            "BIRCH_{:02}/graph_{:02}".format(ind_run, ind_fig))

    pixmap = QPixmap(graph_path)
    label_graphviz.setScaledContents(True)
    label_graphviz.setPixmap(pixmap)

    # the rendered files are only kept when plots are being saved
    folder = appctxt.get_resource("Images/") + "/" + "BIRCH_{:02}".format(
        ind_run)
    if save_plots is False:
        rmtree(folder)

    QCoreApplication.processEvents()
예제 #7
0
def plot_birch_leaves_gui(tree,
                          data,
                          ax,
                          canvas,
                          ind_run,
                          ind_fig,
                          name="BIRCH",
                          save_plots=False):
    """
    Scatter plot of the data points, colored according to the leaf they belong to.

    Leaf entries holding a single point are drawn as a filled circle at that
    point; entries holding several points are drawn as a cross at the entry
    centroid, annotated with the number of points of the entry. Every data
    point is additionally labelled with its positional index.

    :param tree: tree built during BIRCH algorithm execution.
    :param data: input data, convertible to a 2-D array; columns 0 and 1 are plotted.
    :param ax: axis on which to plot (it is cleared first).
    :param canvas: canvas that is redrawn and whose figure is optionally saved.
    :param ind_run: index of the run, used in the name of the output folder.
    :param ind_fig: index of the figure; if None the plot is titled and saved
                    as the final clustering.
    :param name: name of the algorithm, used in the title and the output path.
    :param save_plots: if True, the produced image is saved.

    """
    ax.clear()
    if ind_fig is not None:
        ax.set_title("{} run number {}".format(name, ind_fig + 1))
    else:
        ax.set_title("{} final clustering".format(name))

    # one color per leaf, reused cyclically when there are more leaves than colors
    palette = (
        "seagreen",
        "forestgreen",
        "yellow",
        "grey",
        "pink",
        "turquoise",
        "orange",
        "purple",
        "yellowgreen",
        "red",
        "cyan",
        "tan",
        "plum",
        "rosybrown",
        "lightblue",
        "khaki",
        "gainsboro",
        "peachpuff",
    )

    # convert the input once instead of once per plotted/annotated point
    points = np.array(data)
    xs, ys = points[:, 0], points[:, 1]

    # plot every point in white with white edgecolor (invisible); this only
    # fixes the extent of the axes
    ax.scatter(xs, ys, s=300, color="white", edgecolor="white")

    # for every leaf
    for leaf_idx, leaf in enumerate(tree.get_level_nodes(tree.height - 1)):
        leaf_color = palette[leaf_idx % len(palette)]
        # for every entry in the leaf
        for entry in leaf.entries:
            # if it is a single point, plot it with the color of its leaf
            if entry.number_points == 1:
                ax.scatter(
                    entry.linear_sum[0],
                    entry.linear_sum[1],
                    color=leaf_color,
                    s=300,
                    edgecolor="black",
                )
            # else, plot the entry centroid as a cross and leave the points white
            else:
                centroid = entry.get_centroid()
                ax.scatter(
                    centroid[0],
                    centroid[1],
                    color=leaf_color,
                    marker="X",
                    s=200,
                )
                ax.annotate(
                    entry.number_points,
                    (centroid[0], centroid[1]),
                    fontsize=18,
                )

    # plot indexes of points in plot; "size" is an alias of "fontsize",
    # so only one of the two is passed
    for idx, (x, y) in enumerate(zip(xs, ys)):
        ax.annotate(
            idx,
            (x, y),
            fontsize=10,
            ha="center",
            va="center",
        )

    canvas.draw()

    if save_plots is True:
        if ind_fig is not None:
            fig_path = "{}_{:02}/fig_{:02}.png".format(name, ind_run, ind_fig)
        else:
            fig_path = "{}_{:02}/fig_fin.png".format(name, ind_run)
        canvas.figure.savefig(appctxt.get_resource("Images/") + "/" + fig_path)

    QCoreApplication.processEvents()
예제 #8
0
    def reach_plot_gui(self, data, save_plots=False, ind_fig=None):
        """
        Draw the reachability bar plot from the ClustDist produced by OPTICS,
        together with a horizontal line at eps.

        Bars are drawn on self.ax, the eps tick is put on self.ax_t and the
        result is rendered through self.canvas_down.

        :param data: input dictionary; keys not yet in self.ClustDist get a
                     zero-height bar with a blank tick label.
        :param save_plots: if True, the produced image is saved.
        :param ind_fig: index of the figure that is saved.
        """

        # bar label -> bar height; infinite reachability is drawn at eps*1.15
        heights = {
            label: (self.eps * 1.15 if np.isinf(dist) else dist)
            for label, dist in self.ClustDist.items()
        }

        # points that are still to be processed
        pending = list(set(data.keys()) - set(self.ClustDist.keys()))

        # processed points keep their label, pending ones get a blank tick
        labels = list(self.ClustDist.keys())
        labels.extend(" " for _ in pending)

        # add the necessary zeroes for points that are still to be processed
        heights.update(dict.fromkeys(pending, 0))

        axis = self.ax
        axis.cla()
        axis.set_title("Reachability Plot")
        axis.set_ylabel("reachability distance")
        axis.bar(heights.keys(), heights.values())
        axis.set_xticklabels(labels, rotation=90, fontsize=8)

        # plot horizontal line for eps
        axis.axhline(self.eps, color="red", linewidth=3)

        # second axis carries a single tick marking eps
        twin = self.ax_t
        twin.set_ylim(axis.get_ylim())
        twin.set_yticks([self.eps])
        twin.set_yticklabels(["\u03B5"])

        self.canvas_down.draw()

        if save_plots is True:
            rel_path = "{}_{:02}/reachplot_{:02}.png".format(
                self.name, self.ind_run, ind_fig
            )
            self.canvas_down.figure.savefig(
                appctxt.get_resource("Images/") + "/" + rel_path
            )

        QCoreApplication.processEvents()
예제 #9
0
    def plot_clust_gui(self, save_plots=False):
        """
        Plot the final OPTICS result on the two canvases of the window.

        On self.ax1 a scatter plot is drawn, where points are colored according
        to the cluster they belong to; on self.ax a reachability plot is drawn,
        where bar colors correspond to the clusters and the two horizontal
        lines represent eps and eps_extr (also marked on self.ax_t).

        :param save_plots: if True, both figures are saved.
        """

        self.ax1.set_title("OPTICS Cluster Plot")

        self.ax.set_title("OPTICS Reachability Plot")
        self.ax.set_ylabel("reachability distance")

        # extract the cluster dictionary using DBSCAN
        cl = ExtractDBSCANclust(self.ClustDist, self.CoreDist, self.eps_extr)

        # points are keyed by their index as string; the rows of the dataframe
        # follow the key order of self.ClustDist so that they line up with the
        # bars of the reachability plot
        coords = {str(i): point for i, point in enumerate(self.X)}
        ordered_keys = list(self.ClustDist.keys())

        df = pd.DataFrame(
            {
                "x": [coords[key][0] for key in ordered_keys],
                "y": [coords[key][1] for key in ordered_keys],
                "label": [cl[key] for key in ordered_keys],
            },
            index=ordered_keys,
        )

        colors = {
            -1: "red",
            0: "lightblue",
            1: "lightcoral",
            2: "yellow",
            3: "grey",
            4: "pink",
            5: "navy",
            6: "orange",
            7: "purple",
            8: "salmon",
            9: "olive",
            10: "brown",
            11: "tan",
            12: "lime",
        }
        n_cluster_colors = len(colors) - 1  # every entry except the -1 one

        def cluster_color(label):
            # label -1 has its own color; the others cycle over the palette
            if label == -1:
                return colors[-1]
            return colors[label % n_cluster_colors]

        # first plot: scatter plot of points colored according to the cluster they belong to
        for key, group in df.groupby("label"):
            group.plot(
                ax=self.ax1,
                kind="scatter",
                x="x",
                y="y",
                label=key,
                color=cluster_color(key),
                s=300,
                edgecolor="black",
            )

        self.ax1.set_xlabel("")
        self.ax1.set_ylabel("")

        # write the index of every point on top of it; "size" is an alias of
        # "fontsize", so only one of the two is passed
        for idx, (x, y) in enumerate(zip(self.X[:, 0], self.X[:, 1])):
            self.ax1.annotate(
                idx,
                (x, y),
                fontsize=10,
                ha="center",
                va="center",
            )

        # second plot: reachability plot, with colors corresponding to clusters;
        # infinite reachability is drawn at eps*1.15
        plot_dic = {
            key: (self.eps * 1.15 if np.isinf(value) else value)
            for key, value in self.ClustDist.items()
        }

        self.ax.bar(
            list(plot_dic.keys()),
            list(plot_dic.values()),
            color=[cluster_color(label) for label in df.label],
        )

        self.ax.axhline(self.eps, color="black", linewidth=3)

        self.ax.axhline(self.eps_extr, color="black", linewidth=3)

        self.ax_t.set_ylim(self.ax.get_ylim())
        self.ax_t.set_yticks([self.eps, self.eps_extr])
        self.ax_t.set_yticklabels(["\u03B5", "\u03B5" + "'"])
        self.ax.set_xticklabels(ordered_keys, rotation=90, fontsize=8)

        self.canvas_up.draw()
        self.canvas_down.draw()

        if save_plots is True:
            folder = appctxt.get_resource("Images/") + "/"
            self.canvas_up.figure.savefig(
                folder
                + "{}_{:02}/fig_fin_{:02}.png".format(
                    self.name, self.ind_run, self.ind_extr_fig
                )
            )
            self.canvas_down.figure.savefig(
                folder
                + "{}_{:02}/reach_plot_fin_{:02}.png".format(
                    self.name, self.ind_run, self.ind_extr_fig
                )
            )

        QCoreApplication.processEvents()
예제 #10
0
    def plot_clust_dict_gui(self, data, lab_dict, ax, canvas, save_plots, ind_fig):
        """
        Scatter plot of the data points colored by cluster label, with the
        per-label mean position marked by a red cross ("density attractors").

        :param data: input data, convertible to a 2-D array; columns 0 and 1 are plotted.
        :param lab_dict: dataframe with the columns "label", "x" and "y"; it is
                         read with lab_dict.label[i] for i in range(len(lab_dict)),
                         so its index is presumably 0..n-1 (verify against caller).
                         Label -1 is drawn in red.
        :param ax: axis on which to plot (it is cleared first).
        :param canvas: canvas that is redrawn and whose figure is optionally saved.
        :param save_plots: if True, the produced image is saved.
        :param ind_fig: index of the figure that is saved.
        """

        ax.clear()
        ax.set_title("DENCLUE clusters")

        colors = {
            0: "seagreen",
            1: "lightcoral",
            2: "yellow",
            3: "grey",
            4: "pink",
            5: "turquoise",
            6: "orange",
            7: "purple",
            8: "yellowgreen",
            9: "olive",
            10: "brown",
            11: "tan",
            12: "plum",
            13: "rosybrown",
            14: "lightblue",
            15: "khaki",
            16: "gainsboro",
            17: "peachpuff",
            18: "lime",
            19: "peru",
            20: "dodgerblue",
            21: "teal",
            22: "royalblue",
            23: "tomato",
            24: "bisque",
            25: "palegreen",
        }

        # label -1 is always red, the other labels cycle over the palette
        labels = [lab_dict.label[i] for i in range(len(lab_dict))]
        col = [
            "red" if label == -1 else colors[label % len(colors)]
            for label in labels
        ]

        # convert the input once instead of once per plotted/annotated point
        points = np.array(data)

        ax.scatter(
            points[:, 0],
            points[:, 1],
            s=300,
            edgecolor="black",
            color=col,
            alpha=0.8,
        )

        # mean position of every label; only the labels 0..max are drawn,
        # which leaves out the -1 group
        df_dens_attr = lab_dict.groupby("label").mean()
        attractor_labels = range(df_dens_attr.iloc[-1].name + 1)

        x_dens_attr = [df_dens_attr.loc[i]["x"] for i in attractor_labels]
        y_dens_attr = [df_dens_attr.loc[i]["y"] for i in attractor_labels]

        ax.scatter(
            x_dens_attr,
            y_dens_attr,
            color="red",
            marker="X",
            s=300,
            edgecolor="black",
            label="density attractors",
        )

        # add indexes to points in plot; "size" is an alias of "fontsize",
        # so only one of the two is passed
        for idx, point in enumerate(points):
            ax.annotate(
                idx,
                (point[0], point[1]),
                fontsize=10,
                ha="center",
                va="center",
            )

        # open the final-step window when four plots are selected, or three
        # with the one at position 2 switched off
        n_selected = np.sum(self.plot_list)
        cond_1 = n_selected == 4
        cond_2 = (n_selected == 3) and (self.plot_list[2] == False)

        if cond_1 or cond_2:
            self.openFinalStepWindow_3(canvas=canvas)

        ax.legend(fontsize=8)
        canvas.draw()

        if save_plots is True:
            canvas.figure.savefig(
                appctxt.get_resource("Images/")
                + "/"
                + "{}_{:02}/fig_{:02}.png".format(self.name, self.ind_run, ind_fig)
            )

        QCoreApplication.processEvents()
예제 #11
0
    def point_plot_gui(
        self,
        X_dict,
        coords,
        neigh,
        processed=None,
        col="yellow",
        save_plots=False,
        ind_fig=None,
    ):
        """
        Plots a scatter plot of the points on self.ax1: the point at coords is
        transparent black and surrounded by a red circle of radius eps,
        neighbors are red, processed points are plotted in col (yellow by
        default) and without edgecolor, whereas still-to-process points are
        lime with black edgecolor.

        :param X_dict: input dictionary version of X.
        :param coords: coordinates of the point that is currently inspected.
        :param neigh: neighborhood of the point as dictionary.
        :param processed: keys of X_dict of the already processed points, to plot in col.
        :param col: color to use for processed points, yellow by default.
        :param save_plots: if True, the produced image is saved.
        :param ind_fig: index of the figure that is saved.

        """

        self.ax1.cla()
        self.ax1.set_title("{} procedure".format(self.name))

        xs, ys = self.X[:, 0], self.X[:, 1]

        # plot every point in color lime; later layers are drawn on top
        self.ax1.scatter(
            xs,
            ys,
            s=300,
            color="lime",
            edgecolor="black",
            label="unprocessed",
        )

        # plot the already processed points in col
        if processed is not None:
            processed_x = [X_dict[key][0] for key in processed]
            processed_y = [X_dict[key][1] for key in processed]
            self.ax1.scatter(
                processed_x, processed_y, s=300, color=col, label="processed"
            )

        # plot points in neighborhood in red, if neigh is not empty
        if len(neigh) != 0:
            neigh_array = np.array(list(neigh.values()))
            self.ax1.scatter(
                neigh_array[:, 0],
                neigh_array[:, 1],
                s=300,
                color="red",
                label="neighbors",
            )

        # plot the inspected point in black and surround it with a red circle
        self.ax1.scatter(x=coords[0], y=coords[1], s=400, color="black", alpha=0.4)

        eps_circle = plt.Circle(
            (coords[0], coords[1]),
            self.eps,
            color="r",
            fill=False,
            linewidth=3,
            alpha=0.7,
        )
        self.ax1.add_artist(eps_circle)

        # write the index of every point on top of it; "size" is an alias of
        # "fontsize", so only one of the two is passed
        for idx, (x, y) in enumerate(zip(xs, ys)):
            self.ax1.annotate(
                idx,
                (x, y),
                fontsize=10,
                ha="center",
                va="center",
            )

        self.ax1.legend(fontsize=8)
        self.canvas_up.draw()

        if save_plots is True:
            self.canvas_up.figure.savefig(
                appctxt.get_resource("Images/")
                + "/"
                + "{}_{:02}/fig_{:02}.png".format(self.name, self.ind_run, ind_fig)
            )

        QCoreApplication.processEvents()
예제 #12
0
    def plot_3d_or_contour_gui(
        self,
        data,
        s,
        ax,
        canvas,
        save_plots,
        ind_fig,
        three=False,
        scatter=False,
        prec=3,
    ):
        """
        Evaluate gauss_dens on a square grid around the data and draw its
        contour lines, optionally with the data points on top.

        Progress messages are appended to self.log while the grid is filled.

        :param data: input data, convertible to a 2-D array; columns 0 and 1 are used.
        :param s: value forwarded to gauss_dens as its s argument.
        :param ax: axis on which to plot (it is cleared first).
        :param canvas: canvas that is redrawn and whose figure is optionally saved.
        :param save_plots: if True, the produced image is saved.
        :param ind_fig: index of the figure that is saved.
        :param three: if True the grid is still evaluated, but nothing is drawn.
        :param scatter: if True, the data points are plotted over the contours.
        :param prec: the grid has prec * 10 points per side.
        """

        ax.clear()
        ax.set_title("Scatterplot with Countour plot")

        pts = np.array(data)
        col_x = pts[:, 0]
        col_y = pts[:, 1]

        # the same range, padded by 1 on both sides, is used on both axes
        lower = min(col_x.min(), col_y.min())
        upper = max(col_x.max(), col_y.max())

        side = prec * 10
        xx = np.outer(np.linspace(lower - 1, upper + 1, side), np.ones(side))
        yy = xx.copy().T  # transpose
        z = np.empty((side, side))

        # row index at which each progress message is emitted
        milestones = (
            (int(side / 4), "contour progress: 25%"),
            (side / 2, "contour progress: 50%"),
            (int((side / 4) * 3), "contour progress: 75%"),
        )

        for row in range(side):

            for threshold, message in milestones:
                if row == threshold:
                    self.log.appendPlainText(message)
                    QCoreApplication.processEvents()

            for column in range(side):
                grid_point = np.array([xx[row][row], yy[row][column]])
                z[row, column] = gauss_dens(x=grid_point, D=data, s=s)

        if three is True:
            # placeholder for a 3-D surface rendering, e.g.
            # ax = plt.axes(projection="3d")
            # ax.plot_surface(xx, yy, z, cmap='winter', edgecolor='none')
            # plt.show()
            return

        contour_set = ax.contour(xx, yy, z, cmap="winter")
        ax.clabel(contour_set, inline=1, fontsize=10)

        if (scatter is True) and (three is False):
            ax.scatter(
                col_x,
                col_y,
                s=300,
                edgecolor="black",
                color="yellow",
                alpha=0.6,
            )

        self.log.appendPlainText("contour progress: 100%")
        self.log.appendPlainText("")
        QCoreApplication.processEvents()

        canvas.draw()

        if save_plots is True:
            rel_path = "{}_{:02}/fig_{:02}.png".format(
                self.name, self.ind_run, ind_fig
            )
            canvas.figure.savefig(appctxt.get_resource("Images/") + "/" + rel_path)

        QCoreApplication.processEvents()
예제 #13
0
    def plot_3d_both_gui(
        self, data, s, ax, canvas, save_plots, ind_fig, xi=None, prec=3
    ):
        """
        Draw the gauss_dens surface over a square grid around the data, its
        filled contour projection and the data points at the bottom of the box.

        Progress messages are appended to self.log while the grid is filled.

        :param data: input data, convertible to a 2-D array; columns 0 and 1 are used.
        :param s: value forwarded to gauss_dens as its s argument.
        :param ax: axis on which to plot (it is cleared first); plot_surface and
                   set_zlabel are called on it.
        :param canvas: canvas that is redrawn and whose figure is optionally saved.
        :param save_plots: if True, the produced image is saved.
        :param ind_fig: index of the figure that is saved.
        :param xi: optional density threshold; if given, a second surface clipped
                   from below at xi is drawn and the points are colored red when
                   their density is at least xi, yellow otherwise.
        :param prec: the grid has prec * 10 points per side.
        """

        ax.clear()
        from matplotlib import cm

        pts = np.array(data)
        col_x = pts[:, 0]
        col_y = pts[:, 1]

        # the same range, padded by 1 on both sides, is used on both axes
        lower = min(col_x.min(), col_y.min())
        upper = max(col_x.max(), col_y.max())

        side = prec * 10
        xx = np.outer(np.linspace(lower - 1, upper + 1, side), np.ones(side))
        yy = xx.copy().T  # transpose
        z = np.empty((side, side))

        # row index at which each progress message is emitted
        milestones = (
            (int(side / 4), "3dplot progress: 25%"),
            (side / 2, "3dplot progress: 50%"),
            (int((side / 4) * 3), "3dplot progress: 75%"),
        )

        for row in range(side):

            for threshold, message in milestones:
                if row == threshold:
                    self.log.appendPlainText(message)
                    QCoreApplication.processEvents()

            for column in range(side):
                grid_point = np.array([xx[row][row], yy[row][column]])
                z[row, column] = gauss_dens(x=grid_point, D=data, s=s)

        use_threshold = xi is not None

        # points are red unless a threshold is given; then red if their density
        # reaches xi, yellow if it is smaller
        color_plot = "red"
        if use_threshold:
            # density surface clipped from below at xi
            z_xi = np.where(z >= xi, z, xi)
            color_plot = [
                "red"
                if gauss_dens(x=np.array([px, py]), D=data, s=s) >= xi
                else "yellow"
                for px, py in zip(col_x, col_y)
            ]

        # z level at which the contour projection and the points are drawn
        offset = -15

        if use_threshold:
            ax.plot_surface(xx, yy, z_xi, cmap=cm.ocean, alpha=0.9)

        surf = ax.plot_surface(xx, yy, z, alpha=0.8, cmap=cm.ocean)

        ax.contourf(xx, yy, z, zdir="z", offset=offset, cmap=cm.ocean)

        ax.scatter(
            col_x,
            col_y,
            offset,
            s=30,
            edgecolor="black",
            color=color_plot,
            alpha=0.6,
        )

        canvas.figure.colorbar(surf, ax=ax, shrink=0.5, aspect=5)

        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        ax.set_zlabel("Z")
        ax.set_zlim(offset, np.max(z))
        ax.set_title("3D surface with 2D contour plot projections")

        # open the final-step window when four plots are selected, or three
        # with the one at position 2 switched on
        n_selected = np.sum(self.plot_list)
        if (n_selected == 4) or ((n_selected == 3) and (self.plot_list[2] == True)):
            self.openFinalStepWindow_2(canvas=canvas)

        self.log.appendPlainText("3dplot progress: 100%")
        self.log.appendPlainText("")
        canvas.draw()

        if save_plots is True:
            rel_path = "{}_{:02}/fig_{:02}.png".format(
                self.name, self.ind_run, ind_fig
            )
            canvas.figure.savefig(appctxt.get_resource("Images/") + "/" + rel_path)

        QCoreApplication.processEvents()
예제 #14
0
    def plot_grid_rect_gui(
        self,
        data,
        s,
        ax,
        canvas,
        save_plots,
        ind_fig,
        cube_kind="populated",
        color_grids=True,
    ):
        """
        Plot the data points together with the grid of cubes returned by
        pop_cubes, the bounding rectangle of the data and the centers of mass
        of the cubes.

        :param data: input data as a 2-D numpy array; columns 0 and 1 are plotted.
        :param s: value forwarded to pop_cubes and connect_cubes; every cube is
                  drawn as a square of side 2*s.
        :param ax: axis on which to plot (it is cleared first).
        :param canvas: canvas that is redrawn and whose figure is optionally saved.
        :param save_plots: if True, the produced image is saved.
        :param ind_fig: index of the figure that is saved.
        :param cube_kind: if "highly_populated", the cubes are filtered with
                          highly_pop_cubes(xi_c=3) and a circle of radius 4*s is
                          drawn around the center of mass of each remaining cube.
        :param color_grids: if True, the cubes kept in the cube dictionary whose
                            first value is positive are filled in green.
        """

        ax.clear()
        ax.set_title("Highly populated cubes")

        cl, ckc = pop_cubes(data, s)

        # the unfiltered cubes are still needed by connect_cubes
        cl_copy = cl.copy()

        coms = np.array([center_of_mass(cube) for cube in cl.values()])
        coms_hpc = []

        highly_populated = cube_kind == "highly_populated"
        if highly_populated:
            cl = highly_pop_cubes(cl, xi_c=3)
            coms_hpc = np.array([center_of_mass(cube) for cube in cl.values()])

        ax.scatter(data[:, 0], data[:, 1], s=100, edgecolor="black")

        rect_min = data.min(axis=0)
        rect_diff = data.max(axis=0) - rect_min

        # minimal bounding rectangle, enlarged by 0.05 on every side
        ax.add_patch(
            Rectangle(
                (rect_min[0] - 0.05, rect_min[1] - 0.05),
                rect_diff[0] + 0.1,
                rect_diff[1] + 0.1,
                fill=None,
                color="r",
                alpha=1,
                linewidth=3,
            )
        )

        ax.scatter(
            coms[:, 0],
            coms[:, 1],
            s=100,
            marker="X",
            color="red",
            edgecolor="black",
            label="centers of mass",
        )

        if highly_populated:
            for com in coms_hpc:
                ax.add_artist(
                    Circle(
                        (com[0], com[1]),
                        4 * s,
                        color="red",
                        fill=False,
                        linewidth=2,
                        alpha=0.6,
                    )
                )

        tot_cubes = connect_cubes(cl, cl_copy, s)

        # cubes returned by connect_cubes that are not in cl are filled in yellow
        for key in tot_cubes:
            if key in cl:
                continue
            a, b, _, _ = ckc[key]
            ax.add_patch(
                Rectangle(
                    (a, b),
                    2 * s,
                    2 * s,
                    fill=True,
                    color="yellow",
                    alpha=0.3,
                    linewidth=3,
                )
            )

        # grid: every cube is outlined in green, and filled when requested
        for key, (a, b, _, _) in ckc.items():

            color_or_not = (
                color_grids is True and key in cl and bool(cl[key][0] > 0)
            )

            ax.add_patch(
                Rectangle(
                    (a, b),
                    2 * s,
                    2 * s,
                    fill=color_or_not,
                    color="g",
                    alpha=0.3,
                    linewidth=3,
                )
            )
        ax.legend(fontsize=8)
        canvas.draw()

        if save_plots is True:
            canvas.figure.savefig(
                appctxt.get_resource("Images/")
                + "/"
                + "{}_{:02}/fig_{:02}.png".format(self.name, self.ind_run, ind_fig)
            )

        QCoreApplication.processEvents()
예제 #15
0
    def plot_pam_gui(
        self, data, ax, canvas, cl, ind_run, ind_fig, name="PAM", save_plots=False
    ):
        """
        Scatterplot of data points, with colors according to cluster labels.
        The points indexed by the keys of the cluster dictionary are marked
        with a red X.

        :param data: input data sample, convertible to a 2-D array; columns 0
                     and 1 are plotted.
        :param ax: axis on which to plot (it is cleared first).
        :param canvas: canvas that is redrawn and whose figure is optionally saved.
        :param cl: cluster dictionary; both keys and values are used as row
                   indexes into data (presumably center -> members, verify
                   against caller).
        :param ind_run: index of the run, used in the name of the output folder.
        :param ind_fig: index of the figure; if None the plot is titled and
                        saved as the final one.
        :param name: name of the algorithm, used in the title and the output path.
        :param save_plots: if True, the produced image is saved.

        """
        ax.clear()
        if ind_fig is not None:
            ax.set_title("{} run number {}".format(name, ind_fig + 1))
        else:
            ax.set_title("{} final clusters".format(name))

        # convert the input once instead of once per plotted/annotated point
        points = np.array(data)
        xs, ys = points[:, 0], points[:, 1]

        # all points are plotted in white
        ax.scatter(
            xs,
            ys,
            s=300,
            color="white",
            edgecolor="black",
        )

        # one color per cluster, reused cyclically when there are more clusters
        # than colors
        palette = (
            "seagreen",
            "lightcoral",
            "yellow",
            "grey",
            "pink",
            "turquoise",
            "orange",
            "purple",
            "yellowgreen",
            "olive",
            "brown",
            "tan",
            "plum",
            "rosybrown",
            "lightblue",
            "khaki",
            "gainsboro",
            "peachpuff",
        )

        # plot the points with colors according to the cluster they belong to
        for cluster_idx, members in enumerate(cl.values()):
            ax.scatter(
                points[members, 0],
                points[members, 1],
                s=300,
                color=palette[cluster_idx % len(palette)],
                edgecolor="black",
            )

        # plot the points indexed by the dictionary keys, marked with an X
        for center in cl.keys():
            ax.scatter(
                points[center, 0],
                points[center, 1],
                s=500,
                color="red",
                marker="X",
                edgecolor="black",
            )

        # plot indexes of points in plot; "size" is an alias of "fontsize",
        # so only one of the two is passed
        for idx, (x, y) in enumerate(zip(xs, ys)):
            ax.annotate(
                idx,
                (x, y),
                fontsize=10,
                ha="center",
                va="center",
            )
        canvas.draw()

        if save_plots is True:
            if ind_fig is not None:
                fig_path = "{}_{:02}/fig_{:02}.png".format(name, ind_run, ind_fig)
            else:
                fig_path = "{}_{:02}/fig_fin.png".format(name, ind_run)
            canvas.figure.savefig(appctxt.get_resource("Images/") + "/" + fig_path)

        QCoreApplication.processEvents()
예제 #16
0
    def plot_pam_mod_gui(
        self,
        data,
        ax,
        canvas,
        cl,
        full,
        ind_run,
        ind_fig,
        name="CLARA",
        save_plots=False,
    ):
        """
        Scatterplot of data points, with colors according to cluster labels. Only sampled
        points are plotted, the others are only displayed with their indexes; moreover,
        the points indexed by the keys of the cluster dictionary are marked with a red X.

        :param data: input data sample as dataframe; rows are selected by label
                     and the columns labelled 0 and 1 are plotted.
        :param ax: axis on which to plot.
        :param cl: cluster dictionary; both keys and values are used as row
                   labels of data.
        :param full: full input dataframe.
        :param canvas: figure where to plot.
        :param ind_run: how many times the algorithm has run.
        :param ind_fig: index of the figure; if None the plot is titled and
                        saved as the final one.
        :param name: name of the algorithm.
        :param save_plots: if True, save the plots produced.


        """
        ax.clear()
        if ind_fig is not None:
            ax.set_title("{} run number {}".format(name, ind_fig + 1))
        else:
            ax.set_title("{} final clustering".format(name))

        full_x = full.iloc[:, 0]
        full_y = full.iloc[:, 1]

        # invisible layer (white points with white edgecolor): it only fixes the
        # extent of the axes to the full dataset
        ax.scatter(full_x,
                   full_y,
                   s=300,
                   color="white",
                   edgecolor="white")

        # one color per cluster, reused cyclically when there are more clusters
        # than colors
        palette = (
            "seagreen",
            "lightcoral",
            "yellow",
            "grey",
            "pink",
            "turquoise",
            "orange",
            "purple",
            "yellowgreen",
            "olive",
            "brown",
            "tan",
            "plum",
            "rosybrown",
            "lightblue",
            "khaki",
            "gainsboro",
            "peachpuff",
        )

        # plot the sampled point, with colors according to the cluster they belong to
        for cluster_idx, members in enumerate(cl.values()):
            ax.scatter(
                data.loc[members, 0],
                data.loc[members, 1],
                s=300,
                color=palette[cluster_idx % len(palette)],
                edgecolor="black",
            )

        # plot the points indexed by the dictionary keys, marked with an X
        for center in cl.keys():
            ax.scatter(
                data.loc[center, 0],
                data.loc[center, 1],
                s=500,
                color="red",
                marker="X",
                edgecolor="black",
            )

        # plot indexes of points in plot; "size" is an alias of "fontsize",
        # so only one of the two is passed
        for idx, (x, y) in enumerate(zip(full_x, full_y)):
            ax.annotate(
                idx,
                (x, y),
                fontsize=10,
                ha="center",
                va="center",
            )

        canvas.draw()

        if save_plots is True:
            if ind_fig is not None:
                fig_path = "{}_{:02}/fig_{:02}.png".format(name, ind_run, ind_fig)
            else:
                fig_path = "{}_{:02}/fig_fin.png".format(name, ind_run)
            canvas.figure.savefig(appctxt.get_resource("Images/") + "/" + fig_path)

        QCoreApplication.processEvents()