def __update_clusters(self):
    # stop if convergence isn't reached within max_iter iterations
    for i in range(self.max_iter):
        self.log.appendPlainText("")
        self.log.appendPlainText("iteration n°: {}".format(i + 1))
        # compute the distances obtained by swapping medoids in the clusters
        cluster_dist_with_new_medoids = self.__swap_and_recalculate_clusters()
        # if the new sum of cluster_distances is smaller than the old one
        if self.__is_new_cluster_dist_small(cluster_dist_with_new_medoids) is True:
            self.log.appendPlainText("new is smaller")
            # compute clusters and cluster_distances with the new medoids
            self.clusters, self.cluster_distances = self.__calculate_clusters(self.medoids)
            self.log.appendPlainText("clusters: {}".format(self.clusters))
            if self.delay != 0:
                pause_execution(self.delay)
            self.plot_pam_gui(data=self.__data, cl=self.clusters, ax=self.ax,
                              canvas=self.canvas, ind_run=self.ind_run,
                              ind_fig=i + 1, save_plots=self.save_fig)
        else:
            # if the sum of cluster_distances doesn't improve, terminate the algorithm
            self.log.appendPlainText("termination")
            break
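
# A minimal, self-contained sketch of the objective that the swap step above tries to
# minimize: the total distance of every point to its closest medoid. The names
# (`total_medoid_cost`, `points`, `medoids`) are illustrative and not part of the class.
import numpy as np

def total_medoid_cost(points, medoids):
    """Sum, over all points, of the Euclidean distance to the nearest medoid."""
    points = np.asarray(points, dtype=float)
    medoids = np.asarray(medoids, dtype=float)
    # pairwise distances, shape (n_points, n_medoids)
    dists = np.linalg.norm(points[:, None, :] - medoids[None, :, :], axis=2)
    return dists.min(axis=1).sum()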
def __insert_data(self, plotting=False):
    """!
    @brief Inserts input data into the tree.

    @remark If the maximum number of entries is exceeded, the diameter is increased
            and the tree is rebuilt.
    """
    for index_point in range(0, len(self.__pointer_data)):
        if (index_point != 0) and (plotting is True):
            if self.delay != 0:
                pause_execution(self.delay)
            plot_tree_fin_gui(tree=self.__tree, log=self.log, ind_run=self.ind_run,
                              ind_fig=self.index_for_saving_plot,
                              label_graphviz=self.label_graphviz,
                              save_plots=self.save_fig)
            plot_birch_leaves_gui(tree=self.__tree, data=self.__pointer_data,
                                  ax=self.ax, canvas=self.canvas,
                                  ind_run=self.ind_run,
                                  ind_fig=self.index_for_saving_plot,
                                  save_plots=self.save_fig)
            self.index_for_saving_plot += 1

        self.log.appendPlainText("")
        self.log.appendPlainText("index: {}".format(index_point))
        point = self.__pointer_data[index_point]
        self.log.appendPlainText("point [{}, {}]".format(round(point[0], 2),
                                                         round(point[1], 2)))
        self.__tree.insert_cluster([point])

        if self.__tree.amount_entries > self.__entry_size_limit:
            self.log.appendPlainText("rebuilding tree")
            self.__tree = self.__rebuild_tree(index_point)
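
# BIRCH summarizes each subcluster as a clustering feature CF = (N, LS, SS), and the
# diameter test that drives insertions and rebuilds can be computed from the CF alone.
# A hedged, self-contained sketch of that computation (not the tree's internal code,
# which lives in the CF-tree implementation used above):
import numpy as np

def cf_diameter(points):
    """Average pairwise distance of a subcluster, computed from its CF statistics."""
    pts = np.asarray(points, dtype=float)
    n = len(pts)
    if n < 2:
        return 0.0
    ls = pts.sum(axis=0)        # linear sum LS
    ss = (pts ** 2).sum()       # square sum SS
    # D^2 = (2*N*SS - 2*||LS||^2) / (N*(N-1))
    return np.sqrt((2 * n * ss - 2 * ls @ ls) / (n * (n - 1)))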
def __start_algo(self):
    self.log.appendPlainText("starting algorithm")
    # choose the initial medoids
    self.__initialize_medoids()
    # compute clusters and cluster_distances
    self.clusters, self.cluster_distances = self.__calculate_clusters(self.medoids)
    # print clusters and cluster_distances
    self.log.appendPlainText("clusters: {}".format(self.clusters))
    self.log.appendPlainText("clusters_distances: {}".format(self.cluster_distances))
    if self.delay != 0:
        pause_execution(self.delay)
    self.plot_pam_gui(
        data=self.__data,
        cl=self.clusters,
        ax=self.ax,
        canvas=self.canvas,
        ind_run=self.ind_run,
        ind_fig=0,
        save_plots=self.save_fig,
    )
    self.__update_clusters()
def plot2d_data_gui(self, df, canvas, ax, save_plots, ind_fig=None, col_i=None):
    if self.delay != 0:
        pause_execution(self.delay)
    ax.clear()
    ax.set_title(self.name + " Merging")
    colors = {0: "seagreen", 1: "dodgerblue", 2: "yellow", 3: "grey", 4: "pink",
              5: "turquoise", 6: "orange", 7: "purple", 8: "yellowgreen", 9: "olive",
              10: "brown", 11: "tan", 12: "plum", 13: "rosybrown", 14: "lightblue",
              15: "khaki", 16: "gainsboro", 17: "peachpuff", 18: "lime", 19: "peru",
              20: "beige", 21: "teal", 22: "royalblue", 23: "tomato", 24: "bisque",
              25: "palegreen"}
    # cycle through the palette so cluster labels beyond 25 do not raise a KeyError
    color_list = [colors[i % len(colors)] for i in df["cluster"]]
    df.plot(kind="scatter", c=color_list, x=0, y=1, ax=ax, s=100)
    ax.set_xlabel("")
    ax.set_ylabel("")
    # highlight the cluster involved in the current merging step
    if col_i is not None:
        ax.scatter(df[df.cluster == col_i].iloc[:, 0],
                   df[df.cluster == col_i].iloc[:, 1],
                   color="black", s=140, edgecolors="white", alpha=0.8)
    canvas.draw()
    if save_plots is True:
        canvas.figure.savefig(
            appctxt.get_resource("Images/") + "/" +
            "{}_{:02}/fig_{:02}.png".format(self.name, self.ind_run, ind_fig))
    QCoreApplication.processEvents()
def plot2d_graph_gui(self, graph, canvas, ax, save_plots, ind_fig=None, print_clust=True):
    if self.delay != 0:
        pause_execution(self.delay)
    ax.clear()
    ax.set_title(self.name + " Graph Clustering")
    pos = nx.get_node_attributes(graph, "pos")
    colors = {0: "seagreen", 1: "dodgerblue", 2: "yellow", 3: "grey", 4: "pink",
              5: "turquoise", 6: "orange", 7: "purple", 8: "yellowgreen", 9: "olive",
              10: "brown", 11: "tan", 12: "plum", 13: "rosybrown", 14: "lightblue",
              15: "khaki", 16: "gainsboro", 17: "peachpuff", 18: "lime", 19: "peru",
              20: "beige", 21: "teal", 22: "royalblue", 23: "tomato", 24: "bisque",
              25: "palegreen"}
    el = nx.get_node_attributes(graph, "cluster").values()
    cmc = Counter(el).most_common()
    c = [colors[i % len(colors)] for i in el]
    if print_clust is True:
        self.log.appendPlainText("clusters: {}".format(cmc))
    # draw with cluster colors only if the "cluster" attribute has been set
    if len(el) != 0:
        nx.draw(graph, pos, node_color=c, node_size=60, edgecolors="black", ax=ax)
    else:
        nx.draw(graph, pos, node_size=60, edgecolors="black", ax=ax)
    canvas.draw()
    if save_plots is True:
        canvas.figure.savefig(
            appctxt.get_resource("Images/") + "/" +
            "{}_{:02}/fig_{:02}.png".format(self.name, self.ind_run, ind_fig))
    QCoreApplication.processEvents()
def DBSCAN_gui(self, plotting=True, print_details=True, delay=0):
    """
    DBSCAN algorithm.

    :param plotting: if True, executes point_plot_mod, plotting every time a point
                     is added to a cluster.
    :param print_details: if True, prints the length of the "external" NearestNeighborhood
                          and of the "internal" one (in the while loop).
    :param delay: seconds for which to delay the algorithm, so that the images displayed
                  in the GUI show at a slower pace.
    :return: ClustDict, a dictionary of the form point_index:cluster_label, stored
             in self.ClustDict.
    """
    self.update_log(initial=True)
    index_for_saving_plots = 0
    # initialize the dictionary of clusters
    self.ClustDict = {}
    clust_id = -1
    X_dict = dict(zip([str(i) for i in range(len(self.X))], self.X))
    processed = []
    processed_list = []
    # for every point in the dataset
    for point in X_dict:
        # if it hasn't been visited
        if point not in processed:
            # mark it as visited
            processed.append(point)
            # scan its neighborhood
            N = scan_neigh1_mod(X_dict, X_dict[point], self.eps)
            if print_details is True:
                self.update_log(point, " initial len(N): " + str(len(N)),
                                change_current=True)
            # if there are fewer than minPts points in its neighborhood, classify it as noise
            if len(N) < self.mp:
                self.ClustDict.update({point: -1})
                if plotting is True:
                    if delay != 0:
                        pause_execution(delay)
                    self.point_plot_mod_gui(X_dict, point, save_plots=self.save_plots,
                                            ind_fig=index_for_saving_plots)
                    index_for_saving_plots += 1
                self.update_log(noise=True)
            # else it is a core point
            else:
                # increase the current cluster id
                clust_id += 1
                # put it in the cluster dictionary
                self.ClustDict.update({point: clust_id})
                if plotting is True:
                    if delay != 0:
                        pause_execution(delay)
                    self.point_plot_mod_gui(X_dict, point, save_plots=self.save_plots,
                                            ind_fig=index_for_saving_plots)
                    index_for_saving_plots += 1
                # add it to the temporary processed list
                processed_list = [point]
                # remove it from the neighborhood N
                del N[point]
                # until the neighborhood is empty
                while len(N) > 0:
                    # take a random point of the neighborhood
                    n = random.choice(list(N.keys()))
                    if print_details is True:
                        self.update_log(n, " updated len(N): " + str(len(N)),
                                        change_subcurrent=True)
                    # the point must not be in processed_list, i.e. already visited
                    while n in processed_list:
                        n = random.choice(list(N.keys()))
                    # put it in processed_list
                    processed_list.append(n)
                    # remove it from the neighborhood
                    del N[n]
                    # if it hasn't been visited
                    if n not in processed:
                        # mark it as visited
                        processed.append(n)
                        # scan its neighborhood
                        N_2 = scan_neigh1_mod(X_dict, X_dict[n], self.eps)
                        if print_details is True:
                            self.update_log(point, " len(N_sub): " + str(len(N_2)))
                        # if it is a core point
                        if len(N_2) >= self.mp:
                            # add each element of its neighborhood to the neighborhood N
                            for element in N_2:
                                if element not in processed_list:
                                    N.update({element: X_dict[element]})
                    # if n has not been inserted into the cluster dictionary, or if it
                    # was previously classified as noise, update the cluster dictionary
                    if (n not in self.ClustDict) or (self.ClustDict[n] == -1):
                        self.ClustDict.update({n: clust_id})
                        if plotting is True:
                            if delay != 0:
                                pause_execution(delay)
                            self.point_plot_mod_gui(X_dict, n,
                                                    save_plots=self.save_plots,
                                                    ind_fig=index_for_saving_plots)
                            index_for_saving_plots += 1
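
# The two primitives DBSCAN_gui relies on, in self-contained form: a region query
# (here a plain Euclidean eps-ball, which is what scan_neigh1_mod is assumed to
# compute) and the core-point test. Illustrative sketch; names are not from the class.
import numpy as np

def region_query(X_dict, center, eps):
    """Return the sub-dictionary of points within eps of `center`."""
    return {k: v for k, v in X_dict.items()
            if np.linalg.norm(np.asarray(v) - np.asarray(center)) <= eps}

def is_core(X_dict, point_key, eps, min_pts):
    """A point is core if its eps-neighborhood (itself included) holds >= min_pts points."""
    return len(region_query(X_dict, X_dict[point_key], eps)) >= min_pts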
def process(self, plotting=False):
    """!
    @brief Performs cluster analysis in line with the rules of the CLARANS algorithm.

    @return (clarans) Returns itself (CLARANS instance).

    @see get_clusters()
    @see get_medoids()
    """
    random.seed()
    index_for_saving_plots = 0
    # repeat numlocal times
    for _ in range(0, self.__numlocal):
        self.log.appendPlainText("")
        self.log.appendPlainText("numlocal (iteration): {}".format(_ + 1))
        # set (current) random medoids
        self.__current = random.sample(range(0, len(self.__pointer_data)),
                                       self.__number_clusters)
        # update clusters in line with the randomly allocated medoids
        self.__update_clusters(self.__current)
        # optimize the configuration
        self.__optimize_configuration()
        # obtain the cost of the current configuration and compare it with the best one
        estimation = self.__calculate_estimation()
        if estimation < self.__optimal_estimation:
            self.log.appendPlainText("Better configuration found with "
                                     "medoids: {0} and cost: {1}".format(
                                         self.__current[:], estimation))
            self.__optimal_medoids = self.__current[:]
            self.__optimal_estimation = estimation
            if plotting is True:
                self.__update_clusters(self.__optimal_medoids)
                if self.delay != 0:
                    pause_execution(self.delay)
                self.PAM.plot_pam_gui(
                    data=self.__pointer_data,
                    name="CLARANS",
                    cl=dict(zip(self.__optimal_medoids, self.__clusters)),
                    ax=self.ax,
                    canvas=self.canvas,
                    ind_run=self.ind_run,
                    ind_fig=index_for_saving_plots,
                    save_plots=self.save_fig,
                )
        else:
            self.log.appendPlainText("Configuration found does not improve the current "
                                     "best one because its cost is {0}".format(estimation))
            if plotting is True:
                self.__update_clusters(self.__current[:])
                if self.delay != 0:
                    pause_execution(self.delay)
                self.PAM.plot_pam_gui(
                    data=self.__pointer_data,
                    cl=dict(zip(self.__current[:], self.__clusters)),
                    ax=self.ax,
                    canvas=self.canvas,
                    ind_run=self.ind_run,
                    name="CLARANS",
                    ind_fig=index_for_saving_plots,
                    save_plots=self.save_fig,
                )
        index_for_saving_plots += 1

    self.__update_clusters(self.__optimal_medoids)
    if plotting is True:
        self.log.appendPlainText("")
        self.log.appendPlainText("FINAL RESULT")
        if self.delay != 0:
            pause_execution(self.delay)
        self.PAM.plot_pam_gui(
            data=self.__pointer_data,
            cl=dict(zip(self.__optimal_medoids, self.__clusters)),
            ax=self.ax,
            canvas=self.canvas,
            ind_run=self.ind_run,
            name="CLARANS",
            ind_fig=None,
            save_plots=self.save_fig,
        )
    return self
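
# CLARANS explores the graph of medoid sets by random swaps: a "neighbor" of the
# current medoid set replaces one medoid with one non-medoid. A minimal sketch of
# that move; illustrative only, and __optimize_configuration above is assumed to
# perform moves of this kind internally.
import random

def random_neighbor(medoids, n_points):
    """Swap one random medoid for one random non-medoid index."""
    neighbor = list(medoids)
    out = random.randrange(len(neighbor))              # position of the medoid to replace
    candidates = set(range(n_points)) - set(neighbor)  # non-medoid indexes
    neighbor[out] = random.choice(sorted(candidates))
    return neighbor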
def cure_gui(
    self,
    data,
    k,
    ax,
    canvas,
    plotting=True,
    preprocessed_data=None,
    partial_index=None,
    n_rep_finalclust=None,
    not_sampled=None,
    not_sampled_ind=None,
    delay=0,
    ind_fig_bis=None,
):
    """
    CURE algorithm: hierarchical agglomerative clustering using representatives.

    :param data: input data.
    :param plotting: if True, plots all intermediate steps.
    :param k: the desired number of clusters.

    #the following parameters are used for the large-dataset variant of CURE
    :param preprocessed_data: if not None, must be of the form (clusters, representatives,
                              matrix_a, X_dist1), which is used to perform a warm start.
    :param partial_index: if not None, it is used as the index of matrix_a, of the
                          cluster points and of the representatives.
    :param n_rep_finalclust: the number of final representative points used to classify
                             the not_sampled points.
    :param not_sampled: points not sampled in the initial phase.
    :param not_sampled_ind: indexes of the not_sampled points.
    :return (clusters, rep, a): the clusters dictionary, the dictionary of
                                representatives and the matrix a.
    """
    ax.cla()
    index_for_saving_plots = 0

    # starting from raw data
    if preprocessed_data is None:
        # build a dataframe storing the x and y coordinates of the input data points
        l = [[i, i] for i in range(len(data))]
        flat_list = [item for sublist in l for item in sublist]
        col = [
            str(el) + "x" if i % 2 == 0 else str(el) + "y"
            for i, el in enumerate(flat_list)
        ]
        # use the original indexes if necessary
        if partial_index is not None:
            a = pd.DataFrame(index=partial_index, columns=col)
        else:
            a = pd.DataFrame(index=[str(i) for i in range(len(data))], columns=col)
        # add the real coordinates
        a["0x"] = data.T[0]
        a["0y"] = data.T[1]
        b = a.dropna(axis=1, how="all")

        # initial clusters
        if partial_index is not None:
            clusters = dict(zip(partial_index, data))
        else:
            clusters = {str(i): np.array(data[i]) for i in range(len(data))}

        # build the distance matrix
        X_dist1 = dist_mat_gen(b)

        # initialize the representatives
        if partial_index is not None:
            rep = {partial_index[i]: [data[int(i)]] for i in range(len(data))}
        else:
            rep = {str(i): [data[i]] for i in range(len(data))}

        # placeholder so the while loop below starts
        heap = [1] * len(X_dist1)
        # store the minimum distance between clusters for each iteration
        levels = []

    # use precomputed data
    else:
        clusters = preprocessed_data[0]
        rep = preprocessed_data[1]
        a = preprocessed_data[2]
        X_dist1 = preprocessed_data[3]
        heap = [1] * len(X_dist1)
        levels = []

    # store the original index
    if partial_index is not None:
        initial_index = deepcopy(partial_index)

    # while the desired number of clusters has not been reached
    while len(heap) > k:
        # find the minimum of the heap queue, which stores clusters according to
        # the distance from their closest cluster
        list_argmin = list(X_dist1.apply(lambda x: np.argmin(x)).values)
        list_min = list(X_dist1.min(axis=0).values)
        heap = dict(zip(list(X_dist1.index), list_min))
        heap = dict(OrderedDict(sorted(heap.items(), key=lambda kv: kv[1])))
        closest = dict(zip(list(X_dist1.index), list_argmin))

        # get the minimum keys and delete them from the heap and closest dictionaries
        u = min(heap, key=heap.get)
        levels.append(heap[u])
        del heap[u]
        u_cl = X_dist1.columns[closest[u]]
        del closest[u]

        # form the new cluster
        if (np.array(clusters[u]).shape == (2,)) and (np.array(clusters[u_cl]).shape == (2,)):
            w = [clusters[u], clusters[u_cl]]
        elif (np.array(clusters[u]).shape != (2,)) and (np.array(clusters[u_cl]).shape == (2,)):
            clusters[u].append(clusters[u_cl])
            w = clusters[u]
        elif (np.array(clusters[u]).shape == (2,)) and (np.array(clusters[u_cl]).shape != (2,)):
            clusters[u_cl].append(clusters[u])
            w = clusters[u_cl]
        else:
            w = clusters[u] + clusters[u_cl]

        # delete the old clusters
        del clusters[u]
        del clusters[u_cl]

        # set the new name
        name = "(" + u + ")" + "-" + "(" + u_cl + ")"
        clusters[name] = w

        # update the representatives
        rep[name] = sel_rep_fast(rep[u] + rep[u_cl], clusters, name,
                                 self.n_repr, self.alpha_cure)

        # update the distance matrix
        X_dist1 = update_mat_cure(X_dist1, u, u_cl, rep, name)

        # delete the old representatives
        del rep[u]
        del rep[u_cl]

        if plotting is True:
            if delay != 0:
                pause_execution(delay)

            dim1 = int(a.loc[u].notna().sum())
            # update the matrix a with the new cluster
            a.loc["(" + u + ")" + "-" + "(" + u_cl + ")", :] = a.loc[u].fillna(0) + \
                a.loc[u_cl].shift(dim1, fill_value=0)
            a = a.drop(u, axis=0)
            a = a.drop(u_cl, axis=0)

            # in the large-dataset version of CURE
            if partial_index is not None:
                # only in the last step of the large-dataset version of CURE
                if ((len(heap) == k) and (not_sampled is not None)
                        and (not_sampled_ind is not None)):
                    # take random representative points from the final representatives
                    final_reps = {
                        list(rep.keys())[i]: random.sample(
                            list(rep.values())[i],
                            min(n_rep_finalclust, len(list(rep.values())[i])),
                        )
                        for i in range(len(rep))
                    }
                    partial_index = self.point_plot_mod2_gui(
                        data=data,
                        a=a,
                        reps=rep[name],
                        ax=ax,
                        canvas=canvas,
                        level_txt=levels[-1],
                        par_index=partial_index,
                        u=u,
                        u_cl=u_cl,
                        initial_ind=initial_index,
                        last_reps=final_reps,
                        not_sampled=not_sampled,
                        not_sampled_ind=not_sampled_ind,
                        n_rep_fin=n_rep_finalclust,
                        save_plots=self.save_plots,
                        ind_fig=index_for_saving_plots,
                        ind_fig_bis=ind_fig_bis,
                    )
                # in the intermediate steps of the large-dataset version
                else:
                    partial_index = self.point_plot_mod2_gui(
                        data=data,
                        a=a,
                        reps=rep[name],
                        ax=ax,
                        canvas=canvas,
                        level_txt=levels[-1],
                        par_index=partial_index,
                        u=u,
                        u_cl=u_cl,
                        initial_ind=initial_index,
                        save_plots=self.save_plots,
                        ind_fig=index_for_saving_plots,
                        ind_fig_bis=ind_fig_bis,
                    )
            else:
                self.point_plot_mod2_gui(
                    a=a,
                    reps=rep[name],
                    ax=ax,
                    canvas=canvas,
                    level_txt=levels[-1],
                    save_plots=self.save_plots,
                    ind_fig=index_for_saving_plots,
                    ind_fig_bis=ind_fig_bis,
                )
            index_for_saving_plots += 1

    return clusters, rep, a
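
# CURE's distinctive step: pick well-scattered representative points for a cluster
# and shrink them toward the centroid by a factor alpha. A self-contained sketch of
# that idea; sel_rep_fast above is assumed to do something similar, plus bookkeeping.
import numpy as np

def shrink_representatives(cluster_points, n_repr, alpha):
    pts = np.asarray(cluster_points, dtype=float)
    centroid = pts.mean(axis=0)
    # greedily pick scattered points, starting from the farthest from the centroid
    chosen = [pts[np.argmax(np.linalg.norm(pts - centroid, axis=1))]]
    while len(chosen) < min(n_repr, len(pts)):
        d = np.min([np.linalg.norm(pts - c, axis=1) for c in chosen], axis=0)
        chosen.append(pts[np.argmax(d)])  # farthest from the already-chosen set
    # shrink every representative toward the centroid
    return [c + alpha * (centroid - c) for c in chosen]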
def clara(self, _df, _k, _fn):
    """The main CLARA clustering iterative algorithm.

    :param _df: Input dataframe.
    :param _k: Number of medoids.
    :param _fn: The distance function to use.
    :return: The minimized cost, the best medoid choices and the final configuration.
    """
    _df = pd.DataFrame(_df)
    size = len(_df)
    if size > 100000:
        niter = 1000
        runs = 1
    else:
        niter = self.max_iter
        runs = 5

    # initialize min_avg_cost to infinity
    min_avg_cost = np.inf
    best_choices = []
    best_results = {}
    index_for_saving_plot = 0

    for j in range(runs):  # usually 5 times
        self.log.appendPlainText("")
        self.log.appendPlainText("run number: {}".format(j))

        # take 40 + 2*_k random indexes from the input data
        if size < (40 + _k * 2):
            self.log.clear()
            self.log.appendPlainText("ERROR")
            self.log.appendPlainText("")
            self.log.appendPlainText("The dimension of the input dataset must be "
                                     "at least 40 + 2*n_medoids")
            return
        else:
            sampling_idx = random.sample(range(size), 40 + _k * 2)

        # take the corresponding rows from the input dataframe _df
        sampling_data = []
        for idx in sampling_idx:
            sampling_data.append(_df.iloc[idx])

        # create the sample dataframe
        sampled_df = pd.DataFrame(sampling_data, index=sampling_idx)

        # compute total cost, medoid choices and clusters of sampled_df
        pre_cost, pre_choice, pre_medoids = self.k_medoids(sampled_df, _k, _fn, niter)

        if self.delay != 0:
            pause_execution(self.delay)
        self.plot_pam_mod_gui(data=sampled_df, ax=self.ax, canvas=self.canvas,
                              cl=pre_medoids, full=_df, ind_run=self.ind_run,
                              ind_fig=index_for_saving_plot, save_plots=self.save_fig)

        self.log.appendPlainText("")
        self.log.appendPlainText("RESULTS OF K-MEDOIDS")
        self.log.appendPlainText("pre_cost: {}".format(pre_cost))
        self.log.appendPlainText("pre_choice: {}".format(pre_choice))
        self.log.appendPlainText("pre_medoids: {}".format(pre_medoids))

        # compute the average cost and clusters of the whole input dataframe
        tmp_avg_cost, tmp_medoids = self.average_cost(_df, _fn, pre_choice)
        self.log.appendPlainText("")
        self.log.appendPlainText("RESULTS OF WHOLE DATASET EVALUATION")
        self.log.appendPlainText("tmp_avg_cost: {}".format(tmp_avg_cost))
        self.log.appendPlainText("tmp_medoids: {}".format(tmp_medoids))

        # if the new cost is lower, keep the new configuration
        if tmp_avg_cost < min_avg_cost:
            self.log.appendPlainText("new_cost is lower, from {0} to {1}".format(
                round(min_avg_cost, 4), round(tmp_avg_cost, 4)))
            min_avg_cost = tmp_avg_cost
            best_choices = list(pre_choice)
            best_results = dict(tmp_medoids)
        elif tmp_avg_cost == min_avg_cost:
            self.log.appendPlainText("new_cost is equal")
        else:
            self.log.appendPlainText("new_cost is higher")

        index_for_saving_plot += 1

    self.log.appendPlainText("")
    self.log.appendPlainText("FINAL RESULT")
    if self.delay != 0:
        pause_execution(self.delay)
    self.plot_pam_mod_gui(data=_df, ax=self.ax, canvas=self.canvas, cl=best_results,
                          full=_df, ind_run=self.ind_run, ind_fig=None,
                          save_plots=self.save_fig)
    return min_avg_cost, best_choices, best_results
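
# CLARA's core idea in miniature: run k-medoids on a small sample (of size 40 + 2k,
# as checked above) and then evaluate the resulting medoids on the full dataset. A
# hedged, self-contained sketch of the evaluation half (`fn` is any distance function;
# the names here are illustrative, not the class's own helpers):
import numpy as np

def average_cost_of_medoids(data, medoid_rows, fn):
    """Mean distance of every row of `data` to its closest medoid row."""
    total = 0.0
    for row in np.asarray(data, dtype=float):
        total += min(fn(row, m) for m in medoid_rows)
    return total / len(data)

# e.g. average_cost_of_medoids(X, X[chosen_idx], lambda a, b: np.linalg.norm(a - b))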
def agg_clust_mod_gui(self, delay=0):
    """
    Perform hierarchical agglomerative clustering with the provided linkage method,
    plotting every step of cluster aggregation.

    :param delay: seconds for which to delay the algorithm, so that the images
                  displayed in the GUI show at a slower pace.
    """
    levels = []
    levels2 = []
    ind_list = []
    index_for_saving_plots = 0

    # build matrix a, used to store the points of the clusters with their coordinates
    l = [[i, i] for i in range(len(self.X))]
    flat_list = [item for sublist in l for item in sublist]
    col = [
        str(el) + "x" if i % 2 == 0 else str(el) + "y"
        for i, el in enumerate(flat_list)
    ]
    a = pd.DataFrame(index=[str(i) for i in range(len(self.X))], columns=col)
    a["0x"] = self.X.T[0]
    a["0y"] = self.X.T[1]
    b = a.dropna(axis=1, how="all")

    # initial distance matrix
    X_dist1 = dist_mat_gen(b)
    var_sum = 0
    levels.append(var_sum)
    levels2.append(var_sum)

    # until the desired number of clusters is reached
    while len(a) > self.n_clust:
        if self.linkage == "ward":
            # find the indexes corresponding to the minimum increase in total
            # intra-cluster variance
            b = a.dropna(axis=1, how="all")
            b = b.fillna(np.inf)
            (i, j), var_sum, par_var = compute_ward_ij(self.X, b)
            levels.append(var_sum)
            levels2.append(par_var)
            ind_list.append((i, j))
            new_clust = a.loc[[i, j], :]
        else:
            # find the indexes corresponding to the minimum distance
            (i, j) = np.unravel_index(np.array(X_dist1).argmin(),
                                      np.array(X_dist1).shape)
            levels.append(np.min(np.array(X_dist1)))
            ind_list.append((i, j))
            new_clust = a.iloc[[i, j], :]

        # update the distance matrix
        X_dist1 = update_mat(X_dist1, i, j, self.linkage)

        a = a.drop([new_clust.iloc[0].name], axis=0)
        a = a.drop([new_clust.iloc[1].name], axis=0)

        dim1 = int(new_clust.iloc[0].notna().sum())
        new_cluster_name = ("(" + new_clust.iloc[0].name + ")" + "-" +
                            "(" + new_clust.iloc[1].name + ")")
        a.loc[new_cluster_name, :] = new_clust.iloc[0].fillna(0) + \
            new_clust.iloc[1].shift(dim1, fill_value=0)

        if delay != 0:
            pause_execution(delay)
        if self.linkage != "ward":
            self.point_plot_mod_gui(a, levels[-1], save_plots=self.save_plots,
                                    ind_fig=index_for_saving_plots)
        else:
            self.point_plot_mod_gui(a, levels[-2], levels2[-1],
                                    save_plots=self.save_plots,
                                    ind_fig=index_for_saving_plots)
        index_for_saving_plots += 1
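
# The quantity Ward linkage minimizes at each merge has a closed form: merging
# clusters A and B increases total within-cluster variance by
# |A||B| / (|A|+|B|) * ||mean(A) - mean(B)||^2. A self-contained sketch of that
# formula; compute_ward_ij above is assumed to search this quantity over all pairs.
import numpy as np

def ward_increase(cluster_a, cluster_b):
    """Increase in total intra-cluster variance caused by merging the two clusters."""
    a = np.asarray(cluster_a, dtype=float)
    b = np.asarray(cluster_b, dtype=float)
    diff = a.mean(axis=0) - b.mean(axis=0)
    return (len(a) * len(b)) / (len(a) + len(b)) * diff @ diff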
def OPTICS_gui(self, plot=True, plot_reach=False, delay=0):
    """
    Executes the OPTICS algorithm. Similar to DBSCAN, but uses a priority queue.

    :param plot: if True, the scatter plot of the function point_plot is displayed
                 at each step.
    :param plot_reach: if True, the reachability plot is displayed at each step.
    :param delay: seconds for which to delay the algorithm, so that the images
                  displayed in the GUI show at a slower pace.
    :return: ClustDist, a dictionary of the form point_index:reach_dist, and
             CoreDist, a dictionary of the form point_index:core_dist, stored
             in self.ClustDist and self.CoreDist.
    """
    self.ClustDist = {}
    self.CoreDist = {}
    Seed = {}
    processed = []
    index_for_saving_plots = 0

    # create the dictionary of points
    X_dict = dict(zip([str(i) for i in range(len(self.X))], self.X))

    # until all points have been processed
    while len(processed) != len(self.X):
        # if the queue is empty, take a random unprocessed point
        if len(Seed) == 0:
            unprocessed = list(set(list(X_dict.keys())) - set(processed))
            (o, r) = (random.choice(unprocessed), np.inf)
            self.clear_seed_log(Seed, o)
        # else take the point with minimum reachability and delete it from the queue
        else:
            o = min(Seed, key=Seed.get)
            r = Seed[o]
            self.clear_seed_log(Seed, o)
            del Seed[o]
            self.clear_seed_log(Seed, o)

        # scan the neighborhood of the point
        N = scan_neigh1(X_dict, X_dict[o], self.eps)

        # update the cluster dictionary and the core-distance dictionary
        self.ClustDist.update({o: r})
        self.CoreDist.update({o: minPTSdist(X_dict, o, self.mp, self.eps)})

        if delay != 0:
            pause_execution(delay)
        if plot is True:
            self.point_plot_gui(X_dict, X_dict[o], N, processed,
                                save_plots=self.save_plots,
                                ind_fig=index_for_saving_plots)
        if plot_reach is True:
            self.reach_plot_gui(X_dict, save_plots=self.save_plots,
                                ind_fig=index_for_saving_plots)
        index_for_saving_plots += 1

        # mark o as processed
        processed.append(o)

        # if the point is a core point
        if len(N) >= self.mp - 1:
            # for each unprocessed point in its neighborhood
            for n in N:
                if n in processed:
                    continue
                else:
                    # compute its reachability distance from o
                    p = reach_dist(X_dict, n, o, self.mp, self.eps)
                    # if it is in Seed, lower its reachability if the new one is smaller
                    if n in Seed:
                        if p < Seed[n]:
                            Seed[n] = p
                            self.clear_seed_log(Seed, o)
                    # else insert it into Seed
                    else:
                        Seed.update({n: p})
                        self.clear_seed_log(Seed, o)

    self.start_EXTRACT_OPTICS()
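
# The two distances OPTICS orders points by, as used above through minPTSdist and
# reach_dist: the core distance (distance to the MinPts-th nearest neighbor, here
# excluding the point itself; conventions vary) and the reachability distance
# (the larger of the core distance and the actual distance). A hedged, self-contained
# sketch with plain Euclidean distance; names are illustrative, not the module's own.
import numpy as np

def core_distance(X, i, eps, min_pts):
    d = np.linalg.norm(X - X[i], axis=1)
    d = np.sort(np.delete(d, i))  # distances to the other points, ascending
    # undefined (infinite) if the point is not core within eps
    return d[min_pts - 1] if d[min_pts - 1] <= eps else np.inf

def reachability_distance(X, p, o, eps, min_pts):
    return max(core_distance(X, o, eps, min_pts),
               np.linalg.norm(X[p] - X[o]))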