def _visualize_DBSCAN(self, cluster_name):
    """
    Draw a 2D plot of ``self.df`` using the given column as hue.

    Nothing is drawn unless ``self.dimensions`` equals 2.

    :param cluster_name: Column of ``self.df`` passed as hue; its number of
        distinct values sets the size of the categorical palette
    :return: None
    """
    # The distinct values are counted up front, before the dimension check,
    # so a missing column fails regardless of dimensionality.
    distinct_values = set(self.df[cluster_name].tolist())
    palette_size = len(distinct_values)

    is_two_dimensional = self.dimensions == 2
    if not is_two_dimensional:
        return

    frame = self.df
    colours = DataVisualization.create_categorical_palette(palette_size)
    DataVisualization.visualize_plot_2d(frame, hue=cluster_name, palette=colours)
def cluster_comparison_optics(self, radius, neighbors, visualize=False):
    """
    Run the comparison OPTICS clustering on ``self.data_frame``.

    :param radius: Forwarded unchanged to ``ComparisonClustering.cluster_optics``
    :param neighbors: Forwarded unchanged to ``ComparisonClustering.cluster_optics``
    :param visualize: If truthy and the data is 2-dimensional, plot the result
    :return: None
    """
    self.logger.info("Starting default optics Algorithm on All Points")

    clusterer = ComparisonClustering(self.data_frame)
    clusterer.cluster_optics(radius, neighbors)

    # Plotting is only attempted for 2D data and only on request.
    wants_plot = visualize and self.data_frame.dimensions == 2
    if not wants_plot:
        return

    frame = self.data_frame.df
    hue_column = clusterer.clustering_name
    colours = DataVisualization.create_categorical_palette(clusterer.cluster_count)
    DataVisualization.visualize_plot_2d(frame, hue=hue_column, palette=colours)
def cluster_comparison_mean_shift(self, bandwidth=None, visualize=False):
    """
    Run the comparison mean shift clustering on ``self.data_frame``.

    :param bandwidth: Forwarded unchanged to
        ``ComparisonClustering.cluster_mean_shift`` (defaults to None)
    :param visualize: If truthy and the data is 2-dimensional, plot the result
    :return: None
    """
    self.logger.info("Starting default mean shift Algorithm on All Points")

    clusterer = ComparisonClustering(self.data_frame)
    clusterer.cluster_mean_shift(bandwidth)

    # Plotting is only attempted for 2D data and only on request.
    wants_plot = visualize and self.data_frame.dimensions == 2
    if not wants_plot:
        return

    frame = self.data_frame.df
    hue_column = clusterer.clustering_name
    colours = DataVisualization.create_categorical_palette(clusterer.cluster_count)
    DataVisualization.visualize_plot_2d(frame, hue=hue_column, palette=colours)
def cluster_comparison_clique(self, intervals, threshold, visualize=False):
    """
    Run the comparison CLIQUE clustering on ``self.data_frame``.

    :param intervals: Forwarded unchanged to ``ComparisonClustering.cluster_clique``
    :param threshold: Forwarded unchanged to ``ComparisonClustering.cluster_clique``
    :param visualize: If truthy and the data is 2-dimensional, plot the result
    :return: None
    """
    self.logger.info("Starting default clique Algorithm on All Points")

    clusterer = ComparisonClustering(self.data_frame)
    clusterer.cluster_clique(intervals, threshold)

    # Plotting is only attempted for 2D data and only on request.
    wants_plot = visualize and self.data_frame.dimensions == 2
    if not wants_plot:
        return

    frame = self.data_frame.df
    hue_column = clusterer.clustering_name
    colours = DataVisualization.create_categorical_palette(clusterer.cluster_count)
    DataVisualization.visualize_plot_2d(frame, hue=hue_column, palette=colours)
def cluster_comparison_DBSCAN(self, eps, min_samples, visualize=False):
    """
    Run the comparison DBSCAN clustering on ``self.data_frame``.

    :param eps: Forwarded unchanged to ``ComparisonClustering.cluster_DBSCAN``
    :param min_samples: Forwarded unchanged to ``ComparisonClustering.cluster_DBSCAN``
    :param visualize: If truthy and the data is 2-dimensional, plot the result
    :return: None
    """
    self.logger.info("Starting default DBSCAN Algorithm on All Points")

    clusterer = ComparisonClustering(self.data_frame)
    clusterer.cluster_DBSCAN(eps, min_samples)

    # Plotting is only attempted for 2D data and only on request.
    wants_plot = visualize and self.data_frame.dimensions == 2
    if not wants_plot:
        return

    frame = self.data_frame.df
    hue_column = clusterer.clustering_name
    colours = DataVisualization.create_categorical_palette(clusterer.cluster_count)
    DataVisualization.visualize_plot_2d(frame, hue=hue_column, palette=colours)
def cluster_comparison_k_means(self, k, visualize=False):
    """
    Run the comparison k-Means clustering on ``self.data_frame``.

    :param k: Forwarded unchanged to ``ComparisonClustering.cluster_k_means``
    :param visualize: If truthy and the data is 2-dimensional, plot the result
    :return: None
    """
    self.logger.info('Starting default k-Means Algorithm on All Points')

    clusterer = ComparisonClustering(self.data_frame)
    clusterer.cluster_k_means(k)

    # Plotting is only attempted for 2D data and only on request.
    wants_plot = visualize and self.data_frame.dimensions == 2
    if not wants_plot:
        return

    frame = self.data_frame.df
    hue_column = clusterer.clustering_name
    colours = DataVisualization.create_categorical_palette(clusterer.cluster_count)
    DataVisualization.visualize_plot_2d(frame, hue=hue_column, palette=colours)
def cluster_scubi_DBSCAN(self, eps, min_samples, visualize=False):
    """
    Run the SCUBI-DBSCAN clustering through ``BorderPointClustering``.

    :param eps: Forwarded unchanged to ``BorderPointClustering.cluster_SCUBI_DBSCAN``
    :param min_samples: Forwarded unchanged to
        ``BorderPointClustering.cluster_SCUBI_DBSCAN``
    :param visualize: Forwarded to the clustering call as well; if truthy and
        the data is 2-dimensional, the final result is also plotted here
    :return: None
    """
    self.logger.info('Starting SCUBI-DBSCAN Algorithm on Border Points')

    clusterer = BorderPointClustering(self.data_frame)
    clusterer.cluster_SCUBI_DBSCAN(eps, min_samples, visualize)

    # Plotting is only attempted for 2D data and only on request.
    wants_plot = visualize and self.data_frame.dimensions == 2
    if not wants_plot:
        return

    frame = self.data_frame.df
    hue_column = clusterer.clustering_name
    colours = DataVisualization.create_categorical_palette(clusterer.cluster_count)
    DataVisualization.visualize_plot_2d(frame, hue=hue_column, palette=colours)
def cluster_comparison_hierarchical_single_link(self, cluster_count, visualize=False):
    """
    Run the comparison hierarchical single link clustering on ``self.data_frame``.

    :param cluster_count: Forwarded unchanged to
        ``ComparisonClustering.cluster_hierarchical``
    :param visualize: If truthy and the data is 2-dimensional, plot the result
    :return: None
    """
    self.logger.info(
        "Starting default hierarchical single link Algorithm on All Points"
    )

    clusterer = ComparisonClustering(self.data_frame)
    clusterer.cluster_hierarchical(cluster_count)

    # Plotting is only attempted for 2D data and only on request.
    wants_plot = visualize and self.data_frame.dimensions == 2
    if not wants_plot:
        return

    frame = self.data_frame.df
    hue_column = clusterer.clustering_name
    # The palette size comes from the clustering object, not from the argument.
    colours = DataVisualization.create_categorical_palette(clusterer.cluster_count)
    DataVisualization.visualize_plot_2d(frame, hue=hue_column, palette=colours)
def cluster_hierarchical_single_link(self, cluster_count, modifier=1, visualize=False):
    """
    Run hierarchical single-link clustering through ``BorderPointClustering``.

    :param cluster_count: Forwarded unchanged to
        ``BorderPointClustering.cluster_hierarchical``
    :param modifier: Forwarded unchanged to
        ``BorderPointClustering.cluster_hierarchical`` (defaults to 1)
    :param visualize: If truthy and the data is 2-dimensional, plot the result
    :return: The ``clustering_result`` attribute of the clustering object
    """
    self.logger.info('Starting Hierarchical Clustering using Single-Link')

    clusterer = BorderPointClustering(self.data_frame)
    clusterer.cluster_hierarchical(cluster_count, modifier)

    # Plotting is optional; the result is returned either way.
    wants_plot = visualize and self.data_frame.dimensions == 2
    if wants_plot:
        frame = self.data_frame.df
        hue_column = clusterer.clustering_name
        # The palette size comes from the clustering object, not from the argument.
        colours = DataVisualization.create_categorical_palette(
            clusterer.cluster_count
        )
        DataVisualization.visualize_plot_2d(frame, hue=hue_column, palette=colours)

    return clusterer.clustering_result
def visualize(df, dimensions, visualize, hue, title=None, k=None, show_directions=False):
    """
    Common scatter-plot entry point that colours points by exactly ONE hue column.

    Dispatches on ``dimensions``: 2 uses the 2D plot, 3 uses the 3D plot, and
    any other value only logs a warning.

    :param df: The data points
    :param dimensions: Dimensions of the data
    :param visualize: If the df is to be visualized; when falsy nothing happens
    :param hue: The column to be used as hue from the data frame
    :param title: The title of the plot (only passed to the 2D plot)
    :param k: Passed through to the 2D plot as ``k`` (not used for 3D)
    :param show_directions: If truthy, the 2D plot is additionally called
        with ``edges=0``
    :return: None; displays the plot as a side effect
    """
    if not visualize:
        return

    if dimensions == 2:
        # ``edges`` is only supplied when directions were requested, so the
        # plotting function's own default applies otherwise.
        optional_args = {"edges": 0} if show_directions else {}
        DataVisualization.visualize_plot_2d(
            df, k=k, hue=hue, reverse=False, title=title, **optional_args
        )
        return

    if dimensions == 3:
        DataVisualization.visualize_plot_3d(df, hue=hue)
        return

    message = "Cannot visualize " + str(dimensions) + " dimensions in a scatter plot"
    logging.warning(message)