def apply(self):
    """Concatenate the input tables and send the result to ``Outputs.data``.

    If primary data is present, the output uses its domain and the primary
    table is placed first, followed by the additional tables. Otherwise the
    domain is the union or the intersection of the additional tables'
    domains, depending on ``merge_type``. When ``append_source_column`` is
    enabled, a discrete variable named ``source_attr_name`` is added in the
    role selected by ``source_column_role`` and filled with the index of the
    table each row came from. ``None`` is sent when there are no tables.
    """
    domain = source_var = None
    primary = self.primary_data
    if primary is not None:
        tables = [primary]
        tables.extend(self.more_data.values())
        domain = primary.domain
    elif self.more_data:
        tables = self.more_data.values()
        if self.merge_type == OWConcatenate.MergeUnion:
            merge = domain_union
        else:
            merge = domain_intersection
        domain = reduce(merge, (table.domain for table in tables))
    else:
        tables = []

    if tables and self.append_source_column:
        assert domain is not None
        names = [getattr(table, 'name', '') for table in tables]
        if len(set(names)) != len(names):
            # Table names clash: append the table's position to every name
            # so that the values of the source variable are distinct.
            names = ['{} ({})'.format(name, i)
                     for i, name in enumerate(names)]
        source_var = Orange.data.DiscreteVariable(
            self.source_attr_name, values=names)
        # source_column_role is an index selecting the add_columns keyword.
        role = ("class_vars", "attributes", "metas")[self.source_column_role]
        domain = add_columns(domain, **{role: (source_var,)})

    tables = [table.transform(domain) for table in tables]
    if not tables:
        data = None
    else:
        data = type(tables[0]).concatenate(tables, axis=0)
        if source_var:
            # One source index per row, in concatenation order.
            per_table = ([index] * len(table)
                         for index, table in enumerate(tables))
            source_ids = np.array(list(flatten(per_table))).reshape((-1, 1))
            data[:, source_var] = source_ids
    self.Outputs.data.send(data)
def send_data(self):
    """Send the cluster-annotated data and the centroids.

    The number of clusters is ``self.k``, or, when ``optimize_k`` is set,
    ``k_from`` plus the selected row (no clustering if no row is selected).
    If there is no data, no clustering for that k, or the stored entry is a
    string, ``None`` is sent on both outputs. Otherwise the data gets two
    meta columns, a discrete "Cluster" and a continuous "Silhouette", and
    the centroids are sent as a table over ``km.pre_domain.attributes``.
    """
    if not self.optimize_k:
        k = self.k
    else:
        row = self.selected_row()
        k = None if row is None else self.k_from + row

    km = self.clusterings.get(k)
    if self.data is None or km is None or isinstance(km, str):
        # NOTE(review): a str entry presumably holds an error message from a
        # failed clustering - confirm against the code filling clusterings.
        self.Outputs.annotated_data.send(None)
        self.Outputs.centroids.send(None)
        return

    domain = self.data.domain
    cluster_var = DiscreteVariable(
        get_unique_names(domain, "Cluster"),
        values=["C%d" % number for number in range(1, km.k + 1)])
    clust_ids = km(self.data)
    silhouette_var = ContinuousVariable(
        get_unique_names(domain, "Silhouette"))

    samples = km.silhouette_samples
    if samples is None:
        self.Warning.no_silhouettes()
        scores = np.nan
    else:
        self.Warning.no_silhouettes.clear()
        # arctan squashes the values into (-pi/2, pi/2); dividing by pi and
        # shifting by 0.5 maps them into the (0, 1) interval.
        scores = np.arctan(samples) / np.pi + 0.5

    new_table = self.data.transform(
        add_columns(domain, metas=[cluster_var, silhouette_var]))
    columns = ((cluster_var, clust_ids.X.ravel()), (silhouette_var, scores))
    for variable, values in columns:
        new_table.get_column_view(variable)[0][:] = values

    centroids = Table(Domain(km.pre_domain.attributes), km.centroids)

    self.Outputs.annotated_data.send(new_table)
    self.Outputs.centroids.send(centroids)
def _send_data(self):
    """Send the data annotated with a cluster column and, if possible, the graph.

    Cluster indices from ``self.partition`` are renumbered by decreasing
    cluster size, so that the cluster with the most members becomes the
    first value. The result is stored in a new discrete "Cluster" meta
    column. Nothing is sent when the partition or the data is missing.
    """
    if self.partition is None or self.data is None:
        return
    domain = self.data.domain

    # Rank the cluster indices by their number of members, largest first
    counts = np.bincount(self.partition)
    by_size = np.argsort(counts)[::-1]
    rank_of = {cluster: rank for rank, cluster in enumerate(by_size)}
    new_partition = [rank_of.get(cluster) for cluster in self.partition]

    cluster_var = DiscreteVariable(
        get_unique_names(domain, 'Cluster'),
        values=['C%d' % number
                for number in range(1, len(np.unique(new_partition)) + 1)])

    new_table = self.data.transform(add_columns(domain, metas=[cluster_var]))
    new_table.get_column_view(cluster_var)[0][:] = new_partition
    self.Outputs.annotated_data.send(new_table)

    # NOTE(review): Graph is presumably None when the optional network
    # add-on is not installed - confirm at the import site.
    if Graph is None:
        return
    graph = Graph(self.graph)
    graph.set_items(new_table)
    self.Outputs.graph.send(graph)
def send_data(self):
    """Output the data with cluster annotations, and the cluster centroids.

    With ``optimize_k`` enabled, k is ``k_from`` plus the selected row (or
    undefined when no row is selected); otherwise it is ``self.k``. Both
    outputs receive ``None`` if the data is missing or the entry stored for
    k in ``self.clusterings`` is missing or is a string. Otherwise "Cluster"
    and "Silhouette" meta columns are appended to the data, and a table of
    centroids over ``km.pre_domain.attributes`` is sent alongside.
    """
    if self.optimize_k:
        row = self.selected_row()
        k = None if row is None else self.k_from + row
    else:
        k = self.k

    clustering = self.clusterings.get(k)
    unusable = (self.data is None
                or clustering is None
                or isinstance(clustering, str))
    if unusable:
        # NOTE(review): string entries presumably describe a failed
        # clustering - confirm where self.clusterings is populated.
        self.Outputs.annotated_data.send(None)
        self.Outputs.centroids.send(None)
        return

    domain = self.data.domain
    cluster_name = get_next_name(domain, "Cluster")
    cluster_var = DiscreteVariable(
        cluster_name,
        values=["C%d" % (index + 1) for index in range(clustering.k)])
    assigned = clustering(self.data)
    silhouette_var = ContinuousVariable(get_next_name(domain, "Silhouette"))

    if clustering.silhouette_samples is None:
        self.Warning.no_silhouettes()
        scores = np.nan
    else:
        self.Warning.no_silhouettes.clear()
        # Rescale the silhouettes into the (0, 1) interval.
        scores = np.arctan(clustering.silhouette_samples) / np.pi + 0.5

    annotated_domain = add_columns(
        domain, metas=[cluster_var, silhouette_var])
    annotated = self.data.transform(annotated_domain)
    annotated.get_column_view(cluster_var)[0][:] = assigned.X.ravel()
    annotated.get_column_view(silhouette_var)[0][:] = scores

    centroid_domain = Domain(clustering.pre_domain.attributes)
    centroids = Table(centroid_domain, clustering.centroids)

    self.Outputs.annotated_data.send(annotated)
    self.Outputs.centroids.send(centroids)
def send_data(self):
    """Send the cluster-annotated data and an annotated table of centroids.

    k is ``self.k``, or ``k_from`` plus the selected row when ``optimize_k``
    is set. If the data is missing, or the entry for k in
    ``self.clusterings`` is missing or is a string, ``None`` is sent on both
    outputs. Otherwise:

    - the data gets "Cluster" and "Silhouette" meta columns;
    - the centroids table carries the same two metas (cluster index and the
      mean silhouette score of the cluster, 0 for a cluster with no
      members) after the metas of the input domain, which are left unknown.
    """
    if not self.optimize_k:
        k = self.k
    else:
        row = self.selected_row()
        k = None if row is None else self.k_from + row

    km = self.clusterings.get(k)
    if self.data is None or km is None or isinstance(km, str):
        self.Outputs.annotated_data.send(None)
        self.Outputs.centroids.send(None)
        return

    domain = self.data.domain
    cluster_var = DiscreteVariable(
        get_unique_names(domain, "Cluster"),
        values=["C%d" % number for number in range(1, km.k + 1)])
    clust_ids = km(self.data)
    clust_col = clust_ids.X.ravel()
    silhouette_var = ContinuousVariable(
        get_unique_names(domain, "Silhouette"))

    if km.silhouette_samples is None:
        self.Warning.no_silhouettes()
        scores = np.nan
        clust_scores = np.full((km.k, 1), np.nan)
    else:
        self.Warning.no_silhouettes.clear()
        # Rescale the silhouettes into the (0, 1) interval.
        scores = np.arctan(km.silhouette_samples) / np.pi + 0.5
        memberships = (clust_col == index for index in range(km.k))
        # Column vector with the mean score of every cluster.
        clust_scores = np.atleast_2d([
            np.mean(scores[members]) if members.any() else 0.
            for members in memberships]).T

    new_domain = add_columns(domain, metas=[cluster_var, silhouette_var])
    new_table = self.data.transform(new_domain)
    new_table.get_column_view(cluster_var)[0][:] = clust_col
    new_table.get_column_view(silhouette_var)[0][:] = scores

    # Where a clustering attribute was computed by ReplaceUnknowns from an
    # attribute of the input data, report the centroid under the original
    # attribute instead.
    centroid_attributes = []
    for attr in km.pre_domain.attributes:
        transformation = attr.compute_value
        if isinstance(transformation, ReplaceUnknowns) \
                and transformation.variable in domain.attributes:
            attr = transformation.variable
        centroid_attributes.append(attr)
    centroid_domain = add_columns(
        Domain(centroid_attributes, [], domain.metas),
        metas=[cluster_var, silhouette_var])

    unknown_metas = np.full((km.k, len(domain.metas)), np.nan)
    cluster_indices = np.arange(km.k).reshape(km.k, 1)
    centroid_metas = np.hstack((unknown_metas, cluster_indices, clust_scores))
    centroids = Table(centroid_domain, km.centroids, None, centroid_metas)

    # NOTE(review): Table.name is presumably the default name of a table
    # that was never named - confirm in Orange.data.Table.
    data_name = self.data.name
    if data_name == Table.name:
        centroids.name = "centroids"
    else:
        centroids.name = f"{data_name} centroids"

    self.Outputs.annotated_data.send(new_table)
    self.Outputs.centroids.send(centroids)
def send_data(self):
    """Send the cluster-annotated data and an annotated table of centroids.

    k is ``self.k``, or ``k_from`` plus the selected row when ``optimize_k``
    is set. If the data is missing, or the entry for k in
    ``self.clusterings`` is missing or is a string, ``None`` is sent on both
    outputs. Otherwise the data gets "Cluster" (from ``km.labels``) and
    "Silhouette" meta columns, and the centroids are sent as a table that
    carries the cluster index and the mean silhouette score of each cluster
    in the same two metas.
    """
    if not self.optimize_k:
        k = self.k
    else:
        row = self.selected_row()
        k = None if row is None else self.k_from + row

    km = self.clusterings.get(k)
    if self.data is None or km is None or isinstance(km, str):
        self.Outputs.annotated_data.send(None)
        self.Outputs.centroids.send(None)
        return

    domain = self.data.domain
    cluster_var = DiscreteVariable(
        get_unique_names(domain, "Cluster"),
        values=["C%d" % number for number in range(1, km.k + 1)])
    clust_ids = km.labels
    silhouette_var = ContinuousVariable(
        get_unique_names(domain, "Silhouette"))

    if km.silhouette_samples is None:
        self.Warning.no_silhouettes()
        scores = np.nan
        clust_scores = np.full((km.k, 1), np.nan)
    else:
        self.Warning.no_silhouettes.clear()
        # Rescale the silhouettes into the (0, 1) interval.
        scores = np.arctan(km.silhouette_samples) / np.pi + 0.5
        memberships = (clust_ids == index for index in range(km.k))
        # Column vector of per-cluster mean scores; 0 for empty clusters.
        clust_scores = np.atleast_2d([
            np.mean(scores[members]) if members.any() else 0.
            for members in memberships]).T

    new_domain = add_columns(domain, metas=[cluster_var, silhouette_var])
    new_table = self.data.transform(new_domain)
    # The metas must be unlocked before the new columns can be filled in.
    with new_table.unlocked(new_table.metas):
        columns = ((cluster_var, clust_ids), (silhouette_var, scores))
        for variable, values in columns:
            new_table.get_column_view(variable)[0][:] = values

    # Where a clustering attribute was computed by ReplaceUnknowns from an
    # attribute of the input data, report the centroid under the original
    # attribute instead.
    domain_attributes = set(domain.attributes)
    centroid_attributes = []
    for attr in km.domain.attributes:
        transformation = attr.compute_value
        if isinstance(transformation, ReplaceUnknowns) \
                and transformation.variable in domain_attributes:
            attr = transformation.variable
        centroid_attributes.append(attr)
    centroid_domain = add_columns(
        Domain(centroid_attributes, [], domain.metas),
        metas=[cluster_var, silhouette_var])

    unknown_metas = np.full((km.k, len(domain.metas)), np.nan)
    cluster_indices = np.arange(km.k).reshape(km.k, 1)
    centroid_metas = np.hstack((unknown_metas, cluster_indices, clust_scores))
    # The table is built from a copy of the centroids: data stored in the
    # widget can be modified, so the widget should preferably output a
    # copy. There are only a few centroids, hence copying them is cheap.
    centroids = Table(
        centroid_domain, km.centroids.copy(), None, centroid_metas)

    # NOTE(review): Table.name is presumably the default name of a table
    # that was never named - confirm in Orange.data.Table.
    data_name = self.data.name
    if data_name == Table.name:
        centroids.name = "centroids"
    else:
        centroids.name = f"{data_name} centroids"

    self.Outputs.annotated_data.send(new_table)
    self.Outputs.centroids.send(centroids)
def send_data(self):
    """Output the annotated data and the annotated cluster centroids.

    With ``optimize_k`` enabled, k is ``k_from`` plus the selected row (or
    undefined when no row is selected); otherwise it is ``self.k``. Both
    outputs receive ``None`` if the data is missing or the entry stored for
    k in ``self.clusterings`` is missing or is a string. Otherwise the data
    is extended with "Cluster" and "Silhouette" metas, and the centroids
    table gets the same metas holding the cluster index and the mean
    silhouette score of the cluster (0 when the cluster has no members).
    """
    if self.optimize_k:
        row = self.selected_row()
        k = None if row is None else self.k_from + row
    else:
        k = self.k

    km = self.clusterings.get(k)
    nothing_to_send = (
        self.data is None or km is None or isinstance(km, str))
    if nothing_to_send:
        self.Outputs.annotated_data.send(None)
        self.Outputs.centroids.send(None)
        return

    domain = self.data.domain
    cluster_name = get_unique_names(domain, "Cluster")
    cluster_var = DiscreteVariable(
        cluster_name, values=["C%d" % (index + 1) for index in range(km.k)])
    clust_ids = km(self.data)
    clust_col = clust_ids.X.ravel()
    silhouette_var = ContinuousVariable(
        get_unique_names(domain, "Silhouette"))

    if km.silhouette_samples is not None:
        self.Warning.no_silhouettes.clear()
        # Rescale the silhouettes into the (0, 1) interval.
        scores = np.arctan(km.silhouette_samples) / np.pi + 0.5

        def mean_score(index):
            # Mean score of the members of one cluster; 0 if it is empty.
            members = clust_col == index
            return np.mean(scores[members]) if members.any() else 0.

        clust_scores = np.atleast_2d(
            [mean_score(index) for index in range(km.k)]).T
    else:
        self.Warning.no_silhouettes()
        scores = np.nan
        clust_scores = np.full((km.k, 1), np.nan)

    annotated = self.data.transform(
        add_columns(domain, metas=[cluster_var, silhouette_var]))
    annotated.get_column_view(cluster_var)[0][:] = clust_col
    annotated.get_column_view(silhouette_var)[0][:] = scores

    def reported_attribute(attr):
        # An attribute computed by ReplaceUnknowns from an attribute of the
        # input data is reported as that original attribute.
        transformation = attr.compute_value
        if isinstance(transformation, ReplaceUnknowns) \
                and transformation.variable in domain.attributes:
            return transformation.variable
        return attr

    centroid_attributes = [
        reported_attribute(attr) for attr in km.pre_domain.attributes]
    centroid_domain = add_columns(
        Domain(centroid_attributes, [], domain.metas),
        metas=[cluster_var, silhouette_var])

    # Metas of the centroids: unknowns for the metas of the input data,
    # then the cluster index and the mean score of the cluster.
    centroid_metas = np.hstack((
        np.full((km.k, len(domain.metas)), np.nan),
        np.arange(km.k).reshape(km.k, 1),
        clust_scores))
    centroids = Table(centroid_domain, km.centroids, None, centroid_metas)

    # NOTE(review): Table.name is presumably the default name of a table
    # that was never named - confirm in Orange.data.Table.
    if self.data.name != Table.name:
        centroids.name = f"{self.data.name} centroids"
    else:
        centroids.name = "centroids"

    self.Outputs.annotated_data.send(annotated)
    self.Outputs.centroids.send(centroids)