def commit(self):
    """Send the input data extended with X and Y map-coordinate metas.

    The coordinates are added only when data is present, both ``xpoints``
    and ``ypoints`` are set and their product equals the number of rows.
    In every other case ``None`` is sent on the ``map`` output.
    """
    table = self.data
    result = None
    if table and self.xpoints is not None and self.ypoints is not None \
            and self.xpoints * self.ypoints == len(table):
        source = table.domain
        existing = [var.name for var in source.variables + source.metas]
        x_var = Orange.data.ContinuousVariable.make(
            get_unique_names(existing, "X"))
        y_var = Orange.data.ContinuousVariable.make(
            get_unique_names(existing, "Y"))

        # add new variables for X and Y dimension to the data domain
        extended = Orange.data.Domain(
            source.attributes, source.class_vars,
            source.metas + (x_var, y_var))
        result = table.transform(extended)

        n_rows = len(table)
        # X cycles through 0..xpoints-1; Y repeats each value in a run
        x_column = np.tile(np.arange(self.xpoints), n_rows // self.xpoints)
        y_column = np.repeat(np.arange(self.ypoints), n_rows // self.ypoints)
        result[:, x_var] = x_column.reshape(-1, 1)
        result[:, y_var] = y_column.reshape(-1, 1)
    self.Outputs.map.send(result)
def send_data(self):
    """Output the data with cluster and core-sample columns added as metas.

    Negative model labels are stored as missing values.  The new table is
    built directly from the input arrays and sent on ``annotated_data``.
    """
    model = self.model
    n_rows = len(self.data)

    labels = [c if c >= 0 else np.nan for c in model.labels]
    n_clusters = len(set(labels) - {np.nan})
    cluster_col = np.array(labels).reshape(n_rows, 1)

    core = set(model.projector.core_sample_indices_)
    core_flags = np.array([int(row in core) for row in range(n_rows)])
    core_col = core_flags.reshape(n_rows, 1)

    source = self.data.domain
    attributes, classes = source.attributes, source.class_vars
    meta_vars = source.metas
    taken = [var.name
             for var in chain(attributes, classes, meta_vars) if var]

    cluster_name = get_unique_names(taken, "Cluster")
    cluster_var = DiscreteVariable(
        cluster_name,
        values=["C%d" % (idx + 1) for idx in range(n_clusters)])
    core_name = get_unique_names(taken + [cluster_name], "DBSCAN Core")
    core_var = DiscreteVariable(core_name, values=("0", "1"))

    # the two new columns go to the end of the metas, cluster first
    meta_vars = meta_vars + (cluster_var, core_var)
    meta_values = np.hstack((self.data.metas, cluster_col, core_col))

    annotated = Table(Domain(attributes, classes, meta_vars),
                      self.data.X, self.data.Y, meta_values, self.data.W)
    self._set_output_summary(annotated)
    self.Outputs.annotated_data.send(annotated)
def send_data(self):
    """Output the data with cluster and core-sample columns added as metas.

    Negative model labels are stored as missing values.  Both columns are
    appended with ``add_column`` and the result is sent on
    ``annotated_data``.
    """
    model = self.model

    labels = [c if c >= 0 else np.nan for c in model.labels]
    n_clusters = len(set(labels) - {np.nan})
    cluster_col = np.array(labels)

    core = set(model.projector.core_sample_indices_)
    core_col = np.array(
        [int(row in core) for row in range(len(self.data))])

    source = self.data.domain
    taken = [
        var.name
        for var in chain(source.attributes, source.class_vars, source.metas)
        if var
    ]
    cluster_name = get_unique_names(taken, "Cluster")
    cluster_var = DiscreteVariable(
        cluster_name,
        values=["C%d" % (idx + 1) for idx in range(n_clusters)])
    core_name = get_unique_names(taken + [cluster_name], "DBSCAN Core")
    core_var = DiscreteVariable(core_name, values=("0", "1"))

    with_clusters = self.data.add_column(
        cluster_var, cluster_col, to_metas=True)
    annotated = with_clusters.add_column(core_var, core_col, to_metas=True)
    self.Outputs.annotated_data.send(annotated)
def __call__(self, data):
    """Return ``data`` converted to a domain with EMSC-computed attributes.

    Every attribute is copied with an ``EMSCFeature`` compute value.  When
    ``output_model`` is set, model parameters are appended as continuous
    metas backed by ``EMSCModel``, indexed right after the attributes.
    """
    source = data.domain
    # creates function for transforming data
    common = _EMSC(self.reference, self.badspectra, self.weights,
                   self.order, self.scaling, source)
    # takes care of domain column-wise, by above transformation function
    atts = [attr.copy(compute_value=EMSCFeature(col, common))
            for col, attr in enumerate(source.attributes)]

    n_badspec = 0 if self.badspectra is None else len(self.badspectra)
    used_names = {var.name for var in source.variables + source.metas}

    model_metas = []
    if self.output_model:
        labels = ["EMSC parameter " + str(o)
                  for o in range(self.order + 1)]
        labels += ["EMSC parameter bad spec " + str(o)
                   for o in range(n_badspec)]
        labels.append("EMSC scaling parameter")
        # model columns are numbered consecutively after the attributes
        first = len(source.attributes)
        for col, label in enumerate(labels, start=first):
            model_metas.append(Orange.data.ContinuousVariable(
                name=get_unique_names(used_names, label),
                compute_value=EMSCModel(col, common)))

    domain = Orange.data.Domain(atts, source.class_vars,
                                source.metas + tuple(model_metas))
    return data.from_table(domain, data)
def __call__(self, data):
    """Return ``data`` converted to a domain with ME-EMSC-computed attributes.

    Every attribute is copied with an ``ME_EMSCFeature`` compute value.
    When ``output_model`` is set, model parameters, the iteration count and
    the RMSE are appended as continuous metas backed by ``ME_EMSCModel``,
    indexed right after the attributes.
    """
    source = data.domain
    # creates function for transforming data
    common = _ME_EMSC(reference=self.reference,
                      weights=self.weights,
                      ncomp=self.ncomp,
                      alpha0=self.alpha0,
                      gamma=self.gamma,
                      maxNiter=self.maxNiter,
                      fixedNiter=self.fixedNiter,
                      positiveRef=self.positiveRef,
                      domain=source)
    # takes care of domain column-wise, by above transformation function
    atts = [attr.copy(compute_value=ME_EMSCFeature(col, common))
            for col, attr in enumerate(source.attributes)]

    n_badspec = self.ncomp
    # Check if function knows about bad spectra
    used_names = {var.name for var in source.variables + source.metas}

    model_metas = []
    if self.output_model:
        labels = ["EMSC parameter " + str(o) for o in range(1)]
        labels += ["EMSC parameter bad spec " + str(o)
                   for o in range(n_badspec)]
        labels += ["EMSC scaling parameter", "Number of iterations", "RMSE"]
        # model columns are numbered consecutively after the attributes
        first = len(source.attributes)
        for col, label in enumerate(labels, start=first):
            model_metas.append(Orange.data.ContinuousVariable(
                name=get_unique_names(used_names, label),
                compute_value=ME_EMSCModel(col, common)))

    domain = Orange.data.Domain(atts, source.class_vars,
                                source.metas + tuple(model_metas))
    return data.from_table(domain, data)
def commit(self):
    """Send the transformed data, components, extended data and projector.

    When no PCA model is available, ``None`` is sent on the
    ``transformed_data``, ``components`` and ``data`` outputs.  The
    projector's ``component`` attribute is set and the projector is sent in
    either case.
    """
    transformed = data = components = None
    if self._pca is not None:
        if self._transformed is None:
            # Compute the full transform (MAX_COMPONENTS components) once.
            self._transformed = self._pca(self.data)
        transformed = self._transformed
        if self._variance_ratio is not None:
            # store the rounded explained variance on the first
            # ``ncomponents`` component variables
            for var, explvar in zip(
                    transformed.domain.attributes,
                    self._variance_ratio[:self.ncomponents]):
                var.attributes["variance"] = round(explvar, 6)
        # keep only the first ``ncomponents`` components as attributes,
        # together with the input's class variables and metas
        domain = Domain(transformed.domain.attributes[:self.ncomponents],
                        self.data.domain.class_vars,
                        self.data.domain.metas)
        transformed = transformed.from_table(domain, transformed)
        # prevent caching new features by defining compute_value
        proposed = [a.name for a in self._pca.orig_domain.attributes]
        meta_name = get_unique_names(proposed, 'components')
        meta_vars = [StringVariable(name=meta_name)]
        # one row per component, labelled PC1, PC2, ...
        metas = numpy.array(
            [['PC{}'.format(i + 1) for i in range(self.ncomponents)]],
            dtype=object).T
        if self._variance_ratio is not None:
            # second meta column holds the variance ratio of each component
            variance_name = get_unique_names(proposed, "variance")
            meta_vars.append(ContinuousVariable(variance_name))
            metas = numpy.hstack(
                (metas, self._variance_ratio[:self.ncomponents, None]))
        dom = Domain([
            ContinuousVariable(name, compute_value=lambda _: None)
            for name in proposed
        ], metas=meta_vars)
        components = Table(dom, self._pca.components_[:self.ncomponents],
                           metas=metas)
        components.name = 'components'
        # input data with the selected components appended as metas
        data_dom = Domain(self.data.domain.attributes,
                          self.data.domain.class_vars,
                          self.data.domain.metas + domain.attributes)
        data = Table.from_numpy(data_dom, self.data.X, self.data.Y,
                                numpy.hstack(
                                    (self.data.metas, transformed.X)),
                                ids=self.data.ids)
    self._pca_projector.component = self.ncomponents
    self.Outputs.transformed_data.send(transformed)
    self.Outputs.components.send(components)
    self.Outputs.data.send(data)
    self.Outputs.pca.send(self._pca_projector)
def concatenate_data(tables, filenames, label):
    """Concatenate ``tables`` into one table with source and label metas.

    Tables carrying a ``special_spectral_data`` attribute contribute their
    x values to a common domain, and their spectra are written into the
    matching columns after concatenation.  Two string metas are added: one
    with the file name of each row's table and one with ``label``.

    Returns ``None`` when ``tables`` is empty.
    """
    if not tables:
        return None
    originals = tables

    # prepare xs from the spectral specific tables for join into a common
    # domain
    special_xs = [
        t.special_spectral_data[0]
        for t in originals if hasattr(t, "special_spectral_data")
    ]
    xs = reduce(numpy_union_keep_order, special_xs, np.array([]))
    spectral_specific_domains = []
    if len(xs):
        spectral_attrs = [ContinuousVariable("%f" % x) for x in xs]
        spectral_specific_domains = [Domain(spectral_attrs, None, None)]

    domain = _merge_domains(
        spectral_specific_domains + [t.domain for t in originals])
    source_var = StringVariable(get_unique_names(domain, "Filename"))
    label_var = StringVariable(get_unique_names(domain, "Label"))
    domain = add_columns(domain, metas=(source_var, label_var))

    # concatenate tables
    tables = [t.transform(domain) for t in originals]
    data = type(tables[0]).concatenate(tables)

    with data.unlocked():
        # fill in spectral data
        order = np.argsort(xs)
        sorted_xs = xs[order]
        start = 0
        for t in originals:
            stop = start + len(t)
            if hasattr(t, "special_spectral_data"):
                special = t.special_spectral_data
                columns = order[np.searchsorted(sorted_xs, special[0])]
                data.X[start:stop, columns] = special[1]
            start = stop

        # one entry per row, in the order of the concatenated tables
        sources = [fn
                   for fn, t in zip(filenames, tables)
                   for _ in range(len(t))]
        data[:, source_var] = np.array(sources).reshape(-1, 1)
        labels = [label
                  for _fn, t in zip(filenames, tables)
                  for _ in range(len(t))]
        data[:, label_var] = np.array(labels).reshape(-1, 1)
    return data
def test_get_unique_names_duplicated_proposals(self):
    """Check indices assigned when the proposals contain duplicates.

    The first group of cases uses the default arguments, the second passes
    ``equal_numbers=False``.
    """
    names = ["foo", "bar", "baz", "baz (3)"]
    short = ["foo", "boo", "boo"]
    full = ["foo", "boo", "boo", "baz"]

    default_cases = [
        (names, short, ['foo (1)', 'boo (1)', 'boo (2)']),
        (names, full, ['foo (4)', 'boo (4)', 'boo (5)', 'baz (4)']),
        ([], full, ['foo', 'boo (1)', 'boo (2)', 'baz']),
        (["foo", "bong"], full, ['foo (1)', 'boo (1)', 'boo (2)', 'baz']),
    ]
    for existing, proposed, expected in default_cases:
        self.assertEqual(
            get_unique_names(list(existing), list(proposed)), expected)

    unequal_cases = [
        (names, short, ['foo (1)', 'boo (1)', 'boo (2)']),
        (names, full, ['foo (1)', 'boo (1)', 'boo (2)', 'baz (4)']),
        ([], full, ['foo', 'boo (1)', 'boo (2)', 'baz']),
        (["foo", "bong"], full, ['foo (1)', 'boo (1)', 'boo (2)', 'baz']),
    ]
    for existing, proposed, expected in unequal_cases:
        self.assertEqual(
            get_unique_names(list(existing), list(proposed),
                             equal_numbers=False),
            expected)
def _prepare_data(self):
    """Build the output tables for the current selection.

    Returns a ``(data, annotated_data)`` pair.  ``data`` holds the rows
    whose (actual, predicted) pair matches a selected cell, or ``None`` when
    no row matches.  ``annotated_data`` is produced by
    ``create_annotated_table`` for the same row selection.  Predictions
    and/or probabilities of the selected learner are appended as metas when
    the corresponding options are enabled.
    """
    # view indices are shifted by two in both directions
    cells = {(idx.row() - 2, idx.column() - 2)
             for idx in self.tableview.selectedIndexes()}
    learner_idx = self.selected_learner[0]
    actual = self.results.actual
    learner_name = self.learners[learner_idx]
    predicted = self.results.predicted[learner_idx]
    selected = [row
                for row, pair in enumerate(zip(actual, predicted))
                if pair in cells]

    source = self.data.domain
    class_var = source.class_var
    metas = source.metas
    used = [var.name
            for var in chain(metas, [class_var], source.attributes)]

    extra = []
    if self.append_predictions:
        extra.append(predicted.reshape(-1, 1))
        pred_name = get_unique_names(
            used, "{}({})".format(class_var.name, learner_name))
        metas = metas + (
            Orange.data.DiscreteVariable(pred_name, class_var.values), )

    if self.append_probabilities and \
            self.results.probabilities is not None:
        probs = self.results.probabilities[learner_idx]
        extra.append(np.array(probs, dtype=object))
        prob_names = get_unique_names(
            source, [f"p({value})" for value in class_var.values])
        metas += tuple(
            Orange.data.ContinuousVariable(name) for name in prob_names)

    domain = Orange.data.Domain(source.attributes, source.class_vars, metas)
    data = self.data.transform(domain)
    if extra:
        # the appended columns follow the original metas
        with data.unlocked(data.metas):
            data.metas[:, len(source.metas):] = np.hstack(tuple(extra))
    data.name = learner_name

    annotated_data = create_annotated_table(data, selected)
    data = data[selected] if selected else None
    return data, annotated_data
def _commit_predictions(self): if not self.data: self.Outputs.predictions.send(None) return newmetas = [] newcolumns = [] for slot in self._non_errored_predictors(): if slot.predictor.domain.class_var.is_discrete: self._add_classification_out_columns(slot, newmetas, newcolumns) else: self._add_regression_out_columns(slot, newmetas, newcolumns) attrs = list(self.data.domain.attributes) metas = list(self.data.domain.metas) names = [var.name for var in chain(attrs, self.data.domain.class_vars, metas) if var] uniq_newmetas = [] for new_ in newmetas: uniq = get_unique_names(names, new_.name) if uniq != new_.name: new_ = new_.copy(name=uniq) uniq_newmetas.append(new_) names.append(uniq) metas += uniq_newmetas domain = Orange.data.Domain(attrs, self.class_var, metas=metas) predictions = self.data.transform(domain) if newcolumns: newcolumns = numpy.hstack( [numpy.atleast_2d(cols) for cols in newcolumns]) predictions.metas[:, -newcolumns.shape[1]:] = newcolumns self.Outputs.predictions.send(predictions)
def commit(self):
    """Run label propagation on the network and send the annotated result.

    Without a network, ``None`` is sent on both outputs and the stored
    cluster feature name is reset.
    """
    self.infolabel.setText(' ')

    kwargs = dict(iterations=self.iterations)
    if self.method == 0:
        alg = cd.label_propagation
    elif self.method == 1:
        alg = cd.label_propagation_hop_attenuation
        kwargs['delta'] = self.hop_attenuation

    network = self.net
    if network is None:
        self.Outputs.items.send(None)
        self.Outputs.network.send(None)
        self.cluster_feature = None
        return

    if self.use_random_state:
        kwargs['seed'] = 0
    labels = alg(network, **kwargs)

    # Tie a name for presenting clustering results to the widget instance
    if self.cluster_feature is None:
        self.cluster_feature = get_unique_names(
            network.nodes.domain, 'Cluster')
    cd.add_results_to_items(network, labels, self.cluster_feature)

    n_found = len(set(labels.values()))
    self.infolabel.setText('%d clusters found' % n_found)
    self.Outputs.items.send(self.net.nodes)
    self.Outputs.network.send(self.net)
def commit(self):
    """Run label propagation on the network and send the annotated result.

    Without a network, ``None`` is sent on both outputs, the stored cluster
    feature name is reset and the output summary is set to ``NoOutput``.
    """
    kwargs = dict(iterations=self.iterations)
    if not self.attenuate:
        alg = cd.label_propagation
    else:
        alg = cd.label_propagation_hop_attenuation
        kwargs['delta'] = self.hop_attenuation

    network = self.net
    if network is None:
        self.Outputs.items.send(None)
        self.Outputs.network.send(None)
        self.cluster_feature = None
        self.info.set_output_summary(self.info.NoOutput)
        return

    if self.use_random_state:
        kwargs['seed'] = 0
    labels = alg(network, **kwargs)

    # Tie a name for presenting clustering results to the widget instance
    if self.cluster_feature is None:
        self.cluster_feature = get_unique_names(
            network.nodes.domain, 'Cluster')
    cd.add_results_to_items(network, labels, self.cluster_feature)

    self.Outputs.items.send(self.net.nodes)
    self.Outputs.network.send(self.net)
    nclusters = len(set(labels.values()))
    self.info.set_output_summary(nclusters, f"{nclusters} clusters")
def _add_metadata(self, corpus: Corpus) -> Corpus:
    """Append the columns of the loaded meta data to ``corpus`` as metas.

    The corpus is returned unchanged when it is ``None``, has no ``"path"``
    column, no meta data is loaded, or the meta data contains neither of
    the two key columns.  Otherwise the meta data is indexed by its key
    column, duplicated keys are reduced to their first occurrence, and the
    rows are aligned with the corpus through ``reindex``.

    Every aligned column is converted to strings, its type is guessed, and
    it is added under a name made unique within the corpus domain.
    """
    if (corpus is None or "path" not in corpus.domain
            or self._meta_data is None
            or (self.META_DATA_FILE_KEY not in self._meta_data.columns
                and self.CONLLU_META_DATA not in self._meta_data.columns)):
        return corpus

    if self.is_conllu:
        df = self._meta_data.set_index(self.CONLLU_META_DATA)
        path_column = corpus.get_column_view("utterance")[0]
    else:
        df = self._meta_data.set_index(
            self.startdir + self._meta_data[self.META_DATA_FILE_KEY])
        path_column = corpus.get_column_view("path")[0]

    if len(df.index.drop_duplicates()) != len(df.index):
        df = df[~df.index.duplicated(keep='first')]
    filtered = df.reindex(path_column)

    # DataFrame.iteritems() was removed in pandas 2.0; items() yields the
    # same (name, column) pairs and exists in older versions as well.
    for name, column in filtered.items():
        data = column.astype(str).values
        val_map, vals, var_type = guess_data_type(data)
        values, variable = sanitize_variable(
            val_map, vals, data, var_type, {},
            name=get_unique_names(corpus.domain, name))
        corpus = corpus.add_column(variable, values, to_metas=True)
    return corpus
def _send_data(self): if self.partition is None or self.data is None: return domain = self.data.domain # Compute the frequency of each cluster index counts = np.bincount(self.partition) indices = np.argsort(counts)[::-1] index_map = {n: o for n, o in zip(indices, range(len(indices)))} new_partition = list(map(index_map.get, self.partition)) cluster_var = DiscreteVariable( get_unique_names(domain, 'Cluster'), values=[ 'C%d' % (i + 1) for i, _ in enumerate(np.unique(new_partition)) ]) new_domain = add_columns(domain, metas=[cluster_var]) new_table = self.data.transform(new_domain) new_table.get_column_view(cluster_var)[0][:] = new_partition self.Outputs.annotated_data.send(new_table) if Graph is not None: graph = Graph(self.graph) graph.set_items(new_table) self.Outputs.graph.send(graph)
def _send_output(self) -> None: """ Create corpus with scores and output it """ if self.corpus is None: self.Outputs.corpus.send(None) self.Outputs.selected_documents.send(None) return scores, labels = self._gather_scores() if labels: d = self.corpus.domain domain = Domain( d.attributes, d.class_var, metas=d.metas + tuple( ContinuousVariable(get_unique_names(d, l)) for l in labels), ) out_corpus = Corpus( domain, self.corpus.X, self.corpus.Y, np.hstack([self.corpus.metas, scores]), ) Corpus.retain_preprocessing(self.corpus, out_corpus) else: out_corpus = self.corpus self.Outputs.corpus.send( create_annotated_table(out_corpus, self.selected_rows)) self.Outputs.selected_documents.send( out_corpus[self.selected_rows] if self.selected_rows else None)
def test_get_unique_names(self):
    """Both proposals receive the same index, above all existing ones."""
    existing = [
        "charlie",
        "bravo",
        "charlie (2)",
        "charlie (3)",
        "bravo (2)",
        "charlie (4)",
        "bravo (3)",
    ]
    proposed = ["bravo", "charlie"]
    expected = ["bravo (5)", "charlie (5)"]
    self.assertEqual(get_unique_names(existing, proposed), expected)
def _get_projection_variables(self):
    """Return two continuous variables for the embedding coordinates.

    Their names are ``embedding_variables_names`` made unique with respect
    to all variables and metas of the data domain.
    """
    source = self.data.domain
    taken = [var.name for var in source.variables + source.metas]
    unique = get_unique_names(taken, self.embedding_variables_names)
    first, second = unique[0], unique[1]
    return ContinuousVariable(first), ContinuousVariable(second)
def commit(self):
    """Run label propagation on the network and send the annotated result.

    Without a network, ``None`` is sent on both outputs.
    """
    self.infolabel.setText(' ')

    if self.method == 0:
        alg, kwargs = cd.label_propagation, {'iterations': self.iterations}
    elif self.method == 1:
        alg = cd.label_propagation_hop_attenuation
        kwargs = {
            'iterations': self.iterations,
            'delta': self.hop_attenuation,
        }

    network = self.net
    if network is None:
        self.Outputs.items.send(None)
        self.Outputs.network.send(None)
        return

    labels = alg(network, **kwargs)
    feature_name = get_unique_names(network.nodes.domain, 'Cluster')
    cd.add_results_to_items(network, labels, feature_name)

    n_found = len(set(labels.values()))
    self.infolabel.setText('%d clusters found' % n_found)
    self.Outputs.items.send(self.net.nodes)
    self.Outputs.network.send(self.net)
def __call__(self, data):
    """Return ``data`` in a domain containing the integral features.

    One continuous variable is created for every entry of ``self.limits``.
    Its compute value is built by the matching method, and its name comes
    from ``self.names`` or, when no names are given, from the limits
    themselves.  Names are made unique with respect to the data domain and
    to each other.

    The new variables replace the attributes, or are appended to the metas
    when ``self.metas`` is set.
    """
    common = _IntegrateCommon(data.domain)
    atts = []
    if self.limits:
        methods = self.methods
        if not isinstance(methods, Iterable):
            methods = [methods] * len(self.limits)

        # Work on a copy: writing the uniquified names back into
        # ``self.names`` would leak names derived from one data set into
        # later calls (and would fail outright for a tuple of names).
        if self.names:
            proposed = list(self.names)
        else:
            proposed = [" - ".join("{0}".format(e) for e in lim)
                        for lim in self.limits]

        # no names in data should be repeated
        used_names = [var.name
                      for var in data.domain.variables + data.domain.metas]
        names = []
        for proposal in proposed:
            unique = get_unique_names(used_names, proposal)
            names.append(unique)
            used_names.append(unique)

        for limits, method, name in zip(self.limits, methods, names):
            atts.append(Orange.data.ContinuousVariable(
                name=name, compute_value=method(limits, common)))

    if not self.metas:
        domain = Orange.data.Domain(atts, data.domain.class_vars,
                                    metas=data.domain.metas)
    else:
        domain = Orange.data.Domain(data.domain.attributes,
                                    data.domain.class_vars,
                                    metas=data.domain.metas + tuple(atts))
    return data.from_table(domain, data)
def _transformed_domain(self):
    """Return the data domain with transformed coordinate variables.

    With ``replace_original`` set, the two coordinate variables are swapped
    in place for their transformed versions.  Otherwise the transformed
    variables get unique names and are appended to the attributes or metas,
    depending on where the original variable is.
    """
    dom = self.data.domain
    orig_coords = (self.attr_lat, self.attr_lon)

    names = [var.name for var in orig_coords]
    if not self.replace_original:
        # If appending, use the same names, just with numbers for uniqueness
        existing = [var.name for var in chain(dom.variables, dom.metas)]
        names = get_unique_names(existing, names)

    transformer = Transformer.from_crs(self.EPSG_CODES[self.from_idx],
                                       self.EPSG_CODES[self.to_idx])
    transformer_common = GeoTransformerCommon(transformer, *orig_coords)
    coords = [
        ContinuousVariable(
            name, compute_value=GeoTransformer(transformer_common, col))
        for col, name in enumerate(names)
    ]

    if self.replace_original:
        replacement = dict(zip(orig_coords, coords))

        def swap(variables):
            return [replacement.get(var, var) for var in variables]

        return Domain(swap(dom.attributes), swap(dom.class_vars),
                      swap(dom.metas))

    # Put each new variable in attributes, if it was there, else to metas
    attrs, metas = list(dom.attributes), list(dom.metas)
    for orig, new in zip(orig_coords, coords):
        target = attrs if orig in dom.attributes else metas
        target.append(new)
    return Domain(attrs, dom.class_vars, metas)
def _send_data(self): if self.partition is None or self.data is None: return domain = self.data.domain # Compute the frequency of each cluster index counts = np.bincount(self.partition) indices = np.argsort(counts)[::-1] index_map = {n: o for n, o in zip(indices, range(len(indices)))} new_partition = list(map(index_map.get, self.partition)) cluster_var = DiscreteVariable( get_unique_names(domain, "Cluster"), values=[ "C%d" % (i + 1) for i, _ in enumerate(np.unique(new_partition)) ]) new_domain = add_columns(domain, metas=[cluster_var]) new_table = self.data.transform(new_domain) new_table.get_column_view(cluster_var)[0][:] = new_partition summary = len(new_table) if new_table else self.info.NoOutput details = format_summary_details(new_table) if new_table else "" self.info.set_output_summary(summary, details) self.Outputs.annotated_data.send(new_table) if Network is not None: n_edges = self.graph.number_of_edges() edges = sp.coo_matrix( (np.ones(n_edges), np.array(self.graph.edges()).T), shape=(n_edges, n_edges)) graph = Network(new_table, edges) self.Outputs.graph.send(graph)
def _commit_predictions(self):
    """Send the input data with predictor outputs appended as metas.

    ``None`` is sent (and the summary set to ``NoOutput``) when there is no
    data.  When rows are selected, or one of the views is sorted, only the
    corresponding rows are output, mapped back to source rows.
    """
    if not self.data:
        self.Outputs.predictions.send(None)
        self.info.set_output_summary(self.info.NoOutput)
        return
    newmetas = []
    newcolumns = []
    # each predictor appends its variables and columns to the two lists
    for slot in self._non_errored_predictors():
        if slot.predictor.domain.class_var.is_discrete:
            self._add_classification_out_columns(slot, newmetas, newcolumns)
        else:
            self._add_regression_out_columns(slot, newmetas, newcolumns)
    attrs = list(self.data.domain.attributes)
    metas = list(self.data.domain.metas)
    names = [
        var.name
        for var in chain(attrs, self.data.domain.class_vars, metas) if var
    ]
    uniq_newmetas = []
    for new_ in newmetas:
        uniq = get_unique_names(names, new_.name)
        # copy the variable only when it has to be renamed
        if uniq != new_.name:
            new_ = new_.copy(name=uniq)
        uniq_newmetas.append(new_)
        # later variables must not reuse this name either
        names.append(uniq)
    metas += uniq_newmetas
    domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
    predictions = self.data.transform(domain)
    if newcolumns:
        newcolumns = numpy.hstack(
            [numpy.atleast_2d(cols) for cols in newcolumns])
        # the new columns occupy the trailing meta columns
        predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
    index = self.dataview.model().index
    map_to = self.dataview.model().mapToSource
    assert self.selection_store is not None
    rows = None
    if self.selection_store.rows:
        # selected view rows, in ascending order
        rows = [
            ind.row()
            for ind in self.dataview.selectionModel().selectedRows(0)
        ]
        rows.sort()
    elif self.dataview.model().isSorted() \
            or self.predictionsview.model().isSorted():
        # no selection, but a sorted view: output all rows
        rows = list(range(len(self.data)))
    if rows:
        # translate view rows to rows of the source model
        source_rows = [map_to(index(row, 0)).row() for row in rows]
        predictions = predictions[source_rows]
    self.Outputs.predictions.send(predictions)
    summary = str(len(predictions))
    details = format_summary_details(predictions)
    self.info.set_output_summary(summary, details)
def create_unique_vars(self, names, proposed_names, values=()):
    """Create one variable per proposed name, each with a unique name.

    A ``DiscreteVariable`` with ``values`` is created when ``values`` is
    non-empty, a ``ContinuousVariable`` otherwise.  Every chosen name is
    appended to ``names`` in place.

    Returns the list of new variables and the updated ``names`` list.
    """
    unique_vars = []
    for proposed in proposed_names:
        uniq = get_unique_names(names, proposed)
        var = DiscreteVariable(uniq, values) if values \
            else ContinuousVariable(uniq)
        unique_vars.append(var)
        names.append(uniq)
    return unique_vars, names
def _data_with_similarity(self, indices):
    """Return the rows at ``indices`` with their distance added as a meta.

    The new continuous meta is named ``"distance"`` (made unique within the
    domain) and is filled from ``self.distances``.  The table attributes of
    the input data are carried over.
    """
    source = self.data
    dom = source.domain

    dist_var = ContinuousVariable(get_unique_names(dom, "distance"))
    extended = Domain(dom.attributes, dom.class_vars,
                      dom.metas + (dist_var, ))

    dist_col = self.distances[indices].reshape((-1, 1))
    if dom.metas:
        meta_values = np.hstack((source.metas[indices], dist_col))
    else:
        meta_values = dist_col

    neighbors = Table(extended, source.X[indices], source.Y[indices],
                      meta_values)
    neighbors.attributes = self.data.attributes
    return neighbors
def send_components(self):
    """Send the table of projection components, or ``None``.

    A table is built only when both data and a projection are available.
    """
    components = None
    have_projection = self.data is not None and self.projection is not None
    if have_projection:
        variables = self.effective_variables
        comp_name = get_unique_names(
            [var.name for var in variables], 'component')
        domain = Domain(variables, metas=[StringVariable(name=comp_name)])
        components = Table(domain, self._send_components_x(),
                           metas=self._send_components_metas())
        components.name = "components"
    self.Outputs.components.send(components)
def apply(self):
    """Compute the manifold embedding of the data and send it.

    Errors and warnings are cleared first.  Sparse data is reported as
    unsupported.  While the projector runs, ``warnings.warn`` is
    temporarily replaced so that the "Graph is not fully connected" warning
    is shown as a widget warning; the original function is always restored.
    ``None`` is sent when there is no data or the embedding fails.
    """
    builtin_warn = warnings.warn

    def _handle_disconnected_graph_warning(msg, *args, **kwargs):
        # ``warnings.warn`` accepts Warning instances as well as strings,
        # so compare against the textual form.
        if str(msg).startswith("Graph is not fully connected"):
            self.Warning.graph_not_connected()
        else:
            builtin_warn(msg, *args, **kwargs)

    out = None
    data = self.data
    method = self.MANIFOLD_METHODS[self.manifold_method_index]
    have_data = data is not None and len(data)
    self.Error.clear()
    self.Warning.clear()

    if have_data and data.is_sparse():
        self.Error.sparse_not_supported()
    elif have_data:
        names = [
            var.name
            for var in chain(data.domain.class_vars, data.domain.metas)
            if var
        ]
        proposed = ["C{}".format(i) for i in range(self.n_components)]
        unique = get_unique_names(names, proposed)
        domain = Domain([ContinuousVariable(name) for name in unique],
                        data.domain.class_vars, data.domain.metas)
        try:
            warnings.warn = _handle_disconnected_graph_warning
            projector = method(**self.get_method_parameters(data, method))
            model = projector(data)
            if isinstance(model, TSNEModel):
                out = model.embedding
            else:
                X = model.embedding_
                out = Table(domain, X, data.Y, data.metas)
        except ValueError as e:
            # an exception raised without arguments has an empty ``args``
            message = e.args[0] if e.args else str(e)
            if message == "for method='hessian', n_neighbors " \
                          "must be greater than [n_components" \
                          " * (n_components + 3) / 2]":
                n = self.n_components * (self.n_components + 3) / 2
                self.Error.n_neighbors_too_small("{}".format(n))
            else:
                self.Error.manifold_error(message)
        except MemoryError:
            self.Error.out_of_memory()
        # public alias; the ``np.linalg.linalg`` submodule is private and
        # deprecated in NumPy 2.0
        except np.linalg.LinAlgError as e:
            self.Error.manifold_error(str(e))
        finally:
            warnings.warn = builtin_warn

    self._set_output_summary(out)
    self.Outputs.transformed_data.send(out)
def _table_with_annotation_column(data, values, column_data, var_name):
    """Return ``data`` with an additional discrete annotation column.

    The new variable becomes the class variable when the data has none;
    otherwise it is appended to the metas.  Its name is ``var_name`` made
    unique within the domain.
    """
    source = data.domain
    var = DiscreteVariable(get_unique_names(source, var_name), values)

    if source.class_vars:
        class_vars, metas = source.class_vars, source.metas + (var, )
    else:
        class_vars, metas = source.class_vars + (var, ), source.metas

    table = data.transform(Domain(source.attributes, class_vars, metas))
    table[:, var] = column_data.reshape((len(data), 1))
    return table
def _fit_model(self, data: Table) -> EllipticEnvelopeClassifier:
    """Fit the model on the attributes only and attach a Mahalanobis variable.

    The variable's compute value is a ``_Transformer`` wrapping the model.
    It is stored on the model as ``mahal_var`` and on the transformer as
    ``variable``.
    """
    source = data.domain
    attributes_only = data.transform(Domain(source.attributes))
    model = super()._fit_model(attributes_only)

    transformer = _Transformer(model)
    taken = [var.name for var in source.variables + source.metas]
    mahal_name = get_unique_names(taken, "Mahalanobis")
    variable = ContinuousVariable(mahal_name, compute_value=transformer)

    transformer.variable = variable
    model.mahal_var = variable
    return model
def _commit_predictions(self):
    """Send the input data with predictor outputs appended as metas.

    ``None`` is sent when there is no data.  When rows are selected, only
    those are output, in the order shown in the view; without a selection
    but with a sorted view, all rows are output in the shown order.
    """
    if not self.data:
        self.Outputs.predictions.send(None)
        return
    newmetas = []
    newcolumns = []
    # each predictor appends its variables and columns to the two lists
    for slot in self._non_errored_predictors():
        if slot.predictor.domain.class_var.is_discrete:
            self._add_classification_out_columns(slot, newmetas, newcolumns)
        else:
            self._add_regression_out_columns(slot, newmetas, newcolumns)
    attrs = list(self.data.domain.attributes)
    metas = list(self.data.domain.metas)
    names = [
        var.name
        for var in chain(attrs, self.data.domain.class_vars, metas) if var
    ]
    uniq_newmetas = []
    for new_ in newmetas:
        uniq = get_unique_names(names, new_.name)
        # copy the variable only when it has to be renamed
        if uniq != new_.name:
            new_ = new_.copy(name=uniq)
        uniq_newmetas.append(new_)
        # later variables must not reuse this name either
        names.append(uniq)
    metas += uniq_newmetas
    domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
    predictions = self.data.transform(domain)
    if newcolumns:
        newcolumns = numpy.hstack(
            [numpy.atleast_2d(cols) for cols in newcolumns])
        # the new columns occupy the trailing meta columns
        with predictions.unlocked(predictions.metas):
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
    datamodel = self.dataview.model()
    predmodel = self.predictionsview.model()
    assert datamodel is not None  # because we have data
    assert self.selection_store is not None
    rows = numpy.array(list(self.selection_store.rows))
    if rows.size:
        # Reorder rows as they are ordered in view
        shown_rows = datamodel.mapFromSourceRows(rows)
        rows = rows[numpy.argsort(shown_rows)]
        predictions = predictions[rows]
    elif datamodel.sortColumn() >= 0 \
            or predmodel is not None and predmodel.sortColumn() > 0:
        # No selection: output all, but in the shown order
        predictions = predictions[datamodel.mapToSourceRows(...)]
    self.Outputs.predictions.send(predictions)
def _fit_model(self, data: Table) -> _OutlierModel:
    """Fit the model on the attributes only and attach an outlier variable.

    The discrete variable has the values ``"Yes"`` and ``"No"`` and a
    ``_Transformer`` wrapping the model as its compute value.  It is stored
    on the model as ``outlier_var`` and on the transformer as ``variable``.
    """
    source = data.domain
    attributes_only = data.transform(Domain(source.attributes))
    model = super()._fit_model(attributes_only)

    transformer = _Transformer(model)
    taken = [var.name for var in source.variables + source.metas]
    outlier_name = get_unique_names(taken, "Outlier")
    variable = DiscreteVariable(outlier_name,
                                values=("Yes", "No"),
                                compute_value=transformer)

    transformer.variable = variable
    model.outlier_var = variable
    return model
def send_data(self):
    """Send the data annotated with cluster and silhouette, and the centroids.

    ``None`` is sent on both outputs when there is no data, or when the
    clustering for the chosen ``k`` is missing or is a string.
    """
    if self.optimize_k:
        row = self.selected_row()
        k = None if row is None else self.k_from + row
    else:
        k = self.k

    km = self.clusterings.get(k)
    if self.data is None or km is None or isinstance(km, str):
        self.Outputs.annotated_data.send(None)
        self.Outputs.centroids.send(None)
        return

    source = self.data.domain
    cluster_var = DiscreteVariable(
        get_unique_names(source, "Cluster"),
        values=["C%d" % (idx + 1) for idx in range(km.k)])
    clust_ids = km(self.data)
    silhouette_var = ContinuousVariable(
        get_unique_names(source, "Silhouette"))

    if km.silhouette_samples is None:
        self.Warning.no_silhouettes()
        scores = np.nan
    else:
        self.Warning.no_silhouettes.clear()
        scores = np.arctan(km.silhouette_samples) / np.pi + 0.5

    annotated = self.data.transform(
        add_columns(source, metas=[cluster_var, silhouette_var]))
    annotated.get_column_view(cluster_var)[0][:] = clust_ids.X.ravel()
    annotated.get_column_view(silhouette_var)[0][:] = scores

    centroids = Table(Domain(km.pre_domain.attributes), km.centroids)
    self.Outputs.annotated_data.send(annotated)
    self.Outputs.centroids.send(centroids)
def _send_data(self): if self.partition is None or self.data is None: return domain = self.data.domain # Compute the frequency of each cluster index counts = np.bincount(self.partition) indices = np.argsort(counts)[::-1] index_map = {n: o for n, o in zip(indices, range(len(indices)))} new_partition = list(map(index_map.get, self.partition)) cluster_var = DiscreteVariable( get_unique_names(domain, 'Cluster'), values=['C%d' % (i + 1) for i, _ in enumerate(np.unique(new_partition))] ) new_domain = add_columns(domain, metas=[cluster_var]) new_table = self.data.transform(new_domain) new_table.get_column_view(cluster_var)[0][:] = new_partition self.Outputs.annotated_data.send(new_table) if Graph is not None: graph = Graph(self.graph) graph.set_items(new_table) self.Outputs.graph.send(graph)
def __get_var_names(self):
    """Return unique names for the component variables.

    With two components the names end in ``-x`` and ``-y``; otherwise they
    end in the zero-based component index.  Names are made unique with
    respect to the variables and metas of the original domain.
    """
    n = self.n_components
    if n == 2:
        postfixes = ["-x", "-y"]
    else:
        postfixes = [str(i) for i in range(n)]
    proposed = [f"{self.var_prefix}{postfix}" for postfix in postfixes]

    variables = self.orig_domain.variables + self.orig_domain.metas
    existing = [var.name for var in variables]
    return get_unique_names(existing, proposed)
def _get_var_names(self, n):
    """Return ``n`` unique variable names based on ``var_prefix``.

    With two names the suffixes are ``x`` and ``y``; otherwise they are the
    one-based indices.  Names are made unique within the original domain.
    """
    if n == 2:
        postfixes = ["x", "y"]
    else:
        postfixes = [str(i) for i in range(1, n + 1)]
    proposed = [f"{self.var_prefix}-{postfix}" for postfix in postfixes]
    return get_unique_names(self.orig_domain, proposed)
def test_get_unique_names(self):
    """Both proposals receive the same index, above all existing ones."""
    existing = [
        "charlie",
        "bravo",
        "charlie (2)",
        "charlie (3)",
        "bravo (2)",
        "charlie (4)",
        "bravo (3)",
    ]
    proposed = ["bravo", "charlie"]
    expected = ["bravo (5)", "charlie (5)"]
    self.assertEqual(get_unique_names(existing, proposed), expected)
def _get_projection_variables(self):
    """Return two continuous variables for the embedding coordinates.

    Their names are ``embedding_variables_names`` made unique within the
    data domain.
    """
    unique = get_unique_names(
        self.data.domain, self.embedding_variables_names)
    first, second = unique[0], unique[1]
    return ContinuousVariable(first), ContinuousVariable(second)
def send_data(self):
    """Send the data annotated with cluster and silhouette, and the centroids.

    ``None`` is sent on both outputs when there is no data, or when the
    clustering for the chosen ``k`` is missing or is a string.  The centroid
    table carries the cluster index and the mean silhouette score of each
    cluster as metas.
    """
    if self.optimize_k:
        row = self.selected_row()
        k = self.k_from + row if row is not None else None
    else:
        k = self.k
    km = self.clusterings.get(k)
    if self.data is None or km is None or isinstance(km, str):
        self.Outputs.annotated_data.send(None)
        self.Outputs.centroids.send(None)
        return
    domain = self.data.domain
    cluster_var = DiscreteVariable(
        get_unique_names(domain, "Cluster"),
        values=["C%d" % (x + 1) for x in range(km.k)]
    )
    clust_ids = km(self.data)
    clust_col = clust_ids.X.ravel()
    silhouette_var = ContinuousVariable(
        get_unique_names(domain, "Silhouette"))
    if km.silhouette_samples is not None:
        self.Warning.no_silhouettes.clear()
        # rescale the silhouette samples with arctan
        scores = np.arctan(km.silhouette_samples) / np.pi + 0.5
        # mean score per cluster; 0 for clusters without members
        clust_scores = []
        for i in range(km.k):
            in_clust = clust_col == i
            if in_clust.any():
                clust_scores.append(np.mean(scores[in_clust]))
            else:
                clust_scores.append(0.)
        clust_scores = np.atleast_2d(clust_scores).T
    else:
        self.Warning.no_silhouettes()
        scores = np.nan
        clust_scores = np.full((km.k, 1), np.nan)
    new_domain = add_columns(domain, metas=[cluster_var, silhouette_var])
    new_table = self.data.transform(new_domain)
    new_table.get_column_view(cluster_var)[0][:] = clust_col
    new_table.get_column_view(silhouette_var)[0][:] = scores
    # use the original attribute in place of a ReplaceUnknowns-derived one
    # when that original is among the data's attributes
    centroid_attributes = [
        attr.compute_value.variable
        if isinstance(attr.compute_value, ReplaceUnknowns)
        and attr.compute_value.variable in domain.attributes
        else attr
        for attr in km.pre_domain.attributes]
    centroid_domain = add_columns(
        Domain(centroid_attributes, [], domain.metas),
        metas=[cluster_var, silhouette_var])
    # metas of the centroids: the data's own metas are left missing,
    # followed by the cluster index and the cluster's mean score
    centroids = Table(
        centroid_domain, km.centroids, None,
        np.hstack((np.full((km.k, len(domain.metas)), np.nan),
                   np.arange(km.k).reshape(km.k, 1),
                   clust_scores))
    )
    # the data name equals the class-level ``Table.name`` attribute
    if self.data.name == Table.name:
        centroids.name = "centroids"
    else:
        centroids.name = f"{self.data.name} centroids"
    self.Outputs.annotated_data.send(new_table)
    self.Outputs.centroids.send(centroids)
def _get_var_names(self, n):
    """Return ``n`` unique variable names: ``var_prefix`` plus a 1-based index.

    Names are made unique within the original domain.
    """
    proposed = []
    for index in range(1, n + 1):
        proposed.append(f"{self.var_prefix}{index}")
    return get_unique_names(self.orig_domain, proposed)
def test_get_unique_names_with_domain(self):
    """Check ``get_unique_names`` when existing names come from a domain.

    Covers list proposals (a list is returned) and string proposals (a
    string is returned).
    """
    a, b, c, d = map(ContinuousVariable, ["foo", "bar", "baz", "baz (3)"])
    domain = Domain([a, b], c, [d])

    cases = [
        (["qux"], ["qux"]),
        (["foo"], ["foo (1)"]),
        (["baz"], ["baz (4)"]),
        (["baz (3)"], ["baz (3) (1)"]),
        (["qux", "quux"], ["qux", "quux"]),
        (["bar", "baz"], ["bar (4)", "baz (4)"]),
        (["qux", "baz"], ["qux (4)", "baz (4)"]),
        (["qux", "bar"], ["qux (1)", "bar (1)"]),
        ("qux", "qux"),
        ("foo", "foo (1)"),
        ("baz", "baz (4)"),
    ]
    for proposed, expected in cases:
        self.assertEqual(get_unique_names(domain, proposed), expected)
def test_get_unique_names(self):
    """Check ``get_unique_names`` with existing names given as a sequence.

    Covers list proposals (a list is returned), string proposals (a string
    is returned) and existing names passed as a tuple.
    """
    names = ["foo", "bar", "baz", "baz (3)"]

    cases = [
        (["qux"], ["qux"]),
        (["foo"], ["foo (1)"]),
        (["baz"], ["baz (4)"]),
        (["baz (3)"], ["baz (3) (1)"]),
        (["qux", "quux"], ["qux", "quux"]),
        (["bar", "baz"], ["bar (4)", "baz (4)"]),
        (["qux", "baz"], ["qux (4)", "baz (4)"]),
        (["qux", "bar"], ["qux (1)", "bar (1)"]),
        ("qux", "qux"),
        ("foo", "foo (1)"),
        ("baz", "baz (4)"),
    ]
    for proposed, expected in cases:
        self.assertEqual(get_unique_names(names, proposed), expected)

    self.assertEqual(get_unique_names(tuple(names), "baz"), "baz (4)")