def get_all_topics_table(self):
    """ Build a table with one row per topic and one column per word.

    Column names are the sorted words of the first topic. Each row holds
    that topic's weights ordered by the topic's own sorted words
    (presumably every topic lists the same words -- verify against
    ``_topics_words``). The topic name and the marginal topic probability
    are stored as metas.
    """
    words_per_topic = self._topics_words(self.n_words)
    weights_per_topic = self._topics_weights(self.n_words)
    column_words = sorted(words_per_topic[0])
    n_topics = len(words_per_topic)

    # Reorder every topic's weights to follow its words in sorted order.
    rows = [
        [weight for _, weight in sorted(zip(words, weights))]
        for words, weights in zip(words_per_topic, weights_per_topic)
    ]
    X = np.array(rows)

    # take only first n_topics; e.g. when user requested 10, but gensim
    # returns only 9 — when the rank is lower than num_topics requested
    topic_column = np.array(self.topic_names[:n_topics], dtype=object)
    names = topic_column[:, None]

    attrs = [ContinuousVariable(word) for word in column_words]
    metas = [
        StringVariable('Topics'),
        ContinuousVariable('Marginal Topic Probability'),
    ]
    marginal = self._marginal_probability(self.tokens, self.doc_topic)
    topic_proba = np.array(marginal, dtype=object)

    domain = Domain(attrs, metas=metas)
    meta_block = np.hstack((names, topic_proba))
    t = Table.from_numpy(domain, X=X, metas=meta_block)
    t.name = 'All topics'
    return t
def apply_domain_edit(self):
    """Rebuild the data with the edited domain and send it to the output.

    Sends ``None`` when there is no input data or when the edited domain
    has neither variables nor metas. Otherwise a new table is created from
    the columns returned by the domain editor; the name, ids and
    attributes of the input data are carried over. The apply button is
    disabled afterwards.
    """
    if self.data is None:
        table = None
    else:
        domain, cols = self.domain_editor.get_domain(
            self.data.domain, self.data)
        if not (domain.variables or domain.metas):
            table = None
        else:
            # X: attribute columns, y: class variable columns,
            # m: meta columns
            X, y, m = cols
            table = Table.from_numpy(domain, X, y, m, self.data.W)
            # Carry over the name, row ids and attributes of the input.
            table.name = self.data.name
            table.ids = np.array(self.data.ids)
            # Fall back to an empty dict when the input has no attributes.
            table.attributes = getattr(self.data, 'attributes', {})

    self.Outputs.data.send(table)
    self.apply_button.setEnabled(False)
def apply_domain_edit(self):
    """Apply the edited domain, update warnings/summary and send the data.

    Clears the performance and renamed-variables warnings, then:

    * sends ``None`` when there is no input data or the edited domain has
      neither variables nor metas;
    * sends the input data itself when the editor returned the very same
      domain object;
    * otherwise builds a new table from the edited columns, carrying over
      name, ids and attributes, and inspects its discrete variables.

    Variables renamed by the editor (``deduplicate=True``) are reported in
    a warning. The output summary is set from the resulting table and the
    apply button is disabled.
    """
    self.Warning.performance_warning.clear()
    self.Warning.renamed_vars.clear()
    if self.data is None:
        table = None
    else:
        domain, cols, renamed = self.domain_editor.get_domain(
            self.data.domain, self.data, deduplicate=True)
        if not (domain.variables or domain.metas):
            table = None
        elif domain is self.data.domain:
            table = self.data
        else:
            X, y, m = cols
            table = Table.from_numpy(domain, X, y, m, self.data.W)
            table.name = self.data.name
            table.ids = np.array(self.data.ids)
            table.attributes = getattr(self.data, 'attributes', {})
            self._inspect_discrete_variables(domain)
        if renamed:
            self.Warning.renamed_vars(f"Renamed: {', '.join(renamed)}")

    # Compare against None explicitly: a table with zero rows is still an
    # output and must not be reported as "no output" just because
    # truthiness would fall back to its length.
    has_output = table is not None
    summary = len(table) if has_output else self.info.NoOutput
    details = format_summary_details(table) if has_output else ""
    self.info.set_output_summary(summary, details)
    self.Outputs.data.send(table)
    self.apply_button.setEnabled(False)
def apply_domain_edit(self):
    """Apply the edited domain, refresh the warnings and send the data.

    The performance and renamed-variables warnings are cleared first.
    ``None`` is sent when there is no input or the edited domain is empty;
    the input itself is sent when the editor returned the same domain
    object; otherwise a new table is built from the edited columns. A
    warning is shown when the output has more than one class variable.
    """
    self.Warning.performance_warning.clear()
    self.Warning.renamed_vars.clear()

    table = None
    if self.data is not None:
        domain, cols, renamed = self.domain_editor.get_domain(
            self.data.domain, self.data, deduplicate=True)
        is_empty = not (domain.variables or domain.metas)
        if is_empty:
            table = None
        elif domain is self.data.domain:
            # Nothing was edited; pass the input through unchanged.
            table = self.data
        else:
            X, y, m = cols
            table = Table.from_numpy(domain, X, y, m, self.data.W)
            table.name = self.data.name
            table.ids = np.array(self.data.ids)
            table.attributes = getattr(self.data, 'attributes', {})
            self._inspect_discrete_variables(domain)
        if renamed:
            self.Warning.renamed_vars(f"Renamed: {', '.join(renamed)}")

    several_targets = (
        table is not None and len(table.domain.class_vars) > 1
    )
    self.Warning.multiple_targets(shown=several_targets)
    self.Outputs.data.send(table)
    self.apply_button.setEnabled(False)
def commit(self):
    """Embed the SMILES column and send embedded and skipped rows.

    Nothing is sent unless both an embedder and a SMILES attribute are
    selected. Rows whose index is returned as valid by
    ``to_fingerprints`` are sent on ``embedded_smiles`` with the
    fingerprint as attributes; all remaining rows are sent unchanged on
    ``skipped_smiles``. An output with no rows receives ``None``.
    """
    if self._embedder != '' and self._smiles_attr != '':
        smiles = self.data[:, self._smiles_attr].metas.flatten()
        embedded, valid = self.to_fingerprints(smiles, self._embedder)
        # Set iteration order is arbitrary; sort so the skipped rows keep
        # their original relative order.
        invalid = sorted(set(range(len(smiles))) - set(valid))

        # Length check works for any sequence type, unlike `== []`.
        if len(valid) > 0:
            domain = [ContinuousVariable.make("C_{}".format(x))
                      for x in range(embedded.shape[1])]
            if self._embedder == MACCS:
                # Name the columns after the MACCS key patterns, plus one
                # extra column named '?'.
                domain = [ContinuousVariable.make(name)
                          for name, _ in Chem.MACCSkeys.smartsPatts.values()]
                domain.append(ContinuousVariable.make('?'))

            embedded_table = Table.from_numpy(
                Domain(domain,
                       self.data.domain.class_vars,
                       self.data.domain.metas),
                embedded,
                self.data.Y[valid],
                self.data.metas[valid],
                self.data.W[valid]
            )
            self.Outputs.embedded_smiles.send(embedded_table)
        else:
            self.Outputs.embedded_smiles.send(None)

        if invalid:
            invalid_table = Table.from_numpy(
                self.data.domain,
                self.data.X[invalid],
                self.data.Y[invalid],
                self.data.metas[invalid],
                self.data.W[invalid]
            )
            self.Outputs.skipped_smiles.send(invalid_table)
        else:
            self.Outputs.skipped_smiles.send(None)
def apply_domain_edit(self):
    """Rebuild the data table from the editor model and send it.

    Walks the editor's variables in parallel with the original domain
    (attributes, then class variables, then metas). Each kept column is
    placed into the attributes, class variables or metas of the new
    domain, converting its values when the variable type was changed.
    """
    attributes, class_vars, metas = [], [], []
    places = [attributes, class_vars, metas]
    X, y, m = [], [], []
    cols = [X, y, m]  # columns for attributes, class variables, metas

    def is_missing(x):
        return str(x) in ("nan", "")

    # Original variables paired with the index of the part they came from.
    original_vars = chain(
        ((var, 0) for var in self.data.domain.attributes),
        ((var, 1) for var in self.data.domain.class_vars),
        ((var, 2) for var in self.data.domain.metas),
    )
    edited_vars = self.editor_model.variables

    for (name, tpe, place, _vals, _is_con), (orig_var, orig_plc) in \
            zip(edited_vars, original_vars):
        if place == 3:
            # presumably the "skip" placement -- TODO confirm
            continue

        column = self.data[:, orig_var]
        source = column.metas if orig_plc == 2 else column
        col_data = list(chain.from_iterable(source))

        unchanged = name == orig_var.name and tpe == type(orig_var)
        if unchanged:
            var = orig_var
        elif tpe == DiscreteVariable:
            # Distinct non-missing values become the categories; the
            # column is re-encoded as indices into them.
            values = [str(v) for v in set(col_data) if not is_missing(v)]
            var = tpe(name, values)
            col_data = [
                np.nan if is_missing(v) else values.index(str(v))
                for v in col_data
            ]
        elif tpe == StringVariable and type(orig_var) == DiscreteVariable:
            var = tpe(name)
            col_data = [
                "" if np.isnan(v) else orig_var.repr_val(v)
                for v in col_data
            ]
        else:
            var = tpe(name)

        places[place].append(var)
        cols[place].append(col_data)

    domain = Domain(attributes, class_vars, metas)

    if len(X):
        X = np.array(X).T
    else:
        X = np.empty((len(self.data), 0))
    y = np.array(y).T if len(y) else None
    # Metas need an object array as soon as one of them holds strings.
    has_strings = any(isinstance(var, StringVariable)
                      for var in domain.metas)
    dtpe = object if has_strings else float
    m = np.array(m, dtype=dtpe).T if len(m) else None

    table = Table.from_numpy(domain, X, y, m, self.data.W)
    self.send("Data", table)
    self.apply_button.setEnabled(False)
def apply_domain_edit(self):
    """Send the data rebuilt with the edited domain on the "Data" output.

    When there is no input data, the (``None``) input is forwarded as is.
    Otherwise a new table is created from the editor's domain and columns,
    keeping the name, ids and attributes of the input. The apply button is
    disabled afterwards.
    """
    if self.data is None:
        table = self.data
    else:
        domain, (X, y, m) = self.domain_editor.get_domain(
            self.data.domain, self.data)
        table = Table.from_numpy(domain, X, y, m, self.data.W)
        table.name = self.data.name
        table.ids = np.array(self.data.ids)
        # Fall back to an empty dict when the input has no attributes.
        table.attributes = getattr(self.data, 'attributes', {})

    self.send("Data", table)
    self.apply_button.setEnabled(False)
def apply_domain_edit(self):
    """Send the data rebuilt with the edited domain.

    ``None`` is sent when there is no input data or when the edited domain
    contains neither variables nor metas. Otherwise a new table is built
    from the editor's columns, keeping the input's name, ids and
    attributes. The apply button is disabled afterwards.
    """
    table = None
    if self.data is not None:
        domain, cols = self.domain_editor.get_domain(
            self.data.domain, self.data)
        if domain.variables or domain.metas:
            X, y, m = cols
            table = Table.from_numpy(domain, X, y, m, self.data.W)
            table.name = self.data.name
            table.ids = np.array(self.data.ids)
            table.attributes = getattr(self.data, 'attributes', {})

    self.Outputs.data.send(table)
    self.apply_button.setEnabled(False)
def apply_domain_edit(self):
    """Build the output table from the domain editor and send it.

    Nothing (``None``) is sent when the widget has no data or the editor
    yields a domain without variables and metas. In every other case the
    edited columns are packed into a new table that inherits the name,
    ids and attributes of the input. Finally the apply button is disabled.
    """
    source = self.data
    if source is None:
        result = None
    else:
        domain, cols = self.domain_editor.get_domain(source.domain, source)
        has_columns = bool(domain.variables or domain.metas)
        if has_columns:
            X, y, m = cols
            result = Table.from_numpy(domain, X, y, m, source.W)
            result.name = source.name
            result.ids = np.array(source.ids)
            result.attributes = getattr(source, 'attributes', {})
        else:
            result = None

    self.Outputs.data.send(result)
    self.apply_button.setEnabled(False)
def apply_domain_edit(self):
    """Create a table that reflects the editor model and send it.

    The editor's variable descriptions are matched, in order, with the
    original attributes, class variables and metas. Kept columns are
    collected per target role and their values converted when the type
    was changed in the editor; the result is sent on the "Data" output.
    """
    attributes = []
    class_vars = []
    metas = []
    places = [attributes, class_vars, metas]
    X, y, m = [], [], []
    cols = [X, y, m]  # Xcols, Ycols, Mcols

    def is_missing(x):
        return str(x) in ("nan", "")

    old_domain = self.data.domain
    originals = chain(
        [(at, 0) for at in old_domain.attributes],
        [(cl, 1) for cl in old_domain.class_vars],
        [(mt, 2) for mt in old_domain.metas],
    )

    for description, (orig_var, orig_plc) in \
            zip(self.editor_model.variables, originals):
        name, tpe, place, _, _ = description
        if place == 3:
            # presumably marks a column to leave out -- TODO confirm
            continue

        if orig_plc == 2:
            rows = self.data[:, orig_var].metas
        else:
            rows = self.data[:, orig_var]
        col_data = [value for row in rows for value in row]

        if name == orig_var.name and tpe == type(orig_var):
            # Neither name nor type changed; reuse the variable as is.
            var = orig_var
        elif tpe == DiscreteVariable:
            values = [str(item) for item in set(col_data)
                      if not is_missing(item)]
            var = tpe(name, values)
            # Replace each value by its index among the categories.
            encoded = []
            for item in col_data:
                if is_missing(item):
                    encoded.append(np.nan)
                else:
                    encoded.append(values.index(str(item)))
            col_data = encoded
        elif tpe == StringVariable and type(orig_var) == DiscreteVariable:
            var = tpe(name)
            col_data = [orig_var.repr_val(item) if not np.isnan(item) else ""
                        for item in col_data]
        else:
            var = tpe(name)

        places[place].append(var)
        cols[place].append(col_data)

    domain = Domain(attributes, class_vars, metas)
    X = np.array(X).T if len(X) else np.empty((len(self.data), 0))
    y = np.array(y).T if len(y) else None
    # String metas require an object array; otherwise floats suffice.
    if any(isinstance(meta, StringVariable) for meta in domain.metas):
        dtpe = object
    else:
        dtpe = float
    m = np.array(m, dtype=dtpe).T if len(m) else None

    table = Table.from_numpy(domain, X, y, m, self.data.W)
    self.send("Data", table)
    self.apply_button.setEnabled(False)
def apply_domain_edit(self):
    """Convert the editor's columns to arrays and send the new table.

    Without input data the (``None``) input is forwarded. Otherwise the
    column lists returned by the domain editor are transposed into arrays
    (an empty list of attribute columns becomes an array with zero
    columns; empty class/meta lists become ``None``) and a table with the
    input's name, ids and attributes is sent on the "Data" output.
    """
    if self.data is None:
        table = self.data
    else:
        domain, cols = self.domain_editor.get_domain(
            self.data.domain, self.data)
        x_cols, y_cols, meta_cols = cols

        if len(x_cols):
            X = np.array(x_cols).T
        else:
            X = np.empty((len(self.data), 0))

        y = np.array(y_cols).T if len(y_cols) else None

        # Metas need an object array as soon as one of them holds strings.
        has_strings = any(isinstance(var, StringVariable)
                          for var in domain.metas)
        dtpe = object if has_strings else float
        m = np.array(meta_cols, dtype=dtpe).T if len(meta_cols) else None

        table = Table.from_numpy(domain, X, y, m, self.data.W)
        table.name = self.data.name
        table.ids = np.array(self.data.ids)
        table.attributes = getattr(self.data, 'attributes', {})

    self.send("Data", table)
    self.apply_button.setEnabled(False)
def apply_domain_edit(self):
    """Send the data rebuilt with the edited domain.

    The performance warning is cleared first. ``None`` is sent when there
    is no input data or the edited domain has neither variables nor
    metas. Otherwise a new table is built from the editor's columns,
    keeping the input's name, ids and attributes, and its discrete
    variables are inspected. The apply button is disabled afterwards.
    """
    self.Warning.performance_warning.clear()

    table = None
    if self.data is not None:
        domain, cols = self.domain_editor.get_domain(
            self.data.domain, self.data)
        if domain.variables or domain.metas:
            X, y, m = cols
            table = Table.from_numpy(domain, X, y, m, self.data.W)
            table.name = self.data.name
            table.ids = np.array(self.data.ids)
            table.attributes = getattr(self.data, 'attributes', {})
            self._inspect_discrete_variables(domain)

    self.Outputs.data.send(table)
    self.apply_button.setEnabled(False)
def insert_topics_into_corpus(self, corp_in):
    """
    Insert topical representation into corpus.

    The attributes of the result are the topics; class variables, class
    values and metas are taken from `corp_in`.

    :param corp_in: Corpus into which we want to insert topical
        representations
    :return: `Orange.data.table.Table`
    """
    # Dense matrix of self.corpus, transposed (presumably to
    # documents x topics -- verify against gensim's corpus2dense).
    dense = matutils.corpus2dense(self.corpus, num_terms=self.num_topics)
    matrix = dense.T

    # One continuous attribute per topic name.
    topic_vars = [ContinuousVariable(name) for name in self.topic_names]
    source_domain = corp_in.domain
    new_domain = Domain(
        topic_vars,
        source_domain.class_vars,
        metas=source_domain.metas,
    )

    return Table.from_numpy(
        new_domain, matrix, Y=corp_in._Y, metas=corp_in.metas)
def insert_topics_into_corpus(self, corp_in):
    """
    Insert topical representation into corpus.

    Builds a new table whose attributes are the topics, while the class
    variables, class values and metas come from `corp_in`.

    :param corp_in: Corpus into which we want to insert topical
        representations
    :return: `Orange.data.table.Table`
    """
    topic_matrix = matutils.corpus2dense(
        self.corpus, num_terms=self.num_topics
    ).T

    # Generate the new table: topics as attributes, the rest from corp_in.
    attr = []
    for topic_name in self.topic_names:
        attr.append(ContinuousVariable(topic_name))
    domain = Domain(
        attr, corp_in.domain.class_vars, metas=corp_in.domain.metas)

    table = Table.from_numpy(
        domain, topic_matrix, Y=corp_in._Y, metas=corp_in.metas)
    return table
def get_all_topics_table(self):
    """ Build a table with one row per word and one column per topic.

    Each column holds the topic's weights ordered by that topic's own
    sorted words; the 'Word' meta holds the sorted words of the first
    topic (presumably every topic lists the same words -- verify against
    ``_topics_words``).
    """
    words_per_topic = self._topics_words(self.n_words)
    weights_per_topic = self._topics_weights(self.n_words)
    word_labels = sorted(words_per_topic[0])
    n_topics = len(words_per_topic)

    # Reorder every topic's weights to follow its words in sorted order.
    per_topic = [
        [weight for _, weight in sorted(zip(words, weights))]
        for words, weights in zip(words_per_topic, weights_per_topic)
    ]
    # Transpose so that topics become columns.
    X = np.array(per_topic).T

    # take only first n_topics; e.g. when user requested 10, but gensim
    # returns only 9 — when the rank is lower than num_topics requested
    attrs = [ContinuousVariable(name)
             for name in self.topic_names[:n_topics]]

    domain = Domain(attrs, metas=[StringVariable('Word')])
    word_column = np.array(word_labels)[:, None]
    t = Table.from_numpy(domain, X=X, metas=word_column)
    t.name = 'All topics'
    return t
def get_all_topics_table(self):
    """ Transform all topics from the model to a words-by-topics table.

    Topics become the continuous attributes; the sorted words of the
    first topic are stored in the 'Word' meta column. Weights within each
    topic are arranged by that topic's own sorted words.
    """
    topic_words = self._topics_words(self.n_words)
    topic_weights = self._topics_weights(self.n_words)
    sorted_words = sorted(topic_words[0])
    n_topics = len(topic_words)

    columns = []
    for words, weights in zip(topic_words, topic_weights):
        ordered_pairs = sorted(zip(words, weights))
        columns.append([pair[1] for pair in ordered_pairs])
    # Rows were collected per topic; transpose to get one column each.
    X = np.array(columns).T

    # take only first n_topics; e.g. when user requested 10, but gensim
    # returns only 9 — when the rank is lower than num_topics requested
    used_names = self.topic_names[:n_topics]
    attrs = [ContinuousVariable(n) for n in used_names]

    t = Table.from_numpy(
        Domain(attrs, metas=[StringVariable('Word')]),
        X=X,
        metas=np.array(sorted_words)[:, None],
    )
    t.name = 'All topics'
    return t