def get_topics_table_by_id(self, topic_id):
    """
    Build a table with the words and weights of a single LDA topic.

    :param topic_id: zero-based index of the topic to export.
    :return: table created with `Topics.from_numpy`; it has no attributes,
        the metas hold one row per word (word, weight) and the weights
        are additionally stored in the ``W`` attribute.
    :raises ValueError: if `topic_id` is not smaller than the number of
        topics returned by the model.
    """
    topics = self.lda.show_topics(num_topics=-1, num_words=MAX_WORDS,
                                  formatted=False)
    if topic_id >= len(topics):
        raise ValueError("Too large topic ID.")

    topic = topics[topic_id]
    # Size the table by the selected topic itself. Using the maximum length
    # over all topics breaks the column assignments below whenever the
    # selected topic has fewer entries than the longest one.
    num_words = len(topic)

    data = np.zeros((num_words, 2), dtype=object)
    # item[1] fills the string (word) column, item[0] the weight column.
    data[:, 0] = [item[1] for item in topic]
    data[:, 1] = [item[0] for item in topic]

    metas = [
        StringVariable(self.topic_names[topic_id]),
        ContinuousVariable("Topic{}_weights".format(topic_id + 1)),
    ]
    # Show the weights in scientific notation.
    metas[-1]._out_format = '%.2e'

    domain = Domain([], metas=metas)
    t = Topics.from_numpy(domain,
                          X=np.zeros((num_words, 0)),
                          metas=data)
    t.W = data[:, 1]
    return t
def __init__(self, parent=None, signalManager=None, settings=None):
    """
    Set up the widget: undo stack, plot, class-label model, the data table
    and finally the UI and plot.

    :param parent: forwarded unchanged to the base class constructor.
    :param signalManager: forwarded unchanged to the base class constructor.
    :param settings: forwarded unchanged to the base class constructor.
    """
    super().__init__(parent, signalManager, settings)

    # Placeholder until the real table is created further down.
    self.data = None
    self.undoStack = QtGui.QUndoStack(self)
    self.plot = PaintDataPlot(self.mainArea, "Painted Plot", widget=self)

    # Class labels can be selected and edited in place.
    item_flags = (QtCore.Qt.ItemIsSelectable
                  | QtCore.Qt.ItemIsEnabled
                  | QtCore.Qt.ItemIsEditable)
    self.classValuesModel = ColoredListModel(
        ["Class-1", "Class-2"], self, self.plot, flags=item_flags)
    self.classValuesModel.dataChanged.connect(self.classNameChange)

    # Two continuous attributes plus a discrete class whose values are
    # taken from the label model created above.
    first_attr = ContinuousVariable(self.attr1)
    second_attr = ContinuousVariable(self.attr2)
    class_var = DiscreteVariable("Class", values=self.classValuesModel)
    self.data = Table(Domain([first_attr, second_attr], class_var))

    self.toolsStackCache = {}

    self.initUI()
    self.initPlot()
    self.updatePlot()
def test_corpus_not_eq(self):
    """A corpus must compare unequal to a copy that differs in one part."""
    reference = Corpus.from_file('bookexcerpts')
    rows = reference.X.shape[0]

    # Different text features (none at all).
    other = Corpus(reference.X, reference.Y, reference.metas,
                   reference.domain, [])
    self.assertNotEqual(reference, other)

    # Different X.
    other = Corpus(np.ones((rows, 1)), reference.Y, reference.metas,
                   reference.domain, reference.text_features)
    self.assertNotEqual(reference, other)

    # Different Y.
    other = Corpus(reference.X, np.ones((rows, 1)), reference.metas,
                   reference.domain, reference.text_features)
    self.assertNotEqual(reference, other)

    # Different metas: blank out the first cell of a copy.
    altered_metas = np.copy(reference.metas)
    altered_metas[0, 0] = ''
    other = Corpus(reference.X, reference.Y, altered_metas,
                   reference.domain, reference.text_features)
    self.assertNotEqual(reference, other)

    # Different domain: same attributes and class, another meta variable.
    replacement_meta = [StringVariable('text2')]
    altered_domain = Domain(reference.domain.attributes,
                            reference.domain.class_var,
                            replacement_meta)
    other = Corpus(reference.X, reference.Y, reference.metas,
                   altered_domain, replacement_meta)
    self.assertNotEqual(reference, other)
def get_topics_table_by_id(self, topic_id):
    """
    Export one topic as a table of its words and their weights.

    :param topic_id: zero-based index of the topic to export.
    :return: table created with `Topics.from_numpy`; the metas hold one
        row per word (word, weight) and the weights are also stored in
        the ``W`` attribute.
    :raises ValueError: if `topic_id` is not smaller than the number of
        topics.
    """
    all_words = self._topics_words(MAX_WORDS)
    all_weights = self._topics_weights(MAX_WORDS)
    if topic_id >= len(all_words):
        raise ValueError("Too large topic ID.")

    topic_words = all_words[topic_id]
    n_rows = len(topic_words)

    # Object array so that strings and numbers can share one meta matrix.
    meta_values = np.zeros((n_rows, 2), dtype=object)
    meta_values[:, 0] = topic_words
    meta_values[:, 1] = all_weights[topic_id]

    word_var = StringVariable(self.topic_names[topic_id])
    weight_var = ContinuousVariable("Topic{}_weights".format(topic_id + 1))
    # Show the weights in scientific notation.
    weight_var._out_format = '%.2e'

    table = Topics.from_numpy(
        Domain([], metas=[word_var, weight_var]),
        X=np.zeros((n_rows, 0)),
        metas=meta_values,
    )
    table.W = meta_values[:, 1]
    return table
def sendData(self):
    """
    Send the current data on the "Data" channel.

    When all examples share a single class value, the data is first
    rebuilt on a domain that keeps only the attributes.
    """
    table = self.data
    class_labels = {str(row.get_class()) for row in table}
    if len(class_labels) == 1:
        # Remove the useless class variable.
        table = Table(Domain(table.domain.attributes), table)
    self.send("Data", table)
def k_net_table(self, v_list):
    """
    Build a two-column table from a sequence of scores.

    :param v_list: iterable of scores; the position of each score in the
        iterable becomes its 'Distance' value.
    :return: table with the columns 'Distance' and 'Knet score'.
    """
    distance_var = ContinuousVariable(name='Distance', number_of_decimals=0)
    score_var = ContinuousVariable(name='Knet score')
    table_domain = Domain([distance_var, score_var])

    rows = []
    for position, score in enumerate(v_list):
        rows.append([position, score])
    return Table(table_domain, rows)
def k_node_table(self, v_list):
    """
    Build a table with the columns 'Protein', 'Knode score' and 'Distance'.

    :param v_list: sequence of rows; the first item of every row is used
        as a value of the discrete 'Protein' variable.
    :return: table built from `v_list` on the three-column domain.
    """
    # De-duplicate in first-appearance order. A plain set has no defined
    # order, so the order of the discrete values could change between runs.
    protein_names = list(dict.fromkeys(row[0] for row in v_list))
    values = [
        DiscreteVariable(name='Protein', values=protein_names),
        ContinuousVariable(name='Knode score'),
        ContinuousVariable(name='Distance', number_of_decimals=0),
    ]
    domain = Domain(values)
    return Table(domain, v_list)
def _create_table(words, scores: Mapping[str, np.array]) -> Table:
    """
    Create an Orange table from the word scores.

    Rows are ordered by the first score column, largest value first.

    :param words: list of words
    :param scores: mapping of {label: score_array}. Use ordereddict to
        preserve column order
    :return: a Table object
    """
    columns = list(scores.values())
    # Negating before the ascending argsort yields a descending order.
    ranking = np.argsort(-columns[0])

    score_matrix = np.column_stack(columns)[ranking]
    word_column = np.reshape(np.array(words), (len(words), 1))[ranking]

    score_vars = [ContinuousVariable(label) for label in scores]
    table_domain = Domain(score_vars, metas=[StringVariable("term")])
    return Table(table_domain, score_matrix, metas=word_column)
def insert_topics_into_corpus(self, corp_in):
    """
    Insert topical representation into corpus.

    The topic matrix is computed from ``self.corpus``; class variables,
    class values and metas are taken from `corp_in`.

    :param corp_in: Corpus into which we want to insert topical
        representations
    :return: `Orange.data.table.Table`
    """
    # Transposed dense matrix with `num_topics` terms
    # (presumably one row per document afterwards - confirm against gensim).
    topic_matrix = matutils.corpus2dense(
        self.corpus, num_terms=self.num_topics).T

    # Generate the new table: one continuous attribute per topic name.
    topic_vars = [ContinuousVariable(name) for name in self.topic_names]
    source_domain = corp_in.domain
    new_domain = Domain(topic_vars,
                        source_domain.class_vars,
                        metas=source_domain.metas)
    return Table.from_numpy(new_domain,
                            topic_matrix,
                            Y=corp_in._Y,
                            metas=corp_in.metas)
def redo(self):
    """
    Remove the class label at ``self.index`` and rebuild the widget data.

    The removed label is stored in ``self.label``. Examples carrying that
    label, or any label not present in the class model, are left out of
    the new table. The plot and cursor are refreshed afterwards.
    """
    self.label = self.classValuesModel.pop(self.index)

    attributes = [ContinuousVariable(self.widget.attr1),
                  ContinuousVariable(self.widget.attr2)]
    class_var = DiscreteVariable("Class", values=self.classValuesModel)
    newdomain = Domain(attributes, class_var)
    newdata = Table(newdomain)

    for ex in self.data:
        class_name = str(ex.get_class())
        if class_name == self.label:
            continue
        if class_name not in self.classValuesModel:
            continue
        row = [float(ex[a]) for a in ex.domain.attributes]
        row.append(class_name)
        newdata.append(Instance(newdomain, row))

    self.widget.data = newdata
    self.widget.updatePlot()
    self.widget.updateCursor()
def redo(self):
    """
    Rebuild the widget data after a class label has been changed.

    Every example is copied onto a fresh domain whose class values come
    from the class model. An example whose class name is no longer in the
    model gets ``self.changedLabel`` instead, and its previous name is
    stored in ``self.oldLabelName``. The plot is refreshed afterwards.
    """
    newdomain = Domain(
        [ContinuousVariable(self.widget.attr1),
         ContinuousVariable(self.widget.attr2)],
        DiscreteVariable("Class", values=self.classValuesModel))
    newdata = Table(newdomain)

    for ex in self.data:
        class_name = str(ex.get_class())
        if class_name not in self.classValuesModel:
            # The name is missing from the model: record it and use the
            # changed label for the copied example.
            self.oldLabelName = class_name
            class_name = self.changedLabel
        values = [float(ex[a]) for a in ex.domain.attributes]
        newdata.append(Instance(newdomain, values + [class_name]))

    self.widget.data = newdata
    self.widget.updatePlot()
def redo(self):
    """
    Add ``self.newClassLabel`` to the class model and rebuild the data.

    All examples are copied onto a fresh domain that uses the extended
    class model. The new label is then selected in the class view, the
    remove button is enabled when more than one label exists, and the
    plot and cursor are refreshed.
    """
    self.classValuesModel.append(self.newClassLabel)

    attributes = [ContinuousVariable(self.widget.attr1),
                  ContinuousVariable(self.widget.attr2)]
    class_var = DiscreteVariable("Class", values=self.classValuesModel)
    newdomain = Domain(attributes, class_var)
    newdata = Table(newdomain)

    instances = []
    for ex in self.data:
        row = [float(ex[a]) for a in ex.domain.attributes]
        row.append(str(ex.get_class()))
        instances.append(Instance(newdomain, row))
    newdata.extend(instances)
    self.widget.data = newdata

    has_several_labels = len(self.classValuesModel) > 1
    self.widget.removeClassLabel.setEnabled(has_several_labels)

    # Select the last entry of the model, i.e. the label appended above.
    last_row = len(self.classValuesModel) - 1
    newindex = self.classValuesModel.index(last_row)
    selection = self.widget.classValuesView.selectionModel()
    selection.select(newindex, QtGui.QItemSelectionModel.ClearAndSelect)

    self.widget.updatePlot()
    self.widget.updateCursor()