Ejemplo n.º 1
0
    def get_topics_table_by_id(self, topic_id):
        """
        Build a table with the words and weights of a single LDA topic.

        :param topic_id: zero-based index of the topic to export.
        :return: `Topics` table with one row per word of the topic; both
            columns are stored as metas and the second one is also
            assigned to the table's `W`.
        :raises ValueError: if `topic_id` is not smaller than the number of
            topics returned by the model.
        """
        topics = self.lda.show_topics(num_topics=-1,
                                      num_words=MAX_WORDS,
                                      formatted=False)
        if topic_id >= len(topics):
            raise ValueError("Too large topic ID.")

        topic = topics[topic_id]
        # Size the table by the selected topic only. Using the longest topic
        # made the column assignments below fail with a shape mismatch
        # whenever the selected topic had fewer entries than another one.
        num_words = len(topic)

        # NOTE(review): each entry is presumably a (weight, word) pair, so
        # column 0 receives the words and column 1 the weights -- confirm
        # against the gensim version in use.
        data = np.zeros((num_words, 2), dtype=object)
        data[:, 0] = [item[1] for item in topic]
        data[:, 1] = [item[0] for item in topic]

        metas = [
            StringVariable(self.topic_names[topic_id]),
            ContinuousVariable("Topic{}_weights".format(topic_id + 1))
        ]
        # Second column is displayed in scientific notation.
        metas[-1]._out_format = '%.2e'

        domain = Domain([], metas=metas)
        t = Topics.from_numpy(domain, X=np.zeros((num_words, 0)), metas=data)
        t.W = data[:, 1]
        return t
Ejemplo n.º 2
0
    def __init__(self, parent=None, signalManager=None, settings=None):
        """
        Set up the widget state, then build the UI and draw the plot.

        Creates the undo stack, the plot, the class-label model and a data
        table over two continuous attributes and a discrete class.

        :param parent: forwarded to the base class constructor.
        :param signalManager: forwarded to the base class constructor.
        :param settings: forwarded to the base class constructor.
        """
        super().__init__(parent, signalManager, settings)

        self.data = None

        self.undoStack = QtGui.QUndoStack(self)

        self.plot = PaintDataPlot(self.mainArea, "Painted Plot", widget=self)

        # Items of the class-label list are selectable, enabled and editable.
        label_flags = (QtCore.Qt.ItemIsSelectable | QtCore.Qt.ItemIsEnabled
                       | QtCore.Qt.ItemIsEditable)
        self.classValuesModel = ColoredListModel(
            ["Class-1", "Class-2"], self, self.plot, flags=label_flags)
        self.classValuesModel.dataChanged.connect(self.classNameChange)

        # The class variable takes its values from the label model itself.
        attributes = [
            ContinuousVariable(self.attr1),
            ContinuousVariable(self.attr2),
        ]
        class_var = DiscreteVariable("Class", values=self.classValuesModel)
        self.data = Table(Domain(attributes, class_var))

        self.toolsStackCache = {}

        self.initUI()
        self.initPlot()
        self.updatePlot()
Ejemplo n.º 3
0
    def test_corpus_not_eq(self):
        """A corpus must differ from copies with one component altered."""
        corpus = Corpus.from_file('bookexcerpts')
        n_docs = corpus.X.shape[0]

        # No text features.
        other = Corpus(corpus.X, corpus.Y, corpus.metas, corpus.domain, [])
        self.assertNotEqual(corpus, other)

        # Different X.
        other = Corpus(np.ones((n_docs, 1)), corpus.Y, corpus.metas,
                       corpus.domain, corpus.text_features)
        self.assertNotEqual(corpus, other)

        # Different Y.
        other = Corpus(corpus.X, np.ones((n_docs, 1)), corpus.metas,
                       corpus.domain, corpus.text_features)
        self.assertNotEqual(corpus, other)

        # One meta value blanked out.
        altered_metas = np.array(corpus.metas, copy=True)
        altered_metas[0, 0] = ''
        other = Corpus(corpus.X, corpus.Y, altered_metas, corpus.domain,
                       corpus.text_features)
        self.assertNotEqual(corpus, other)

        # Different meta variable in the domain and as text feature.
        replacement_meta = [StringVariable('text2')]
        altered_domain = Domain(corpus.domain.attributes,
                                corpus.domain.class_var,
                                replacement_meta)
        other = Corpus(corpus.X, corpus.Y, corpus.metas, altered_domain,
                       replacement_meta)
        self.assertNotEqual(corpus, other)
Ejemplo n.º 4
0
    def get_topics_table_by_id(self, topic_id):
        """
        Export one topic of the model as a table of words and weights.

        :param topic_id: index of the topic to export.
        :return: `Topics` table holding the words and their weights as
            metas; the weights are also assigned to the table's `W`.
        :raises ValueError: if `topic_id` is not smaller than the number of
            topics.
        """
        all_words = self._topics_words(MAX_WORDS)
        all_weights = self._topics_weights(MAX_WORDS)
        if len(all_words) <= topic_id:
            raise ValueError("Too large topic ID.")

        topic_words = all_words[topic_id]
        n_rows = len(topic_words)

        # Object array so that words and weights share one meta block.
        columns = np.zeros((n_rows, 2), dtype=object)
        columns[:, 0] = topic_words
        columns[:, 1] = all_weights[topic_id]

        word_var = StringVariable(self.topic_names[topic_id])
        weight_var = ContinuousVariable(
            "Topic{}_weights".format(topic_id + 1))
        weight_var._out_format = '%.2e'

        table = Topics.from_numpy(
            Domain([], metas=[word_var, weight_var]),
            X=np.zeros((n_rows, 0)),
            metas=columns)
        table.W = columns[:, 1]
        return table
Ejemplo n.º 5
0
 def sendData(self):
     """
     Send the widget's data on the "Data" channel.

     When all instances share exactly one class value, the data is first
     rebuilt over a domain that keeps only the attributes.
     """
     table = self.data
     distinct_classes = {str(inst.get_class()) for inst in table}
     if len(distinct_classes) == 1:
         # Remove the useless class variable.
         table = Table(Domain(table.domain.attributes), table)
     self.send("Data", table)
Ejemplo n.º 6
0
    def k_net_table(self, v_list):
        """
        Wrap a sequence of scores into a two-column table.

        :param v_list: iterable of scores; the position of each score is
            stored as its 'Distance'.
        :return: `Table` with the continuous columns 'Distance' and
            'Knet score'.
        """
        distance_var = ContinuousVariable(name='Distance',
                                          number_of_decimals=0)
        score_var = ContinuousVariable(name='Knet score')
        domain = Domain([distance_var, score_var])

        # Each (position, score) pair becomes one row.
        rows = [list(pair) for pair in enumerate(v_list)]
        return Table(domain, rows)
Ejemplo n.º 7
0
    def k_node_table(self, v_list):
        """
        Build a table of Knode scores from a list of rows.

        :param v_list: rows passed to `Table` unchanged; the first element
            of every row supplies a value of the 'Protein' variable.
            Presumably each row is (protein, knode score, distance) --
            confirm against the caller.
        :return: `Table` with the columns 'Protein', 'Knode score' and
            'Distance'.
        """
        # Sort the distinct names: a bare set has no stable iteration order,
        # so the value -> index mapping of the discrete variable could
        # differ from one run to the next.
        protein_names = sorted({row[0] for row in v_list})

        values = [
            DiscreteVariable(name='Protein', values=protein_names),
            ContinuousVariable(name='Knode score'),
            ContinuousVariable(name='Distance', number_of_decimals=0),
        ]
        domain = Domain(values)

        return Table(domain, v_list)
Ejemplo n.º 8
0
def _create_table(words, scores: Mapping[str, np.array]) -> Table:
    """
    Build an Orange table of word scores, ordered by the first score array.

    :param words: list of words
    :param scores: mapping of {label: score_array}; every label becomes a
        column, in the mapping's iteration order (use an ordered mapping to
        control the column order)
    :return: a Table with one continuous column per label and the words in
        the "term" meta column, rows in descending order of the first
        score array
    """
    score_columns = list(scores.values())
    # Sorting the negated first column ascending gives descending order.
    negated_first = -score_columns[0]
    row_order = negated_first.argsort()

    sorted_scores = np.column_stack(score_columns)[row_order]
    word_column = np.array(words).reshape(len(words), 1)
    sorted_words = word_column[row_order]

    attributes = [ContinuousVariable(label) for label in scores]
    term_meta = StringVariable("term")
    return Table(Domain(attributes, metas=[term_meta]),
                 sorted_scores,
                 metas=sorted_words)
Ejemplo n.º 9
0
    def insert_topics_into_corpus(self, corp_in):
        """
        Combine the topical representation with the contents of a corpus.

        :param corp_in: corpus whose class variables, meta variables, target
            values and meta values are carried over into the result
        :return: `Orange.data.table.Table` with one continuous attribute per
            topic name
        """
        # Dense matrix of `self.corpus`, transposed before use as X.
        topic_matrix = matutils.corpus2dense(
            self.corpus, num_terms=self.num_topics).T

        # Generate the new table.
        topic_vars = list(map(ContinuousVariable, self.topic_names))
        source_domain = corp_in.domain
        domain = Domain(topic_vars,
                        source_domain.class_vars,
                        metas=source_domain.metas)

        return Table.from_numpy(
            domain, topic_matrix, Y=corp_in._Y, metas=corp_in.metas)
Ejemplo n.º 10
0
    def redo(self):
        """
        Remove the class label at `self.index` and the instances carrying it.

        The removed label is stored in `self.label`. The widget's data is
        replaced by a new table containing only instances whose class value
        differs from that label and is still present in the class model;
        the plot and cursor are then refreshed.
        """
        model = self.classValuesModel
        # The label is taken out of the model before the data is filtered.
        self.label = model.pop(self.index)
        survivors = [
            inst for inst in self.data
            if str(inst.get_class()) != self.label
        ]

        attributes = [
            ContinuousVariable(self.widget.attr1),
            ContinuousVariable(self.widget.attr2),
        ]
        class_var = DiscreteVariable("Class", values=model)
        newdomain = Domain(attributes, class_var)
        newdata = Table(newdomain)

        for inst in survivors:
            class_name = str(inst.get_class())
            keep = class_name != self.label and class_name in model
            if not keep:
                continue
            features = [float(inst[a]) for a in inst.domain.attributes]
            newdata.append(Instance(newdomain, features + [class_name]))

        self.widget.data = newdata
        self.widget.updatePlot()
        self.widget.updateCursor()
Ejemplo n.º 11
0
 def redo(self):
     """
     Rebuild the widget's data so that it uses the changed class label.

     Instances whose class value is not present in `self.classValuesModel`
     have that value remembered in `self.oldLabelName` and are stored with
     `self.changedLabel` instead; all other instances keep their class
     value. The widget's data is replaced and the plot refreshed.
     """
     newdomain = Domain([
         ContinuousVariable(self.widget.attr1),
         ContinuousVariable(self.widget.attr2)
     ], DiscreteVariable("Class", values=self.classValuesModel))
     newdata = Table(newdomain)
     for ex in self.data:
         label = str(ex.get_class())
         if label not in self.classValuesModel:
             # Value missing from the model: remember it and relabel.
             self.oldLabelName = label
             label = self.changedLabel
         features = [float(ex[a]) for a in ex.domain.attributes]
         newdata.append(Instance(newdomain, features + [label]))
     self.widget.data = newdata
     self.widget.updatePlot()
Ejemplo n.º 12
0
    def redo(self):
        """
        Add `self.newClassLabel` to the class model and rebuild the data.

        Every existing instance is copied into a table over a new domain
        built from the extended model. The widget then gets the new data,
        the remove-label control is enabled only when more than one label
        exists, the last label is selected, and the plot and cursor are
        refreshed.
        """
        model = self.classValuesModel
        model.append(self.newClassLabel)

        attributes = [
            ContinuousVariable(self.widget.attr1),
            ContinuousVariable(self.widget.attr2),
        ]
        newdomain = Domain(attributes,
                           DiscreteVariable("Class", values=model))
        newdata = Table(newdomain)

        instances = []
        for ex in self.data:
            features = [float(ex[a]) for a in ex.domain.attributes]
            instances.append(
                Instance(newdomain, features + [str(ex.get_class())]))
        newdata.extend(instances)

        widget = self.widget
        widget.data = newdata
        widget.removeClassLabel.setEnabled(len(model) > 1)

        # Select the last entry of the model.
        last_row = len(model) - 1
        newindex = model.index(last_row)
        widget.classValuesView.selectionModel().select(
            newindex, QtGui.QItemSelectionModel.ClearAndSelect)

        widget.updatePlot()
        widget.updateCursor()