Esempio n. 1
0
    def get_all_topics_table(self):
        """Build a table with one row per topic and one column per word.

        The word weights become the attribute matrix. The topic names and the
        result of ``_marginal_probability`` are stored as the two meta columns
        ('Topics' and 'Marginal Topic Probability').
        """
        topic_words = self._topics_words(self.n_words)
        topic_weights = self._topics_weights(self.n_words)
        # Column labels are the alphabetically sorted words of the first topic.
        vocabulary = sorted(topic_words[0])
        topic_count = len(topic_words)

        # Within each topic, order the weights by their word so that they
        # line up with the alphabetically sorted column labels.
        weight_rows = np.array([
            [weight for _, weight in sorted(zip(words, weights))]
            for words, weights in zip(topic_words, topic_weights)
        ])

        # Keep only as many names as there are topics: gensim can return
        # fewer topics than requested (e.g. 9 instead of 10) when the rank is
        # lower than the requested num_topics.
        name_column = np.array(
            self.topic_names[:topic_count], dtype=object)[:, None]

        word_attrs = [ContinuousVariable(word) for word in vocabulary]
        meta_vars = [
            StringVariable('Topics'),
            ContinuousVariable('Marginal Topic Probability'),
        ]

        probabilities = np.array(
            self._marginal_probability(self.tokens, self.doc_topic),
            dtype=object)

        domain = Domain(word_attrs, metas=meta_vars)
        table = Table.from_numpy(
            domain,
            X=weight_rows,
            metas=np.hstack((name_column, probabilities)))
        table.name = 'All topics'
        return table
Esempio n. 2
0
    def apply_domain_edit(self):
        """Rebuild the table from the edited domain and send it to the output.

        ``None`` is sent when there is no input data or when the edited
        domain has neither variables nor metas. Otherwise a new table is
        built from the edited columns; it takes its weights, name, row ids
        and ``attributes`` from ``self.data``. The apply button is disabled
        afterwards in every case.
        """
        table = None
        if self.data is not None:
            domain, cols = self.domain_editor.get_domain(
                self.data.domain, self.data)
            if domain.variables or domain.metas:
                # X holds the attribute columns, y the class columns and
                # m the meta columns of the edited domain.
                X, y, m = cols
                table = Table.from_numpy(domain, X, y, m, self.data.W)
                # Carry the identifying information of the input over to
                # the rebuilt table: its name, row ids and attributes.
                table.name = self.data.name
                table.ids = np.array(self.data.ids)
                # Fall back to an empty dict when the input has no
                # ``attributes``.
                table.attributes = getattr(self.data, 'attributes', {})

        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)
Esempio n. 3
0
    def apply_domain_edit(self):
        """Apply the edited domain, update the output summary and send data.

        Both warnings are cleared first. The output is ``None`` when there is
        no input or the edited domain has no variables and no metas, the
        input itself when the editor returned the very same domain object,
        and a freshly built table otherwise. Renamed variables reported by
        the editor are shown as a warning.
        """
        self.Warning.performance_warning.clear()
        self.Warning.renamed_vars.clear()

        output = None
        if self.data is not None:
            new_domain, columns, renamed_names = self.domain_editor.get_domain(
                self.data.domain, self.data, deduplicate=True)
            has_columns = bool(new_domain.variables or new_domain.metas)
            if has_columns and new_domain is self.data.domain:
                # Nothing was edited; pass the input through unchanged.
                output = self.data
            elif has_columns:
                X, y, m = columns
                output = Table.from_numpy(new_domain, X, y, m, self.data.W)
                output.name = self.data.name
                output.ids = np.array(self.data.ids)
                output.attributes = getattr(self.data, 'attributes', {})
                self._inspect_discrete_variables(new_domain)
            if renamed_names:
                self.Warning.renamed_vars(
                    f"Renamed: {', '.join(renamed_names)}")

        # NOTE(review): this is a truthiness test, not an ``is None`` check.
        # If an empty table is falsy it is summarised as "no output" even
        # though it is still sent below -- confirm this is intended.
        if output:
            summary = len(output)
            details = format_summary_details(output)
        else:
            summary = self.info.NoOutput
            details = ""
        self.info.set_output_summary(summary, details)
        self.Outputs.data.send(output)
        self.apply_button.setEnabled(False)
Esempio n. 4
0
    def apply_domain_edit(self):
        """Apply the edited domain and send the resulting table.

        Both warnings are cleared first. The output is ``None`` when there is
        no input or the edited domain has no variables and no metas, the
        input itself when the editor returned the very same domain object,
        and a freshly built table otherwise. The ``multiple_targets`` warning
        is shown when the output has more than one class variable.
        """
        self.Warning.performance_warning.clear()
        self.Warning.renamed_vars.clear()

        output = None
        if self.data is not None:
            edited, columns, renamed_names = self.domain_editor.get_domain(
                self.data.domain, self.data, deduplicate=True)
            if not edited.variables and not edited.metas:
                # Every column was removed; there is nothing to output.
                output = None
            elif edited is self.data.domain:
                output = self.data
            else:
                X, y, m = columns
                output = Table.from_numpy(edited, X, y, m, self.data.W)
                output.name = self.data.name
                output.ids = np.array(self.data.ids)
                output.attributes = getattr(self.data, 'attributes', {})
                self._inspect_discrete_variables(edited)
            if renamed_names:
                self.Warning.renamed_vars(
                    f"Renamed: {', '.join(renamed_names)}")

        has_many_targets = (output is not None
                            and len(output.domain.class_vars) > 1)
        self.Warning.multiple_targets(shown=has_many_targets)
        self.Outputs.data.send(output)
        self.apply_button.setEnabled(False)
Esempio n. 5
0
    def commit(self):
        """Embed the selected SMILES column and send embedded/skipped rows.

        Nothing happens unless both an embedder and a SMILES attribute are
        selected. Rows whose index is returned as valid by
        ``to_fingerprints`` are sent on ``embedded_smiles`` with the
        fingerprint as attributes; all remaining rows are sent unchanged on
        ``skipped_smiles``. An output with no rows receives ``None``.
        """
        if self._embedder == '' or self._smiles_attr == '':
            return

        smiles = self.data[:, self._smiles_attr].metas.flatten()
        embedded, valid = self.to_fingerprints(smiles, self._embedder)
        # Sorted so that skipped rows keep their original order; iterating
        # a set gives no ordering guarantee.
        invalid = sorted(set(range(len(smiles))) - set(valid))

        # ``len`` works for any sequence of indices, whereas comparing with
        # ``[]`` only behaves as intended for plain lists.
        if len(valid):
            if self._embedder == MACCS:
                # MACCS columns are named after their SMARTS patterns, plus
                # one extra '?' column.
                attributes = [
                    ContinuousVariable.make(name)
                    for name, _ in Chem.MACCSkeys.smartsPatts.values()
                ]
                attributes.append(ContinuousVariable.make('?'))
            else:
                attributes = [
                    ContinuousVariable.make("C_{}".format(i))
                    for i in range(embedded.shape[1])
                ]
            embedded_table = Table.from_numpy(
                Domain(attributes,
                       self.data.domain.class_vars,
                       self.data.domain.metas),
                embedded,
                self.data.Y[valid],
                self.data.metas[valid],
                self.data.W[valid],
            )
            self.Outputs.embedded_smiles.send(embedded_table)
        else:
            self.Outputs.embedded_smiles.send(None)

        if invalid:
            invalid_table = Table.from_numpy(
                self.data.domain,
                self.data.X[invalid],
                self.data.Y[invalid],
                self.data.metas[invalid],
                self.data.W[invalid],
            )
            self.Outputs.skipped_smiles.send(invalid_table)
        else:
            self.Outputs.skipped_smiles.send(None)
Esempio n. 6
0
    def apply_domain_edit(self):
        """Build a table from the editor model's variables and send it.

        Each row of ``self.editor_model.variables`` is paired, in order, with
        the original attributes, class variables and metas of ``self.data``.
        For every pair the column is either kept as is, converted to a
        discrete variable, converted from discrete to string, or given a
        fresh variable of the requested type, and then filed under the
        requested place (0 attributes, 1 class variables, 2 metas). Rows
        whose place is 3 are skipped. The result is sent on the "Data"
        output and the apply button is disabled.
        """
        attributes = []
        class_vars = []
        metas = []
        places = [attributes, class_vars, metas]
        X, y, m = [], [], []
        cols = [X, y, m]  # Xcols, Ycols, Mcols

        def is_missing(x):
            return str(x) in ("nan", "")

        originals = chain([(at, 0) for at in self.data.domain.attributes],
                          [(cl, 1) for cl in self.data.domain.class_vars],
                          [(mt, 2) for mt in self.data.domain.metas])
        for (name, tpe, place, _vals, _is_con), (orig_var, orig_plc) in \
                zip(self.editor_model.variables, originals):
            if place == 3:
                continue
            if orig_plc == 2:
                col_data = list(chain(*self.data[:, orig_var].metas))
            else:
                col_data = list(chain(*self.data[:, orig_var]))
            if name == orig_var.name and tpe == type(orig_var):
                var = orig_var
            elif tpe == DiscreteVariable:
                # Sorted and de-duplicated by string form, so the category
                # order is deterministic and no label appears twice.
                values = sorted(
                    {str(i) for i in col_data if not is_missing(i)})
                # One dict lookup per cell instead of a linear list search.
                index_of = {value: idx for idx, value in enumerate(values)}
                var = tpe(name, values)
                col_data = [
                    np.nan if is_missing(x) else index_of[str(x)]
                    for x in col_data
                ]
            elif tpe == StringVariable and type(orig_var) == DiscreteVariable:
                var = tpe(name)
                col_data = [
                    orig_var.repr_val(x) if not np.isnan(x) else ""
                    for x in col_data
                ]
            else:
                var = tpe(name)
            places[place].append(var)
            cols[place].append(col_data)

        domain = Domain(attributes, class_vars, metas)
        # Columns were collected one list per variable; transpose them into
        # row-major arrays.
        X = np.array(X).T if len(X) else np.empty((len(self.data), 0))
        y = np.array(y).T if len(y) else None
        # Metas need an object array as soon as one of them is a string.
        dtpe = object if any(
            isinstance(meta, StringVariable)
            for meta in domain.metas) else float
        m = np.array(m, dtype=dtpe).T if len(m) else None
        table = Table.from_numpy(domain, X, y, m, self.data.W)
        self.send("Data", table)
        self.apply_button.setEnabled(False)
Esempio n. 7
0
    def apply_domain_edit(self):
        """Rebuild the table from the edited domain and send it on "Data".

        With no input, ``None`` is sent. Otherwise the table is built from
        the columns returned by the domain editor and takes its weights,
        name, row ids and ``attributes`` from ``self.data``. The apply button
        is disabled afterwards.
        """
        if self.data is None:
            result = None
        else:
            edited_domain, columns = self.domain_editor.get_domain(
                self.data.domain, self.data)
            X, y, m = columns
            result = Table.from_numpy(edited_domain, X, y, m, self.data.W)
            result.name = self.data.name
            result.ids = np.array(self.data.ids)
            # Fall back to an empty dict when the input has no attributes.
            result.attributes = getattr(self.data, 'attributes', {})

        self.send("Data", result)
        self.apply_button.setEnabled(False)
Esempio n. 8
0
    def apply_domain_edit(self):
        """Rebuild the table from the edited domain and send it.

        ``None`` is sent when there is no input or when the edited domain
        has neither variables nor metas. Otherwise the rebuilt table takes
        its weights, name, row ids and ``attributes`` from ``self.data``.
        The apply button is disabled afterwards.
        """
        output = None
        if self.data is not None:
            edited_domain, columns = self.domain_editor.get_domain(
                self.data.domain, self.data)
            if edited_domain.variables or edited_domain.metas:
                X, y, m = columns
                output = Table.from_numpy(
                    edited_domain, X, y, m, self.data.W)
                output.name = self.data.name
                output.ids = np.array(self.data.ids)
                output.attributes = getattr(self.data, 'attributes', {})

        self.Outputs.data.send(output)
        self.apply_button.setEnabled(False)
Esempio n. 9
0
    def apply_domain_edit(self):
        """Send the table rebuilt from the edited domain, or ``None``.

        The apply button is disabled after sending.
        """

        def rebuilt_table():
            # No input: nothing to rebuild.
            if self.data is None:
                return None
            new_domain, columns = self.domain_editor.get_domain(
                self.data.domain, self.data)
            # An edited domain without any variables or metas yields no table.
            if not new_domain.variables and not new_domain.metas:
                return None
            X, y, m = columns
            result = Table.from_numpy(new_domain, X, y, m, self.data.W)
            # Keep the name, row ids and attributes of the input.
            result.name = self.data.name
            result.ids = np.array(self.data.ids)
            result.attributes = getattr(self.data, 'attributes', {})
            return result

        self.Outputs.data.send(rebuilt_table())
        self.apply_button.setEnabled(False)
Esempio n. 10
0
    def apply_domain_edit(self):
        """Create a table according to the editor model and send it.

        The rows of ``self.editor_model.variables`` are matched, in order,
        with the original attributes, class variables and metas of
        ``self.data``. Each column is reused, re-encoded as discrete,
        converted from discrete to string, or given a new variable of the
        requested type, and stored under its requested place (0 attributes,
        1 class variables, 2 metas); place 3 means the column is left out.
        The table is sent on the "Data" output and the apply button is
        disabled.
        """
        attributes = []
        class_vars = []
        metas = []
        places = [attributes, class_vars, metas]
        X, y, m = [], [], []
        cols = [X, y, m]  # Xcols, Ycols, Mcols

        def is_missing(x):
            return str(x) in ("nan", "")

        # Original variables tagged with the place they currently occupy.
        tagged_originals = chain(
            [(at, 0) for at in self.data.domain.attributes],
            [(cl, 1) for cl in self.data.domain.class_vars],
            [(mt, 2) for mt in self.data.domain.metas])

        for (name, tpe, place, _vals, _is_con), (orig_var, orig_plc) in \
                zip(self.editor_model.variables, tagged_originals):
            if place == 3:
                continue
            if orig_plc == 2:
                col_data = list(chain(*self.data[:, orig_var].metas))
            else:
                col_data = list(chain(*self.data[:, orig_var]))
            if name == orig_var.name and tpe == type(orig_var):
                var = orig_var
            elif tpe == DiscreteVariable:
                # Build the labels from the string forms directly: sorting
                # makes their order reproducible, and the set guarantees
                # that each label occurs only once.
                labels = {str(i) for i in col_data if not is_missing(i)}
                values = sorted(labels)
                position = dict(zip(values, range(len(values))))
                var = tpe(name, values)
                col_data = [np.nan if is_missing(x) else position[str(x)]
                            for x in col_data]
            elif tpe == StringVariable and type(orig_var) == DiscreteVariable:
                var = tpe(name)
                col_data = [orig_var.repr_val(x) if not np.isnan(x) else ""
                            for x in col_data]
            else:
                var = tpe(name)
            places[place].append(var)
            cols[place].append(col_data)

        domain = Domain(attributes, class_vars, metas)
        # The lists hold one entry per variable; transpose to rows x columns.
        X = np.array(X).T if len(X) else np.empty((len(self.data), 0))
        y = np.array(y).T if len(y) else None
        # A single string meta forces the whole meta array to dtype object.
        has_string_meta = any(isinstance(meta, StringVariable)
                              for meta in domain.metas)
        dtpe = object if has_string_meta else float
        m = np.array(m, dtype=dtpe).T if len(m) else None
        table = Table.from_numpy(domain, X, y, m, self.data.W)
        self.send("Data", table)
        self.apply_button.setEnabled(False)
Esempio n. 11
0
    def apply_domain_edit(self):
        """Convert the edited columns to arrays, build the table and send it.

        With no input, ``None`` is sent on "Data". Otherwise the column lists
        returned by the domain editor are turned into arrays, and the
        resulting table takes its weights, name, row ids and ``attributes``
        from ``self.data``. The apply button is disabled afterwards.
        """
        if self.data is None:
            result = None
        else:
            domain, (X, y, m) = self.domain_editor.get_domain(
                self.data.domain, self.data)

            # Without attribute columns use an array with zero columns.
            if len(X):
                X = np.array(X).T
            else:
                X = np.empty((len(self.data), 0))
            y = np.array(y).T if len(y) else None

            # Metas are stored as objects as soon as one of them is a string.
            has_string_meta = any(
                isinstance(var, StringVariable) for var in domain.metas)
            meta_dtype = object if has_string_meta else float
            m = np.array(m, dtype=meta_dtype).T if len(m) else None

            result = Table.from_numpy(domain, X, y, m, self.data.W)
            result.name = self.data.name
            result.ids = np.array(self.data.ids)
            result.attributes = getattr(self.data, 'attributes', {})

        self.send("Data", result)
        self.apply_button.setEnabled(False)
Esempio n. 12
0
    def apply_domain_edit(self):
        """Rebuild the table from the edited domain and send it.

        The performance warning is cleared first. ``None`` is sent when there
        is no input or when the edited domain has neither variables nor
        metas. Otherwise the rebuilt table takes its weights, name, row ids
        and ``attributes`` from ``self.data``, and the new domain is passed
        to ``_inspect_discrete_variables``. The apply button is disabled
        afterwards.
        """
        self.Warning.performance_warning.clear()

        output = None
        if self.data is not None:
            new_domain, columns = self.domain_editor.get_domain(
                self.data.domain, self.data)
            if new_domain.variables or new_domain.metas:
                X, y, m = columns
                output = Table.from_numpy(new_domain, X, y, m, self.data.W)
                output.name = self.data.name
                output.ids = np.array(self.data.ids)
                output.attributes = getattr(self.data, 'attributes', {})
                self._inspect_discrete_variables(new_domain)

        self.Outputs.data.send(output)
        self.apply_button.setEnabled(False)
Esempio n. 13
0
    def insert_topics_into_corpus(self, corp_in):
        """
        Build a table that holds the topical representation of the corpus.

        The attributes are one continuous variable per topic name; class
        variables, class values and metas are taken from `corp_in`.

        :param corp_in: Corpus into which we want to insert topical
            representations
        :return: `Orange.data.table.Table`
        """
        # Dense version of self.corpus, transposed before it is used as X.
        dense = matutils.corpus2dense(self.corpus, num_terms=self.num_topics)
        topic_matrix = dense.T

        topic_attrs = [ContinuousVariable(name) for name in self.topic_names]
        topic_domain = Domain(
            topic_attrs, corp_in.domain.class_vars, metas=corp_in.domain.metas)

        new_table = Table.from_numpy(
            topic_domain, topic_matrix, Y=corp_in._Y, metas=corp_in.metas)
        return new_table
Esempio n. 14
0
    def insert_topics_into_corpus(self, corp_in):
        """
        Return a table with the topics as attributes.

        :param corp_in: Corpus into which we want to insert topical
            representations; its class variables, class values and metas
            are reused
        :return: `Orange.data.table.Table`
        """
        # The dense corpus matrix is transposed before it becomes X.
        matrix = matutils.corpus2dense(
            self.corpus, num_terms=self.num_topics,
        ).T

        # One continuous attribute per topic name.
        attributes = []
        for topic_name in self.topic_names:
            attributes.append(ContinuousVariable(topic_name))

        source_domain = corp_in.domain
        domain = Domain(
            attributes, source_domain.class_vars, metas=source_domain.metas)

        return Table.from_numpy(
            domain, matrix, Y=corp_in._Y, metas=corp_in.metas)
Esempio n. 15
0
    def get_all_topics_table(self):
        """Build a table with one row per word and one column per topic.

        The cells hold the word weights; the words themselves are stored in
        the 'Word' meta column.
        """
        words_per_topic = self._topics_words(self.n_words)
        weights_per_topic = self._topics_weights(self.n_words)
        # Row labels are the alphabetically sorted words of the first topic.
        word_column = sorted(words_per_topic[0])
        topic_count = len(words_per_topic)

        # Within each topic, order the weights by their word; transposing
        # then turns topics into columns.
        rows = [
            [weight for _, weight in sorted(zip(words, weights))]
            for words, weights in zip(words_per_topic, weights_per_topic)
        ]
        X = np.array(rows).T

        # Keep only as many names as there are topics: gensim can return
        # fewer topics than requested (e.g. 9 instead of 10) when the rank is
        # lower than the requested num_topics.
        topic_attrs = [ContinuousVariable(name)
                       for name in self.topic_names[:topic_count]]

        domain = Domain(topic_attrs, metas=[StringVariable('Word')])
        table = Table.from_numpy(
            domain, X=X, metas=np.array(word_column)[:, None])
        table.name = 'All topics'
        return table
Esempio n. 16
0
    def get_all_topics_table(self):
        """Return all topics as a table of word weights.

        Rows correspond to words (kept in the 'Word' meta column) and
        columns to topics.
        """
        all_words = self._topics_words(self.n_words)
        all_weights = self._topics_weights(self.n_words)
        # The word column uses the sorted words of the first topic.
        words_sorted = sorted(all_words[0])
        found_topics = len(all_words)

        per_topic = []
        for topic_words, topic_weights in zip(all_words, all_weights):
            # Sort the (word, weight) pairs and keep just the weights.
            ordered_pairs = sorted(zip(topic_words, topic_weights))
            per_topic.append([pair[1] for pair in ordered_pairs])
        # Transpose so that every topic becomes a column.
        weights_matrix = np.array(per_topic).T

        # gensim may return fewer topics than requested (e.g. 9 instead of
        # 10) when the rank is lower than num_topics, so only the first
        # ``found_topics`` names are used.
        used_names = self.topic_names[:found_topics]
        attrs = [ContinuousVariable(name) for name in used_names]

        word_metas = np.array(words_sorted)[:, np.newaxis]
        result = Table.from_numpy(
            Domain(attrs, metas=[StringVariable('Word')]),
            X=weights_matrix,
            metas=word_metas)
        result.name = 'All topics'
        return result