Example #1
0
    def __set_index(self, f):
        # type: (Future) -> None
        # set results from `list_remote` query.
        assert QThread.currentThread() is self.thread()
        assert f.done()
        self.setBlocking(False)
        self.setStatusMessage("")
        allinfolocal = self.list_local()
        try:
            res = f.result()
        except Exception:
            log.exception("Error while fetching updated index")
            if not allinfolocal:
                self.Error.no_remote_datasets()
            else:
                self.Warning.only_local_datasets()
            res = {}

        allinforemote = res  # type: Dict[Tuple[str, str], dict]
        allkeys = set(allinfolocal)
        if allinforemote is not None:
            allkeys = allkeys | set(allinforemote)
        allkeys = sorted(allkeys)

        def info(file_path):
            if file_path in allinforemote:
                info = allinforemote[file_path]
            else:
                info = allinfolocal[file_path]
            islocal = file_path in allinfolocal
            isremote = file_path in allinforemote
            outdated = islocal and isremote and (
                allinforemote[file_path].get('version', '') !=
                allinfolocal[file_path].get('version', ''))
            islocal &= not outdated
            prefix = os.path.join('', *file_path[:-1])
            filename = file_path[-1]

            return namespace(
                prefix=prefix, filename=filename,
                title=info.get("title", filename),
                datetime=info.get("datetime", None),
                description=info.get("description", None),
                references=info.get("references", []),
                seealso=info.get("seealso", []),
                source=info.get("source", None),
                year=info.get("year", None),
                instances=info.get("instances", None),
                variables=info.get("variables", None),
                target=info.get("target", None),
                missing=info.get("missing", None),
                tags=info.get("tags", []),
                size=info.get("size", None),
                islocal=islocal,
                outdated=outdated
            )

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(HEADER)

        current_index = -1
        for i, file_path in enumerate(allkeys):
            datainfo = info(file_path)
            item1 = QStandardItem()
            item1.setData(" " if datainfo.islocal else "", Qt.DisplayRole)
            item1.setData(datainfo, Qt.UserRole)
            item2 = QStandardItem(datainfo.title)
            item3 = QStandardItem()
            item3.setData(datainfo.size, Qt.DisplayRole)
            item4 = QStandardItem()
            item4.setData(datainfo.instances, Qt.DisplayRole)
            item5 = QStandardItem()
            item5.setData(datainfo.variables, Qt.DisplayRole)
            item6 = QStandardItem()
            item6.setData(datainfo.target, Qt.DisplayRole)
            if datainfo.target:
                item6.setIcon(variable_icon(datainfo.target))
            item7 = QStandardItem()
            item7.setData(", ".join(datainfo.tags) if datainfo.tags else "",
                          Qt.DisplayRole)
            row = [item1, item2, item3, item4, item5, item6, item7]
            model.appendRow(row)

            if os.path.join(*file_path) == self.selected_id:
                current_index = i

        hs = self.view.header().saveState()
        model_ = self.view.model().sourceModel()
        self.view.model().setSourceModel(model)
        self.view.header().restoreState(hs)
        model_.deleteLater()
        model_.setParent(None)
        self.view.selectionModel().selectionChanged.connect(
            self.__on_selection
        )
        # Update the info text
        self.infolabel.setText(format_info(model.rowCount(), len(allinfolocal)))

        if current_index != -1:
            selmodel = self.view.selectionModel()
            selmodel.select(
                self.view.model().mapFromSource(model.index(current_index, 0)),
                QItemSelectionModel.ClearAndSelect | QItemSelectionModel.Rows)
Example #2
0
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget"""

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001

    class Inputs:
        evaluation_results = Input("Evaluation Results",
                                   Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data",
                               Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    quantities = [
        "Number of instances", "Proportion of predicted",
        "Proportion of actual"
    ]

    settings_version = 1
    settingsHandler = settings.ClassValuesContextHandler()

    selected_learner = settings.Setting([0], schema_only=True)
    selection = settings.ContextSetting(set())
    selected_quantity = settings.Setting(0)
    append_predictions = settings.Setting(True)
    append_probabilities = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances", "click_cell")
    ]

    class Error(widget.OWWidget.Error):
        no_regression = Msg("Confusion Matrix cannot show regression results.")
        invalid_values = Msg(
            "Evaluation Results input contains invalid values")

    def __init__(self):
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        self.learners_box = gui.listBox(self.controlArea,
                                        self,
                                        "selected_learner",
                                        "learners",
                                        box=True,
                                        callback=self._learner_changed)

        self.outputbox = gui.vBox(self.controlArea, "Output")
        box = gui.hBox(self.outputbox)
        gui.checkBox(box,
                     self,
                     "append_predictions",
                     "Predictions",
                     callback=self._invalidate)
        gui.checkBox(box,
                     self,
                     "append_probabilities",
                     "Probabilities",
                     callback=self._invalidate)

        gui.auto_commit(self.outputbox,
                        self,
                        "autocommit",
                        "Send Selected",
                        "Send Automatically",
                        box=False)

        self.mainArea.layout().setContentsMargins(0, 0, 0, 0)

        box = gui.vBox(self.mainArea, box=True)

        sbox = gui.hBox(box)
        gui.rubber(sbox)
        gui.comboBox(sbox,
                     self,
                     "selected_quantity",
                     items=self.quantities,
                     label="Show: ",
                     orientation=Qt.Horizontal,
                     callback=self._update)

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        view.clicked.connect(self.cell_clicked)
        box.layout().addWidget(view)

        selbox = gui.hBox(box)
        gui.button(selbox,
                   self,
                   "Select Correct",
                   callback=self.select_correct,
                   autoDefault=False)
        gui.button(selbox,
                   self,
                   "Select Misclassified",
                   callback=self.select_wrong,
                   autoDefault=False)
        gui.button(selbox,
                   self,
                   "Clear Selection",
                   callback=self.select_none,
                   autoDefault=False)

    def sizeHint(self):
        """Initial size"""
        return QSize(750, 340)

    def _item(self, i, j):
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        item = self._item(0, 2)
        item.setData("Predicted", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)

        self._set_item(0, 2, item)
        item = self._item(2, 0)
        item.setData("Actual", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

        for p, label in enumerate(self.headers):
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < len(self.headers) - 1:
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        if len(' '.join(self.headers)) < 120:
            hor_header.setSectionResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        self.tablemodel.setRowCount(nclasses + 3)
        self.tablemodel.setColumnCount(nclasses + 3)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Set the input results."""

        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        else:
            self.Error.no_regression.clear()

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            if np.any(np.isnan(results.actual)) or \
                    np.any(np.isnan(results.predicted)):
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        if nan_values:
            self.Error.invalid_values()
        else:
            self.Error.invalid_values.clear()

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
        else:
            self.report_button.setDisabled(False)

            nmodels = results.predicted.shape[0]
            self.headers = class_values + \
                           [unicodedata.lookup("N-ARY SUMMATION")]

            # NOTE: The 'learner_names' is set in 'Test Learners' widget.
            if hasattr(results, "learner_names"):
                self.learners = results.learner_names
            else:
                self.learners = [
                    "Learner #{}".format(i + 1) for i in range(nmodels)
                ]

            self._init_table(len(class_values))
            self.openContext(data.domain.class_var)
            if not prev_sel_learner or prev_sel_learner[0] >= len(
                    self.learners):
                if self.learners:
                    self.selected_learner[:] = [0]
            else:
                self.selected_learner[:] = prev_sel_learner
            self._update()
            self._set_selection()
            self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Handle cell click event"""
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def _prepare_data(self):
        indices = self.tableview.selectedIndexes()
        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        actual = self.results.actual
        learner_name = self.learners[self.selected_learner[0]]
        predicted = self.results.predicted[self.selected_learner[0]]
        selected = [
            i for i, t in enumerate(zip(actual, predicted)) if t in indices
        ]

        extra = []
        class_var = self.data.domain.class_var
        metas = self.data.domain.metas

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            var = Orange.data.DiscreteVariable(
                "{}({})".format(class_var.name, learner_name),
                class_var.values)
            metas = metas + (var, )

        if self.append_probabilities and \
                        self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner[0]]
            extra.append(np.array(probs, dtype=object))
            pvars = [
                Orange.data.ContinuousVariable("p({})".format(value))
                for value in class_var.values
            ]
            metas = metas + tuple(pvars)

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars, metas)
        data = self.data.transform(domain)
        if len(extra):
            data.metas[:, len(self.data.domain.metas):] = \
                np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data

    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            data, annotated_data = self._prepare_data()
        else:
            data = None
            annotated_data = None

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        self.commit()

    def _set_selection(self):
        selection = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        self._update()
        self._set_selection()
        self.commit()

    def _update(self):
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner:
            cmatrix = confusion_matrix(self.results, self.selected_learner[0])
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = np.diag_indices(n)

            colors = cmatrix.astype(np.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                normalized = cmatrix.astype(np.int)
                formatstr = "{}"
                div = np.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    normalized = 100 * cmatrix / rowsum[:, np.newaxis]
                    div = colors.max(axis=1)[:, np.newaxis]
                formatstr = "{:2.1f} %"
            div[div == 0] = 1
            colors /= div
            colors[diag] = normalized[diag] / normalized[diag].max()

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val) else int(255 -
                                                            30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner:
            self.report_table(
                "Confusion matrix for {} (showing {})".format(
                    self.learners[self.selected_learner[0]],
                    self.quantities[self.selected_quantity].lower()),
                self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        if not version:
            # For some period of time the 'selected_learner' property was
            # changed from List[int] -> int
            # (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back
            # again (commit 8a492d79a2e17154a0881e24a05843406c8892c0)
            if "selected_learner" in settings and \
                    isinstance(settings["selected_learner"], int):
                settings["selected_learner"] = [settings["selected_learner"]]
Example #3
0
class PivotTableView(QTableView):
    selection_changed = pyqtSignal()

    TOTAL_STRING = "Total"

    def __init__(self):
        super().__init__(editTriggers=QTableView.NoEditTriggers)
        self._n_classesv = None  # number of row_feature values
        self._n_classesh = None  # number of col_feature values
        self._n_agg_func = None  # number of aggregation functions
        self._n_leading_rows = None  # number of leading rows
        self._n_leading_cols = None  # number of leading columns

        self.table_model = QStandardItemModel(self)
        self.setModel(self.table_model)
        self.horizontalHeader().hide()
        self.verticalHeader().hide()
        self.horizontalHeader().setMinimumSectionSize(60)
        self.setShowGrid(False)
        self.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        self.setItemDelegate(BorderedItemDelegate())
        self.pressed.connect(self.__cell_clicked)
        self.clicked.connect(self.__cell_clicked)
        self.entered.connect(self.__cell_entered)
        self.__clicked_cell = None

    @property
    def add_agg_column(self) -> bool:
        return self._n_agg_func > 1

    def __cell_entered(self, model_index):
        if self.__clicked_cell is None:
            return
        index = self.table_model.index
        selection = None
        i_end, j_end = model_index.row(), model_index.column()
        i_start, j_start = self.__clicked_cell
        i_start, i_end = sorted([i_start, i_end])
        j_start, j_end = sorted([j_start, j_end])
        if i_start >= self._n_leading_rows and j_start >= self._n_leading_cols:
            i_start = (i_start - self._n_leading_rows) // self._n_agg_func * \
                self._n_agg_func + self._n_leading_rows
            i_end = (i_end - self._n_leading_rows) // self._n_agg_func * \
                self._n_agg_func + self._n_leading_rows + self._n_agg_func - 1
            start, end = index(i_start, j_start), index(i_end, j_end)
            selection = QItemSelection(start, end)
        if selection is not None:
            self.selectionModel().select(selection,
                                         QItemSelectionModel.ClearAndSelect)
        self.selection_changed.emit()

    def __cell_clicked(self, model_index):
        i, j = model_index.row(), model_index.column()
        self.__clicked_cell = (i, j)
        m, n = self.table_model.rowCount(), self.table_model.columnCount()
        index = self.table_model.index
        selection = None
        if i > m - self._n_agg_func - 1 and j == n - 1:
            start_index = index(self._n_leading_rows, self._n_leading_cols)
            selection = QItemSelection(start_index, index(m - 1, n - 1))
        elif i == self._n_leading_rows - 1 or i > m - self._n_agg_func - 1:
            start_index = index(self._n_leading_rows, j)
            selection = QItemSelection(start_index, index(m - 1, j))
        elif j in (self._n_leading_cols - 1, n - 1, 1):
            i_start = (i - self._n_leading_rows) // self._n_agg_func * \
                      self._n_agg_func + self._n_leading_rows
            i_end = i_start + self._n_agg_func - 1
            start_index = index(i_start, self._n_leading_cols)
            selection = QItemSelection(start_index, index(i_end, n - 1))
        elif i >= self._n_leading_rows and j >= self._n_leading_cols:
            i_start = (i - self._n_leading_rows) // self._n_agg_func * \
                      self._n_agg_func + self._n_leading_rows
            i_end = i_start + self._n_agg_func - 1
            selection = QItemSelection(index(i_start, j), index(i_end, j))

        if selection is not None:
            self.selectionModel().select(selection,
                                         QItemSelectionModel.ClearAndSelect)

    def mouseReleaseEvent(self, e):
        super().mouseReleaseEvent(e)
        self.selection_changed.emit()

    def update_table(self, titleh: str, titlev: str, table: Table,
                     table_total_h: Table, table_total_v: Table,
                     table_total: Table):
        self.clear()
        if not table:
            return

        self._initialize(table, table_total_h)
        self._set_headers(titleh, titlev, table)
        self._set_values(table[:, 2:])
        self._set_totals(table_total_h[:, 2:], table_total_v, table_total)
        self._draw_lines()
        self._resize(table)

    def _initialize(self, table, table_total_h):
        self._n_classesv = int(len(table) / len(table_total_h))
        self._n_classesh = table.X.shape[1] - 2
        self._n_agg_func = len(table_total_h)
        self._n_leading_rows = 2
        self._n_leading_cols = 2 + int(len(table_total_h) > 1)

    def _set_headers(self, titleh, titlev, table):
        self.__set_horizontal_title(titleh)
        self.__set_vertical_title(titlev)
        self.__set_flags_title()
        self.__set_horizontal_headers(table)
        self.__set_vertical_headers(table)

    def __set_horizontal_title(self, titleh):
        item = QStandardItem()
        item.setData(titleh, Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        self.table_model.setItem(0, self._n_leading_cols, item)
        self.setSpan(0, self._n_leading_cols, 1, self._n_classesh + 3)

    def __set_vertical_title(self, titlev):
        item = QStandardItem()
        item.setData(titlev, Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        self.setItemDelegateForColumn(0, gui.VerticalItemDelegate(extend=True))
        self.table_model.setItem(self._n_leading_rows, 0, item)
        row_span = self._n_classesv * self._n_agg_func + 1
        self.setSpan(self._n_leading_rows, 0, row_span, 1)

    def __set_flags_title(self):
        item = self.table_model.item(0, self._n_leading_cols)
        item.setFlags(Qt.NoItemFlags)
        item = self.table_model.item(self._n_leading_rows, 0)
        item.setFlags(Qt.NoItemFlags)
        for i, j in product(range(self._n_leading_rows),
                            range(self._n_leading_cols)):
            item = QStandardItem()
            item.setFlags(Qt.NoItemFlags)
            self.table_model.setItem(i, j, item)

    def __set_horizontal_headers(self, table):
        labels = [a.name for a in table.domain[1:]] + [self.TOTAL_STRING]
        if not self.add_agg_column:
            labels[0] = str(table[0, 1])
        for i, label in enumerate(labels, self._n_leading_cols - 1):
            self.table_model.setItem(1, i, self._create_header_item(label))

    def __set_vertical_headers(self, table):
        labels = [(str(row[0]), str(row[1])) for row in table]
        i = self._n_leading_rows - 1
        for i, (l1, l2) in enumerate(labels, self._n_leading_rows):
            l1 = "" if (i - self._n_leading_rows) % self._n_agg_func else l1
            self.table_model.setItem(i, 1, self._create_header_item(l1))
            if self.add_agg_column:
                self.table_model.setItem(i, 2, self._create_header_item(l2))

        if self.add_agg_column:
            labels = [str(row[1]) for row in table[:self._n_agg_func]]
            start = self._n_leading_rows + self._n_agg_func * self._n_classesv
            for j, l2 in enumerate(labels, i + 1):
                l1 = self.TOTAL_STRING if j == start else ""
                self.table_model.setItem(j, 1, self._create_header_item(l1))
                self.table_model.setItem(j, 2, self._create_header_item(l2))
        else:
            item = self._create_header_item(self.TOTAL_STRING)
            self.table_model.setItem(i + 1, 1, item)

    def _set_values(self, table):
        for i, j in product(range(len(table)), range(len(table[0]))):
            value = table[i, j]
            item = self._create_value_item(str(value))
            self.table_model.setItem(i + self._n_leading_rows,
                                     j + self._n_leading_cols, item)

    def _set_totals(self, table_total_h, table_total_v, table_total):
        def set_total_item(table, get_row, get_col):
            for i, j in product(range(len(table)), range(len(table[0]))):
                item = self._create_header_item(str(table[i, j]))
                self.table_model.setItem(get_row(i), get_col(j), item)

        last_row = self._n_leading_rows + self._n_classesv * self._n_agg_func
        last_col = self._n_leading_cols + self._n_classesh
        set_total_item(table_total_v, lambda x: x + self._n_leading_rows,
                       lambda x: last_col)
        set_total_item(table_total_h, lambda x: x + last_row,
                       lambda x: x + self._n_leading_cols)
        set_total_item(table_total, lambda x: x + last_row, lambda x: last_col)

    def _create_header_item(self, text):
        bold_font = self.table_model.invisibleRootItem().font()
        bold_font.setBold(True)
        item = QStandardItem()
        item.setData(text, Qt.DisplayRole)
        item.setFont(bold_font)
        item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
        item.setFlags(Qt.ItemIsEnabled)
        return item

    @staticmethod
    def _create_value_item(text):
        item = QStandardItem()
        item.setData(text, Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
        item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
        return item

    def _draw_lines(self):
        end_col = self._n_leading_cols + self._n_classesh + 1
        total_row = self._n_leading_rows + self._n_classesv * self._n_agg_func
        indices = [(total_row, j) for j in range(1, end_col)]
        for i in range(self._n_classesv):
            inner_row = self._n_agg_func * i + self._n_leading_rows
            inner_indices = [(inner_row, j) for j in range(1, end_col)]
            indices = indices + inner_indices
            if not self.add_agg_column:
                break
        for i, j in indices:
            item = self.table_model.item(i, j)
            item.setData("t", BorderRole)
            item.setData(QColor(160, 160, 160), BorderColorRole)

    def _resize(self, table):
        labels = [a.name for a in table.domain[1:]] + [self.TOTAL_STRING]
        if len(' '.join(labels)) < 120:
            self.horizontalHeader().setSectionResizeMode(
                QHeaderView.ResizeToContents)
        else:
            self.horizontalHeader().setDefaultSectionSize(60)

    def get_selection(self) -> Set:
        m, n = self._n_leading_rows, self._n_leading_cols
        return {(ind.row() - m, ind.column() - n)
                for ind in self.selectedIndexes()}

    def set_selection(self, indexes: Set):
        selection = QItemSelection()
        index = self.model().index
        for row, col in indexes:
            sel = index(row + self._n_leading_rows, col + self._n_leading_cols)
            selection.select(sel, sel)
        self.selectionModel().select(selection,
                                     QItemSelectionModel.ClearAndSelect)

    def clear(self):
        self.table_model.clear()
class ContingencyTable(QTableView):
    """
    A contingency table widget which can be used wherever ``QTableView`` could be used.

    Parameters
    ----------
    parent : Orange.widgets.widget.OWWidget
        The containing widget to which the table is connected.

    Attributes
    ----------
    classesv : :obj:`list` of :obj:`str`
        Vertical class headers.
    classesh : :obj:`list` of :obj:`str`
        Horizontal class headers.
    headerv : :obj:`str`, optional
        Vertical top header.
    headerh : :obj:`str`, optional
        Horizontal top header.
    corner_string : str
        String that is top right and bottom left corner of the table.
        Default is ``unicodedata.lookup("N-ARY SUMMATION")``.
    """
    def __init__(self, parent):
        super().__init__(editTriggers=QTableView.NoEditTriggers)

        self.bold_headers = None
        self.circles = False
        self.classesv = None
        self.classesh = None
        self.headerv = None
        self.headerh = None
        self.parent = parent

        self.corner_string = unicodedata.lookup("N-ARY SUMMATION")

        self.tablemodel = QStandardItemModel(self)
        self.setModel(self.tablemodel)
        self.horizontalHeader().hide()
        self.verticalHeader().hide()
        self.horizontalHeader().setMinimumSectionSize(60)
        self.setShowGrid(False)
        self.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        self.clicked.connect(self._cell_clicked)

    def mouseReleaseEvent(self, e):
        super().mouseReleaseEvent(e)
        self.parent._invalidate()

    def keyPressEvent(self, event):
        super().keyPressEvent(event)
        self.parent._invalidate()

    def _cell_clicked(self, model_index):
        """Handle cell click event"""
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        m = self.tablemodel.columnCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or not self.circles and i == n - 1 and j == m - 1:
            selection = QItemSelection(index(2, 2), index(n - 1, m - 1))
        elif i == 1 or not self.circles and i == n - 1:
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j == 1 or not self.circles and j == m - 1:
            selection = QItemSelection(index(i, 2), index(i, m - 1))

        if selection is not None:
            self.selectionModel().select(selection,
                                         QItemSelectionModel.ClearAndSelect)

    def _item(self, i, j):
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        self.tablemodel.setItem(i, j, item)

    def set_variables(self, variablev, variableh, **kwargs):
        """
        Sets class headers and top headers and initializes table structure.

        Parameters
        ----------
        variablev : Orange.data.variable.DiscreteVariable
            Class headers are set to ``variablev.values``, top header is set to ``variablev.name``.
        variableh : Orange.data.variable.DiscreteVariable
            Class headers are set to ``variableh.values``, top header is set to ``variableh.name``.
        """
        self.classesv = variablev.values
        self.classesh = variableh.values
        self.headerv = variablev.name
        self.headerh = variableh.name
        self.initialize(**kwargs)

    def set_headers(self,
                    classesv,
                    classesh,
                    headerv=None,
                    headerh=None,
                    **kwargs):
        """
        Sets class headers and top headers and initializes table structure.

        Parameters
        ----------
        classesv : :obj:`list` of :obj:`str`
            Vertical class headers.
        classesh : :obj:`list` of :obj:`str`
            Horizontal class headers.
        headerv : :obj:`str`, optional
            Vertical top header.
        headerh : :obj:`str`, optional
            Horizontal top header.
        """
        self.classesv = classesv
        self.classesh = classesh
        self.headerv = headerv
        self.headerh = headerh
        self.initialize(**kwargs)

    def _style_cells(self):
        """
        Style all cells.
        """
        if self.circles:
            self.setItemDelegate(CircleItemDelegate(Qt.white))
        else:
            self.setItemDelegate(BorderedItemDelegate(Qt.white))
        item = self._item(0, 2)
        item.setData(self.headerh, Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)

        self._set_item(0, 2, item)
        item = self._item(2, 0)
        item.setData(self.headerv, Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.setSpan(0, 2, 1, len(self.classesh) + 1)
        self.setSpan(2, 0, len(self.classesv) + 1, 1)

        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

    def _initialize_headers(self):
        """
        Fill headers with content and style them.
        """
        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        for headers, ix in ((self.classesv + [self.corner_string], lambda p:
                             (p + 2, 1)),
                            (self.classesh + [self.corner_string], lambda p:
                             (1, p + 2))):
            for p, label in enumerate(headers):
                i, j = ix(p)
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                if self.bold_headers:
                    item.setFont(bold_font)
                if not (i == 1 and self.circles):
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < len(headers) - 1:
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                else:
                    item.setData("", BorderRole)
                self._set_item(i, j, item)

    def _resize(self):
        """
        Resize table to fit new contents and style.
        """
        if self.circles:
            self.resizeRowToContents(1)
            self.horizontalHeader().setDefaultSectionSize(self.rowHeight(2))
            self.resizeColumnToContents(1)
            self.tablemodel.setRowCount(len(self.classesv) + 2)
            self.tablemodel.setColumnCount(len(self.classesh) + 2)
        else:
            if len(' '.join(self.classesh + [self.corner_string])) < 120:
                self.horizontalHeader().setSectionResizeMode(
                    QHeaderView.ResizeToContents)
            else:
                self.horizontalHeader().setDefaultSectionSize(60)
            self.tablemodel.setRowCount(len(self.classesv) + 3)
            self.tablemodel.setColumnCount(len(self.classesh) + 3)

    def initialize(self, circles=False, bold_headers=True):
        """
        Initializes table structure. Class headers must be set beforehand.

        Parameters
        ----------
        circles : :obj:`bool`, optional
            Turns on circle display. All table values should be between 0 and 1 (inclusive). Defaults to False.
        bold_headers : :obj:`bool`, optional
            Whether the headers are bold or not. Defaults to True.
        """
        assert self.classesv is not None and self.classesh is not None

        self.circles = circles
        self.bold_headers = bold_headers

        self._style_cells()
        self._initialize_headers()
        self._resize()

    def get_selection(self):
        """
        Get indexes of selected cells.

        Returns
        -------
        :obj:`set` of :obj:`tuple` of :obj:`int`
            Set of pairs of indexes.
        """
        return {(ind.row() - 2, ind.column() - 2)
                for ind in self.selectedIndexes()}

    def set_selection(self, indexes):
        """
        Set indexes of selected cells.

        Parameters
        ----------
        indexes : :obj:`set` of :obj:`tuple` of :obj:`int`
            Set of pairs of indexes.
        """
        selection = QItemSelection()
        index = self.model().index
        for row, col in indexes:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.selectionModel().select(selection,
                                     QItemSelectionModel.ClearAndSelect)

    def _set_sums(self, colsum, rowsum):
        """
        Set content of cells on bottom and right edge.

        Parameters
        ----------
        colsum : numpy.array
            Content of cells on bottom edge.
        rowsum : numpy.array
            Content of cells on right edge.
        """
        bold_font = self.tablemodel.invisibleRootItem().font()
        bold_font.setBold(True)

        def _sum_item(value, border=""):
            item = QStandardItem()
            item.setData(value, Qt.DisplayRole)
            item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
            item.setFlags(Qt.ItemIsEnabled)
            item.setFont(bold_font)
            item.setData(border, BorderRole)
            item.setData(QColor(192, 192, 192), BorderColorRole)
            return item

        for i in range(len(self.classesh)):
            self._set_item(
                len(self.classesv) + 2, i + 2, _sum_item(int(colsum[i]), "t"))
        for i in range(len(self.classesv)):
            self._set_item(i + 2,
                           len(self.classesh) + 2,
                           _sum_item(int(rowsum[i]), "l"))
        self._set_item(
            len(self.classesv) + 2,
            len(self.classesh) + 2, _sum_item(int(rowsum.sum())))

    def _set_values(self, matrix, colors, formatstr, tooltip):
        """
        Set content of cells which aren't headers and don't represent aggregate values.

        Parameters
        ----------
        matrix : numpy.array
            2D array to be set as data.
        colors : :obj:`numpy.array`
            2D array with color values.
        formatstr : :obj:`str`, optional
            Format string for cell data.
        tooltip : :obj:`(int, int) -> str`
            Function which takes vertical index and horizontal index as arguments and returns
            desired tooltip as a string.
        """
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        for i in range(len(self.classesv)):
            for j in range(len(self.classesh)):
                val = matrix[i, j]
                col_val = float('nan') if colors is None else colors[i, j]
                item = QStandardItem()
                if self.circles:
                    item.setData(val, CircleAreaRole)
                else:
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val) else int(255 -
                                                            30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                item.setData("trbl", BorderRole)
                if tooltip is not None:
                    item.setToolTip(tooltip(i, j))
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                self._set_item(i + 2, j + 2, item)

    def update_table(self,
                     matrix,
                     colsum=None,
                     rowsum=None,
                     colors=None,
                     formatstr="{}",
                     tooltip=None):
        """
        Sets ``matrix`` as data of the table.

        Parameters
        ----------
        matrix : numpy.array
            2D array to be set as data.
        colsum : :obj:`numpy.array`, optional
            1D optional array with aggregate values of columns, defaults to sum.
        rowsum : :obj:`numpy.array`, optional
            1D optional array with aggregate values of rows, defaults to sum.
        colors : :obj:`numpy.array`, optional
            2D array with color values, defaults to no color.
        formatstr : :obj:`str`, optional
            Format string for cell data, defaults to ``"{}"``.
        tooltip : :obj:`(int, int) -> str`, optional
            Function which takes vertical index and horizontal index as arguments and returns
            desired tooltip as a string. Defaults to no tooltips.
        """
        selected_indexes = self.get_selection()

        self._set_values(matrix, colors, formatstr, tooltip)
        if not self.circles:
            if colsum is None:
                colsum = matrix.sum(axis=0)
            if rowsum is None:
                rowsum = matrix.sum(axis=1)
            self._set_sums(colsum, rowsum)

        self.set_selection(selected_indexes)

    def clear(self):
        """
        Clears the table.
        """
        self.tablemodel.clear()
Example #5
0
    def __on_enrichment_finished(self, results):
        assert QThread.currentThread() is self.thread()
        self.__state &= ~OWSetEnrichment.RunningEnrichment

        query, reference, results = results

        if self.annotationsChartView.model():
            self.annotationsChartView.model().clear()

        nquery = len(query)
        nref = len(reference)
        maxcount = max((len(e.query_mapped) for _, e in results), default=1)
        maxrefcount = max((len(e.reference_mapped) for _, e in results),
                          default=1)
        nspaces = int(math.ceil(math.log10(maxcount or 1)))
        refspaces = int(math.ceil(math.log(maxrefcount or 1)))
        query_fmt = "%" + str(nspaces) + "s  (%.2f%%)"
        ref_fmt = "%" + str(refspaces) + "s  (%.2f%%)"

        def fmt_count(fmt, count, total):
            return fmt % (count, 100.0 * count / (total or 1))

        fmt_query_count = partial(fmt_count, query_fmt)
        fmt_ref_count = partial(fmt_count, ref_fmt)

        linkFont = QFont(self.annotationsChartView.viewOptions().font)
        linkFont.setUnderline(True)

        def item(value=None, tooltip=None, user=None):
            si = QStandardItem()
            if value is not None:
                si.setData(value, Qt.DisplayRole)
            if tooltip is not None:
                si.setData(tooltip, Qt.ToolTipRole)
            if user is not None:
                si.setData(user, Qt.UserRole)
            else:
                si.setData(value, Qt.UserRole)
            return si

        model = QStandardItemModel()
        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels([
            "Category", "Term", "Count", "Reference count", "p-value", "FDR",
            "Enrichment"
        ])
        for i, (gset, enrich) in enumerate(results):
            if len(enrich.query_mapped) == 0:
                continue
            nquery_mapped = len(enrich.query_mapped)
            nref_mapped = len(enrich.reference_mapped)

            row = [
                item(", ".join(gset.hierarchy)),
                item(gsname(gset), tooltip=gset.link),
                item(fmt_query_count(nquery_mapped, nquery),
                     tooltip=nquery_mapped,
                     user=nquery_mapped),
                item(fmt_ref_count(nref_mapped, nref),
                     tooltip=nref_mapped,
                     user=nref_mapped),
                item(fmtp(enrich.p_value), user=enrich.p_value),
                item(
                ),  # column 5, FDR, is computed in filterAnnotationsChartView
                item(enrich.enrichment_score,
                     tooltip="%.3f" % enrich.enrichment_score,
                     user=enrich.enrichment_score)
            ]
            row[0].geneset = gset
            row[0].enrichment = enrich
            row[1].setData(gset.link, gui.LinkRole)
            row[1].setFont(linkFont)
            row[1].setForeground(QColor(Qt.blue))

            model.appendRow(row)

        self.annotationsChartView.setModel(model)
        self.annotationsChartView.selectionModel().selectionChanged.connect(
            self.commit)

        if not model.rowCount():
            self.warning(0, "No enriched sets found.")
        else:
            self.warning(0)

        allnames = set(
            gsname(geneset) for geneset, (count, _, _, _) in results if count)

        allnames |= reduce(operator.ior,
                           (set(word_split(name)) for name in allnames), set())

        self.filterCompleter.setModel(None)
        self.completerModel = QStringListModel(sorted(allnames))
        self.filterCompleter.setModel(self.completerModel)

        if results:
            max_score = max(
                (e.enrichment_score
                 for _, e in results if np.isfinite(e.enrichment_score)),
                default=1)

            self.annotationsChartView.setItemDelegateForColumn(
                6, BarItemDelegate(self, scale=(0.0, max_score)))

        self.annotationsChartView.setItemDelegateForColumn(
            1, gui.LinkStyledItemDelegate(self.annotationsChartView))

        header = self.annotationsChartView.header()
        for i in range(model.columnCount()):
            sh = self.annotationsChartView.sizeHintForColumn(i)
            sh = max(sh, header.sectionSizeHint(i))
            self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))


#             self.annotationsChartView.resizeColumnToContents(i)

        self.filterAnnotationsChartView()

        self.progressBarFinished()
        self.setStatusMessage("")
Example #6
0
class OWCorpusViewer(OWWidget):
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)
        corpus = Output("Corpus", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values = PerfectDomainContextHandler.MATCH_VALUES_ALL
    )

    search_indices = ContextSetting([], exclude_metas=False)   # features included in search
    display_indices = ContextSetting([], exclude_metas=False)  # features for display
    display_features = ContextSetting([], exclude_metas=False)
    selected_documents = ContextSetting([])
    regexp_filter = ContextSetting("")

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]
        self.matches = 0                # Matches of the query

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching documents: %(n_matching)s')
        gui.label(info_box, self, 'Matches: %(n_matches)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_list_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box, self, 'show_tokens',
                                                 'Show Tokens && Tags', callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea, self, 'regexp_filter',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                                QSizePolicy.Fixed),
                                         label='RegExp Filter:',
                                         callback=self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )
        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(
            self.selection_changed
        )
        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                self.show_tokens_checkbox.setCheckState(False)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

            self.search_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            self.selected_documents = [corpus.titles[0]] if \
                corpus.titles is not None and len(corpus.titles) else []
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """ List documents into the left scrolling area """
        if self.corpus_docs is None:
            return
        # TODO: remove search_keyword??
        search_keyword = self.regexp_filter.strip('|')
        matches = 0
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except sre_constants.error:
            return

        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(zip(self.corpus, self.corpus.titles,
                                                      self.corpus_docs)):
            res = len(list(reg.finditer(content))) if self.regexp_filter else 0
            if not self.regexp_filter or res:
                matches += res
                item = QStandardItem()
                item.setData(str(title), Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
        self.matches = matches

    def get_selected_documents_from_view(self) -> Set[str]:
        """
        Returns
        -------
        Set with names of selected documents in the QTableView
        """
        return {
            i.data(Qt.DisplayRole)
            for i in self.doc_list.selectionModel().selectedRows()
        }

    def set_selection(self) -> None:
        """
        Select documents in selected_documents attribute in the view
        """
        view = self.doc_list
        model = view.model()

        previously_selected = self.selected_documents.copy()
        selection = QItemSelection()
        for row in range(model.rowCount()):
            document = model.data(model.index(row, 0), Qt.DisplayRole)
            if document in self.selected_documents:
                selection.append(QItemSelectionRange(
                    view.model().index(row, 0),
                    view.model().index(row, 0)
                ))
        view.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect
        )
        if len(selection) == 0:
            # in cases when selection is empty qt's selection_changed is not
            # called and so we need to manually trigger show_docs
            self.show_docs()
        # select emmit selection change signal which causes calling
        # selection_changed when filtering it means that documents which
        # are currently filtered out get removed from self.selected_douments
        # we still want to keep them to be still selected after user removes
        # filter
        self.selected_documents = previously_selected

    def selection_changed(self) -> None:
        """
        Function is called every time the selection changes - when user select
        new range of documents
        """
        self.selected_documents = self.get_selected_documents_from_view()
        self.show_docs()
        self.commit()

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}
        
        .content {{
            /* Adopted from https://css-tricks.com/snippets/css/prevent-long-urls-from-breaking-out-of-container/ */
        
            /* These are technically the same, but use both */
            overflow-wrap: break-word;
            word-wrap: break-word;
        
            -ms-word-break: break-all;
            /* This is the dangerous one in WebKit, as it breaks things wherever */
            word-break: break-all;
            /* Instead use this non-standard one: */
            word-break: break-word;
        
            /* Adds a hyphen where the word breaks, if supported (No Blink) */
            -ms-hyphens: auto;
            -moz-hyphens: auto;
            -webkit-hyphens: auto;
            hyphens: auto;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [f for i, f in enumerate(self.search_features)
                                  if i in self.search_indices]

        html = '<table>'
        for doc_count, index in enumerate(self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:   # add split
                html += '<tr class="line separator"><td/><td/></tr>' \
                        '<tr class="separator"><td/><td/></tr>'

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                value = str(index.data(Qt.UserRole)[feature.name])
                if feature in marked_search_features:
                    value = self.__mark_text(value)
                value = value.replace('\n', '<br/>')
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                html += '<tr><td class="variables"><strong>{}:</strong></td>' \
                        '<td class="content">{}</td></tr>'.format(
                    feature.name, value)

            if self.show_tokens:
                html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
                        '<td>{}</td></tr>'.format(''.join('<span class="token">{}</span>'.format(
                    token) for token in tokens[row_ind]))

        html += '</table>'
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(html), base)

    def __mark_text(self, text):
        search_keyword = self.regexp_filter.strip('|')
        if not search_keyword:
            return text

        try:
            reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
        except sre_constants.error:
            return text

        matches = list(reg.finditer(text))
        if not matches:
            return text

        text = list(text)
        for m in matches[::-1]:
            text[m.start():m.end()] = list('<mark data-markjs="true">{}</mark>'\
                .format("".join(text[m.start():m.end()])))

        return "".join(text)

    def search_features_changed(self):
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        if self.corpus is not None:
            self.list_docs()
            self.set_selection()
            self.update_info()
            self.commit()

    def update_info(self):
        if self.corpus is not None:
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(), len(self.corpus))
            self.n_matches = self.matches if self.matches else 'n/a'
            self.n_tokens = sum(map(len, self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
        else:
            self.n_matching = ''
            self.n_matches = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        matched = unmatched = annotated_corpus = None
        corpus = self.corpus
        if corpus is not None:
            # it returns a set of selected documents which are in view
            selected_docs = self.get_selected_documents_from_view()
            titles = corpus.titles
            matched_mask = [
                i for i, t in enumerate(titles) if t in selected_docs
            ]
            unmatched_mask = [
                i for i, t in enumerate(titles) if t not in selected_docs
            ]

            matched = corpus[matched_mask] if len(matched_mask) else None
            unmatched = corpus[unmatched_mask] if len(unmatched_mask) else None
            annotated_corpus = create_annotated_table(corpus, matched_mask)
        self.Outputs.matching_docs.send(matched)
        self.Outputs.other_docs.send(unmatched)
        self.Outputs.corpus.send(annotated_corpus)

    def send_report(self):
        self.report_items((
            ("Query", self.regexp_filter),
            ("Matching documents", self.n_matching),
            ("Matches", self.n_matches)
        ))

    def showEvent(self, event):
        super().showEvent(event)
        self.update_splitter()

    def update_splitter(self):
        """
        Update splitter that document list on the left never take more
        than 1/3 of the space. It is only set on showEvent. If user
        later changes sizes it stays as it is.
        """
        w1, w2 = self.splitter.sizes()
        ws = w1 + w2
        if w2 < 2/3 * ws:
            self.splitter.setSizes([ws * 1/3, ws * 2/3])
Example #7
0
class EditableTreeView(QWidget):
    dataChanged = Signal()
    selectionChanged = Signal()

    def __init__(self, parent=None):
        super().__init__(parent=parent)
        self.__stack: List = []
        self.__stack_index: int = -1

        def push_on_data_changed(_, __, roles):
            if Qt.EditRole in roles:
                self._push_data()

        self.__model = QStandardItemModel()
        self.__model.dataChanged.connect(self.dataChanged)
        self.__model.dataChanged.connect(push_on_data_changed)
        self.__root: QStandardItem = self.__model.invisibleRootItem()

        self.__tree = TreeView(self.dataChanged)
        self.__tree.drop_finished.connect(self.dataChanged)
        self.__tree.drop_finished.connect(self._push_data)
        self.__tree.setModel(self.__model)
        self.__tree.selectionModel().selectionChanged.connect(
            self.selectionChanged)

        actions_widget = ModelActionsWidget()
        actions_widget.layout().setSpacing(1)

        action = QAction("+", self, toolTip="Add a new word")
        action.triggered.connect(self.__on_add)
        actions_widget.addAction(action)

        action = QAction("\N{MINUS SIGN}", self, toolTip="Remove word")
        action.triggered.connect(self.__on_remove)
        actions_widget.addAction(action)

        action = QAction("\N{MINUS SIGN}R", self,
                         toolTip="Remove word recursively (incl. children)")
        action.triggered.connect(self.__on_remove_recursive)
        actions_widget.addAction(action)

        gui.rubber(actions_widget)

        self.__undo_action = action = QAction("Undo", self, toolTip="Undo")
        action.triggered.connect(self.__on_undo)
        actions_widget.addAction(action)

        self.__redo_action = action = QAction("Redo", self, toolTip="Redo")
        action.triggered.connect(self.__on_redo)
        actions_widget.addAction(action)

        self._enable_undo_redo()

        layout = QVBoxLayout()
        layout.setSpacing(1)
        layout.setContentsMargins(0, 0, 0, 0)
        layout.addWidget(self.__tree)
        layout.addWidget(actions_widget)
        self.setLayout(layout)

    def __on_add(self):
        parent: QStandardItem = self.__root
        selection: List = self.__tree.selectionModel().selectedIndexes()
        if selection:
            sel_index: QModelIndex = selection[0]
            parent: QStandardItem = self.__model.itemFromIndex(sel_index)

        item = QStandardItem("")
        parent.appendRow(item)
        index: QModelIndex = item.index()
        with disconnected(self.__model.dataChanged, self.dataChanged):
            self.__model.setItemData(index, {Qt.EditRole: ""})
        self.__tree.setCurrentIndex(index)
        self.__tree.edit(index)

    def __on_remove_recursive(self):
        sel_model: QItemSelectionModel = self.__tree.selectionModel()
        if len(sel_model.selectedIndexes()):
            while sel_model.selectedIndexes():
                index: QModelIndex = sel_model.selectedIndexes()[0]
                self.__model.removeRow(index.row(), index.parent())
            self._push_data()
            self.dataChanged.emit()

    def __on_remove(self):
        sel_model: QItemSelectionModel = self.__tree.selectionModel()
        if len(sel_model.selectedIndexes()):

            while sel_model.selectedIndexes():
                index: QModelIndex = sel_model.selectedIndexes()[0]

                # move children to item's parent
                item: QStandardItem = self.__model.itemFromIndex(index)
                children = [item.takeChild(i) for i in range(item.rowCount())]
                parent = item.parent() or self.__root

                self.__model.removeRow(index.row(), index.parent())

                for child in children[::-1]:
                    parent.insertRow(index.row(), child)

            self.__tree.expandAll()
            self._push_data()
            self.dataChanged.emit()

    def __on_undo(self):
        self.__stack_index -= 1
        self._set_from_stack()

    def __on_redo(self):
        self.__stack_index += 1
        self._set_from_stack()

    def get_words(self) -> List:
        return _model_to_words(self.__root)

    def get_selected_words(self) -> Set:
        return set(self.__model.itemFromIndex(index).text() for index in
                   self.__tree.selectionModel().selectedIndexes())

    def get_selected_words_with_children(self) -> Set:
        words = set()
        for index in self.__tree.selectionModel().selectedIndexes():
            item: QStandardItem = self.__model.itemFromIndex(index)
            words.update(_model_to_words(item))
        return words

    def get_data(self, with_selection=False) -> Union[Dict, OntoType]:
        selection = self.__tree.selectionModel().selectedIndexes()
        return _model_to_tree(self.__root, selection, with_selection)

    def set_data(self, data: Dict, keep_history: bool = False):
        if not keep_history:
            self.__stack = []
            self.__stack_index = -1
        self._set_data(data)
        self._push_data()

    def _set_data(self, data: Dict):
        self.clear()
        _tree_to_model(data, self.__root, self.__tree.selectionModel())
        self.__tree.expandAll()

    def clear(self):
        if self.__model.hasChildren():
            self.__model.removeRows(0, self.__model.rowCount())

    def _enable_undo_redo(self):
        index = self.__stack_index
        self.__undo_action.setEnabled(index >= 1)
        self.__redo_action.setEnabled(index < len(self.__stack) - 1)

    def _push_data(self):
        self.__stack_index += 1
        self.__stack = self.__stack[:self.__stack_index]
        self.__stack.append(self.get_data())
        self._enable_undo_redo()

    def _set_from_stack(self):
        assert self.__stack_index < len(self.__stack)
        assert self.__stack_index >= 0
        self._set_data(self.__stack[self.__stack_index])
        self._enable_undo_redo()
        self.dataChanged.emit()
Example #8
0
class ResolweDataWidget(QWidget):
    def __init__(self, data_objects, descriptor_schema, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.ow = kwargs.get('parent', None)

        self._data_objects = data_objects
        self.descriptor_schema = descriptor_schema
        self.header_schema = None
        self.header = None

        # set layout
        layout = QVBoxLayout()
        layout.setContentsMargins(0, 0, 0, 0)
        self.setLayout(layout)

        self.view = QTreeView()
        self.view.setSortingEnabled(False)
        self.view.setAlternatingRowColors(True)
        self.view.setEditTriggers(QTreeView.NoEditTriggers)
        self.view.setSelectionMode(QTreeView.SingleSelection)

        self.model = QStandardItemModel()
        self.display_data_objects()

        self.layout().addWidget(self.view)

    def __set_header_values(self):
        if self.header_schema:
            labels = [val.get('label', '?') for val in self.header_schema]
            self.model.setHorizontalHeaderLabels(labels)

    def __create_row(self, obj):
        row_items = []
        tabular_data = obj.descriptor.get('tabular', None)
        output_data = obj.output.get('table', None)

        # TODO: refactor this. Use file_name and size from obj.output instead of desc. schema
        for schema_value in self.header_schema:
            item = QStandardItem()
            schema_key = schema_value['name']
            data_info = tabular_data.get(schema_key,
                                         '?') if tabular_data else '?'

            if schema_key == 'file_name' and data_info == '?':
                data_info = output_data.get('file',
                                            '?') if output_data else '?'
            elif schema_key == 'file_size' and data_info == '?':
                data_info = output_data.get('size',
                                            '?') if output_data else '?'

            item.setData(data_info, Qt.DisplayRole)
            row_items.append(item)

        return row_items

    def __populate_data_model(self):
        if self.model:
            self.model.clear()
            for data_object in self.data_objects:
                self.model.appendRow(self.__create_row(data_object))

    def __parse_description_schema(self):
        self.header_schema = []

        if self.descriptor_schema:
            for schema_value in self.descriptor_schema.schema:
                if schema_value['name'] == 'tabular':
                    [
                        self.header_schema.append(value)
                        for value in schema_value['group']
                    ]

        if self.header_schema:
            keys = [val.get('name', '?') for val in self.header_schema]
            header_index = namedtuple('header_index',
                                      [label for label in keys])
            self.header = header_index(
                *[index for index, _ in enumerate(keys)])

    @property
    def data_objects(self):
        return self._data_objects

    @data_objects.setter
    def data_objects(self, data_objects):
        self._data_objects = data_objects
        self.display_data_objects()

    def display_data_objects(self):
        self.__parse_description_schema()
        self.__populate_data_model()
        self.__set_header_values()
        self.view.setModel(self.model)

    def set_target_column(self, target_column):
        # type: (int) -> None

        for row in range(self.model.rowCount()):
            item = self.model.item(row, target_column)
            item_data = item.data(role=Qt.DisplayRole)
            if item_data:
                item.setIcon(variable_icon(item_data))

    def selected_data_object(self):
        # type: () -> Data
        rows = self.view.selectionModel().selectedRows()
        assert 0 <= len(rows) <= 1
        sel_row_index = rows[0].row() if rows else None

        obj_range = range(len(self._data_objects))
        assert sel_row_index in obj_range

        try:
            return self._data_objects[sel_row_index]
        except IndexError:
            # can this happen? self._data_objects can't
            # be empty if model is constructed
            pass
Example #9
0
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget"""

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001

    class Inputs:
        evaluation_results = Input("Evaluation Results", Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    quantities = ["Number of instances",
                  "Proportion of predicted",
                  "Proportion of actual"]

    settings_version = 1
    settingsHandler = settings.ClassValuesContextHandler()

    selected_learner = settings.Setting([0], schema_only=True)
    selection = settings.ContextSetting(set())
    selected_quantity = settings.Setting(0)
    append_predictions = settings.Setting(True)
    append_probabilities = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances",
            "click_cell")]

    class Error(widget.OWWidget.Error):
        no_regression = Msg("Confusion Matrix cannot show regression results.")
        invalid_values = Msg("Evaluation Results input contains invalid values")

    def __init__(self):
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        self.learners_box = gui.listBox(
            self.controlArea, self, "selected_learner", "learners", box=True,
            callback=self._learner_changed
        )

        self.outputbox = gui.vBox(self.controlArea, "Output")
        box = gui.hBox(self.outputbox)
        gui.checkBox(box, self, "append_predictions",
                     "Predictions", callback=self._invalidate)
        gui.checkBox(box, self, "append_probabilities",
                     "Probabilities",
                     callback=self._invalidate)

        gui.auto_commit(self.outputbox, self, "autocommit",
                        "Send Selected", "Send Automatically", box=False)

        self.mainArea.layout().setContentsMargins(0, 0, 0, 0)

        box = gui.vBox(self.mainArea, box=True)

        sbox = gui.hBox(box)
        gui.rubber(sbox)
        gui.comboBox(sbox, self, "selected_quantity",
                     items=self.quantities, label="Show: ",
                     orientation=Qt.Horizontal, callback=self._update)

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        view.clicked.connect(self.cell_clicked)
        box.layout().addWidget(view)

        selbox = gui.hBox(box)
        gui.button(selbox, self, "Select Correct",
                   callback=self.select_correct, autoDefault=False)
        gui.button(selbox, self, "Select Misclassified",
                   callback=self.select_wrong, autoDefault=False)
        gui.button(selbox, self, "Clear Selection",
                   callback=self.select_none, autoDefault=False)

    def sizeHint(self):
        """Initial size"""
        return QSize(750, 340)

    def _item(self, i, j):
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        item = self._item(0, 2)
        item.setData("Predicted", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)

        self._set_item(0, 2, item)
        item = self._item(2, 0)
        item.setData("Actual", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

        for p, label in enumerate(self.headers):
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < len(self.headers) - 1:
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        if len(' '.join(self.headers)) < 120:
            hor_header.setSectionResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        self.tablemodel.setRowCount(nclasses + 3)
        self.tablemodel.setColumnCount(nclasses + 3)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Set the input results."""

        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        else:
            self.Error.no_regression.clear()

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            if np.any(np.isnan(results.actual)) or \
                    np.any(np.isnan(results.predicted)):
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        if nan_values:
            self.Error.invalid_values()
        else:
            self.Error.invalid_values.clear()

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
        else:
            self.report_button.setDisabled(False)

            nmodels = results.predicted.shape[0]
            self.headers = class_values + \
                           [unicodedata.lookup("N-ARY SUMMATION")]

            # NOTE: The 'learner_names' is set in 'Test Learners' widget.
            if hasattr(results, "learner_names"):
                self.learners = results.learner_names
            else:
                self.learners = ["Learner #{}".format(i + 1)
                                 for i in range(nmodels)]

            self._init_table(len(class_values))
            self.openContext(data.domain.class_var)
            if not prev_sel_learner or prev_sel_learner[0] >= len(self.learners):
                if self.learners:
                    self.selected_learner[:] = [0]
            else:
                self.selected_learner[:] = prev_sel_learner
            self._update()
            self._set_selection()
            self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Handle cell click event"""
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def _prepare_data(self):
        indices = self.tableview.selectedIndexes()
        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        actual = self.results.actual
        learner_name = self.learners[self.selected_learner[0]]
        predicted = self.results.predicted[self.selected_learner[0]]
        selected = [i for i, t in enumerate(zip(actual, predicted))
                    if t in indices]

        extra = []
        class_var = self.data.domain.class_var
        metas = self.data.domain.metas

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            var = Orange.data.DiscreteVariable(
                "{}({})".format(class_var.name, learner_name),
                class_var.values
            )
            metas = metas + (var,)

        if self.append_probabilities and \
                        self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner[0]]
            extra.append(np.array(probs, dtype=object))
            pvars = [Orange.data.ContinuousVariable("p({})".format(value))
                     for value in class_var.values]
            metas = metas + tuple(pvars)

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars,
                                    metas)
        data = self.data.transform(domain)
        if len(extra):
            data.metas[:, len(self.data.domain.metas):] = \
                np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data

    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            data, annotated_data = self._prepare_data()
        else:
            data = None
            annotated_data = None

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        self.commit()

    def _set_selection(self):
        selection = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        self._update()
        self._set_selection()
        self.commit()

    def _update(self):
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner:
            cmatrix = confusion_matrix(self.results, self.selected_learner[0])
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = np.diag_indices(n)

            colors = cmatrix.astype(np.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                normalized = cmatrix.astype(np.int)
                formatstr = "{}"
                div = np.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    normalized = 100 * cmatrix / rowsum[:, np.newaxis]
                    div = colors.max(axis=1)[:, np.newaxis]
                formatstr = "{:2.1f} %"
            div[div == 0] = 1
            colors /= div
            colors[diag] = normalized[diag] / normalized[diag].max()

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val) else int(255 - 30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner:
            self.report_table(
                "Confusion matrix for {} (showing {})".
                format(self.learners[self.selected_learner[0]],
                       self.quantities[self.selected_quantity].lower()),
                self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        if not version:
            # For some period of time the 'selected_learner' property was
            # changed from List[int] -> int
            # (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back
            # again (commit 8a492d79a2e17154a0881e24a05843406c8892c0)
            if "selected_learner" in settings and \
                    isinstance(settings["selected_learner"], int):
                settings["selected_learner"] = [settings["selected_learner"]]
Example #10
0
    def __set_index(self, f):
        # type: (Future) -> None
        # set results from `list_remote` query.
        assert QThread.currentThread() is self.thread()
        assert f.done()
        self.setBlocking(False)
        self.setStatusMessage("")
        allinfolocal = list_local()
        try:
            res = f.result()
        except Exception as er:
            log = logging.getLogger(__name__)
            log.exception("Error while fetching updated index")
            if not allinfolocal:
                self.error("Could not fetch data set list")
            else:
                self.warning("Could not fetch data sets list, only local "
                             "cached data sets are shown")
            res = {}

        allinforemote = res  # type: Dict[Tuple[str, str], dict]
        allkeys = set(allinfolocal)
        if allinforemote is not None:
            allkeys = allkeys | set(allinforemote)
        allkeys = sorted(allkeys)

        def info(prefix, filename):
            if (prefix, filename) in allinforemote:
                info = allinforemote[prefix, filename]
            else:
                info = allinfolocal[prefix, filename]
            islocal = (prefix, filename) in allinfolocal

            return namespace(prefix=prefix,
                             filename=filename,
                             title=info.get("title", filename),
                             datetime=info.get("datetime", None),
                             description=info.get("description", None),
                             reference=info.get("reference", None),
                             instances=info.get("instances", None),
                             variables=info.get("variables", None),
                             target=info.get("target", None),
                             missing=info.get("missing", None),
                             tags=info.get("tags", []),
                             size=info.get("size", None),
                             islocal=islocal)

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(HEADER)

        current_index = -1
        for i, (prefix, filename) in enumerate(allkeys):
            datainfo = info(prefix, filename)
            item1 = QStandardItem()
            item1.setData(" " if datainfo.islocal else "", Qt.DisplayRole)
            item1.setData(datainfo, Qt.UserRole)
            item2 = QStandardItem(datainfo.title)
            item3 = QStandardItem()
            item3.setData(datainfo.size, Qt.DisplayRole)
            item4 = QStandardItem()
            item4.setData(datainfo.instances, Qt.DisplayRole)
            item5 = QStandardItem()
            item5.setData(datainfo.variables, Qt.DisplayRole)
            item6 = QStandardItem()
            item6.setData(datainfo.target, Qt.DisplayRole)
            item6.setIcon(variable_icon(datainfo.target))
            item7 = QStandardItem()
            item7.setData(", ".join(datainfo.tags), Qt.DisplayRole)
            row = [item1, item2, item3, item4, item5, item6, item7]
            model.appendRow(row)

            if (prefix, filename) == self.selected_id:
                current_index = i

        hs = self.view.header().saveState()
        model_ = self.view.model()
        self.view.setModel(model)
        self.view.header().restoreState(hs)
        model_.deleteLater()
        model_.setParent(None)
        self.view.selectionModel().selectionChanged.connect(
            self.__on_selection)
        # Update the info text
        self.infolabel.setText("{} datasets \n{} datasets cached".format(
            model.rowCount(), len(allinfolocal)))

        if current_index != -1:
            selmodel = self.view.selectionModel()
            selmodel.select(
                model.index(current_index, 0),
                QItemSelectionModel.ClearAndSelect | QItemSelectionModel.Rows)
Example #11
0
class OWCorpusViewer(OWWidget):
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 70

    inputs = [(IO.DATA, Table, 'set_data')]
    outputs = [(IO.MATCHED, Corpus, widget.Default), (IO.UNMATCHED, Corpus)]

    search_indices = ContextSetting([0])  # features included in search
    display_indices = ContextSetting([0])  # features for display
    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        super().__init__()

        self.corpus = None  # Corpus
        self.corpus_docs = None  # Documents generated from Corpus
        self.output_mask = []  # Output corpus indices
        self.doc_webview = None  # WebView for showing content
        self.search_features = [
        ]  # two copies are needed since Display allows drag & drop
        self.display_features = []

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, '  ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, '  ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea,
            self,
            'search_indices',
            'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features',
            callback=self.regenerate_docs,
        )

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box,
            self,
            'display_indices',
            'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs,
            enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box,
                                                 self,
                                                 'show_tokens',
                                                 'Show Tokens && Tags',
                                                 callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data',
                        'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea,
                                         self,
                                         '',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(
                                             QSizePolicy.MinimumExpanding,
                                             QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    def set_data(self, data=None):
        self.reset_widget()
        self.corpus = data
        if data is not None:
            if not isinstance(data, Corpus):
                self.corpus = Corpus.from_table(data.domain, data)
            self.load_features()
            self.regenerate_docs()
        self.commit()

    def reset_widget(self):
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.display_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def load_features(self):
        self.search_indices = []
        self.display_indices = []
        if self.corpus is not None:
            domain = self.corpus.domain
            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            # FIXME: Select features based on ContextSetting
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))

            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                self.show_tokens_checkbox.setCheckState(False)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

    def list_docs(self):
        """ List documents into the left scrolling area """
        search_keyword = self.filter_input.text().strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except sre_constants.error:
            return

        def is_match(x):
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(
                zip(self.corpus, self.corpus.titles, self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)  # Select the first document
        else:
            self.doc_webview.setHtml('')
        self.commit()

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [
            f for i, f in enumerate(self.search_features)
            if i in self.search_indices
        ]

        html = '<table>'
        for doc_count, index in enumerate(
                self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:  # add split
                html += '<tr class="line separator"><td/><td/></tr>' \
                        '<tr class="separator"><td/><td/></tr>'

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                mark = 'class="mark-area"' if feature in marked_search_features else ''
                value = str(index.data(Qt.UserRole)[feature.name])
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                html += '<tr><td class="variables"><strong>{}:</strong></td>' \
                        '<td {}>{}</td></tr>'.format(
                    feature.name, mark, value)

            if self.show_tokens:
                html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
                        '<td>{}</td></tr>'.format(''.join('<span class="token">{}</span>'.format(
                    token) for token in tokens[row_ind]))

        html += '</table>'

        # QUrl is a workaround to allow local resources
        # https://bugreports.qt.io/browse/QTBUG-55902?focusedCommentId=335945
        self.doc_webview.setHtml(HTML.format(html), QUrl("file://"))
        self.load_js()
        self.highlight_docs()

    def load_js(self):
        resources = os.path.join(os.path.dirname(__file__), 'resources')
        for script in (
                'jquery-3.1.1.min.js',
                'jquery.mark.min.js',
                'highlighter.js',
        ):
            self.doc_webview.evalJS(
                open(os.path.join(resources, script), encoding='utf-8').read())

    def regenerate_docs(self):
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)
            self.refresh_search()

    def refresh_search(self):
        if self.corpus is not None:
            self.list_docs()
            self.update_info()

    def highlight_docs(self):
        search_keyword = self.filter_input.text().\
            strip('|').replace('\\', '\\\\')    # escape one \ to  two for mark.js
        if search_keyword:
            self.doc_webview.evalJS('mark("{}");'.format(search_keyword))

    def update_info(self):
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(),
                                             self.n_documents)
            self.n_tokens = sum(
                map(len,
                    self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(
                self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [
                i for i in range(len(self.corpus)) if i not in output_mask
            ]
            unmatched = self.corpus[unmatched_mask]
            self.send(IO.MATCHED, matched)
            self.send(IO.UNMATCHED, unmatched)
        else:
            self.send(IO.MATCHED, None)
            self.send(IO.UNMATCHED, None)
Example #12
0
class OWCorpusViewer(OWWidget):
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 70

    inputs = [(IO.DATA, Table, 'set_data')]
    outputs = [(IO.MATCHED, Corpus, widget.Default), (IO.UNMATCHED, Corpus)]

    search_indices = ContextSetting([0])   # features included in search
    display_indices = ContextSetting([0])  # features for display
    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.output_mask = []           # Output corpus indices
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_features = []

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, '  ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, '  ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.regenerate_docs,)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box, self, 'show_tokens',
                                                 'Show Tokens && Tags', callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea, self, '',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                                QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        # For PyQt5 WebEngine's setHtml grabs the focus and makes typing hard
        # More info: http://stackoverflow.com/questions/36609489
        # To bypass the annoying behaviour disable the widget for WebEngine
        self.doc_webview = gui.WebviewWidget(self.splitter, self,
                                             debug=True, enabled=HAVE_WEBKIT)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    def set_data(self, data=None):
        self.reset_widget()
        self.corpus = data
        if data is not None:
            if not isinstance(data, Corpus):
                self.corpus = Corpus.from_table(data.domain, data)
            self.load_features()
            self.regenerate_docs()
        self.commit()

    def reset_widget(self):
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.display_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def load_features(self):
        self.search_indices = []
        self.display_indices = []
        if self.corpus is not None:
            domain = self.corpus.domain
            self.search_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(filter_visible(chain(domain.variables, domain.metas)))
            # FIXME: Select features based on ContextSetting
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))

            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                self.show_tokens_checkbox.setCheckState(False)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

    def list_docs(self):
        """ List documents into the left scrolling area """
        search_keyword = self.filter_input.text().strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except sre_constants.error:
            return

        def is_match(x):
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(zip(self.corpus, self.corpus.titles,
                                                      self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)          # Select the first document
        else:
            self.doc_webview.setHtml('')
        self.commit()

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [f for i, f in enumerate(self.search_features)
                                  if i in self.search_indices]

        html = '<table>'
        for doc_count, index in enumerate(self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:   # add split
                html += '<tr class="line separator"><td/><td/></tr>' \
                        '<tr class="separator"><td/><td/></tr>'

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                mark = 'class="mark-area"' if feature in marked_search_features else ''
                value = index.data(Qt.UserRole)[feature.name]
                html += '<tr><td class="variables"><strong>{}:</strong></td>' \
                        '<td {}>{}</td></tr>'.format(
                    feature.name, mark, value)

            if self.show_tokens:
                html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
                        '<td>{}</td></tr>'.format(''.join('<span class="token">{}</span>'.format(
                    token) for token in tokens[row_ind]))

        html += '</table>'

        self.doc_webview.setHtml(HTML.format(html))
        self.load_js()
        self.highlight_docs()

    def load_js(self):
        resources = os.path.join(os.path.dirname(__file__), 'resources')
        for script in ('jquery-3.1.1.min.js', 'jquery.mark.min.js', 'highlighter.js', ):
            self.doc_webview.evalJS(open(os.path.join(resources, script), encoding='utf-8').read())

    def regenerate_docs(self):
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)
            self.refresh_search()

    def refresh_search(self):
        if self.corpus:
            self.list_docs()
            self.update_info()

    def highlight_docs(self):
        search_keyword = self.filter_input.text().\
            strip('|').replace('\\', '\\\\')    # escape one \ to  two for mark.js
        if search_keyword:
            self.doc_webview.evalJS('mark("{}");'.format(search_keyword))

    def update_info(self):
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(), self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [i for i in range(len(self.corpus)) if i not in output_mask]
            unmatched = self.corpus[unmatched_mask]
            self.send(IO.MATCHED, matched)
            self.send(IO.UNMATCHED, unmatched)
        else:
            self.send(IO.MATCHED, None)
            self.send(IO.UNMATCHED, None)
Example #13
0
class OWCorpusViewer(OWWidget):
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values = PerfectDomainContextHandler.MATCH_VALUES_ALL
    )

    search_indices = ContextSetting([], exclude_metas=False)   # features included in search
    display_indices = ContextSetting([], exclude_metas=False)  # features for display
    display_features = ContextSetting([], exclude_metas=False)
    regexp_filter = ContextSetting("")

    selection = [0]  # TODO: DataHashContextHandler

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.output_mask = []           # Output corpus indices
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, '  ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, '  ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_list_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box, self, 'show_tokens',
                                                 'Show Tokens && Tags', callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea, self, 'regexp_filter',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                                QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                self.show_tokens_checkbox.setCheckState(False)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

            self.search_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            self.selection = [0]
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """ List documents into the left scrolling area """
        if self.corpus_docs is None:
            return
        search_keyword = self.regexp_filter.strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except sre_constants.error:
            return

        def is_match(x):
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(zip(self.corpus, self.corpus.titles,
                                                      self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

    def reset_selection(self):
        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)  # Select the first document
        else:
            self.doc_webview.setHtml('')

    def set_selection(self):
        view = self.doc_list
        if len(self.selection):
            selection = QItemSelection()

            for row in self.selection:
                selection.append(
                    QItemSelectionRange(
                        view.model().index(row, 0),
                        view.model().index(row, 0)
                    )
                )
            view.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}
        
        .content {{
            /* Adopted from https://css-tricks.com/snippets/css/prevent-long-urls-from-breaking-out-of-container/ */
        
            /* These are technically the same, but use both */
            overflow-wrap: break-word;
            word-wrap: break-word;
        
            -ms-word-break: break-all;
            /* This is the dangerous one in WebKit, as it breaks things wherever */
            word-break: break-all;
            /* Instead use this non-standard one: */
            word-break: break-word;
        
            /* Adds a hyphen where the word breaks, if supported (No Blink) */
            -ms-hyphens: auto;
            -moz-hyphens: auto;
            -webkit-hyphens: auto;
            hyphens: auto;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [f for i, f in enumerate(self.search_features)
                                  if i in self.search_indices]

        html = '<table>'
        selection = [i.row() for i in self.doc_list.selectionModel().selectedRows()]
        if selection != []:
            self.selection = selection
        for doc_count, index in enumerate(self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:   # add split
                html += '<tr class="line separator"><td/><td/></tr>' \
                        '<tr class="separator"><td/><td/></tr>'

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                value = str(index.data(Qt.UserRole)[feature.name])
                if feature in marked_search_features:
                    value = self.__mark_text(value)
                value = value.replace('\n', '<br/>')
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                html += '<tr><td class="variables"><strong>{}:</strong></td>' \
                        '<td class="content">{}</td></tr>'.format(
                    feature.name, value)

            if self.show_tokens:
                html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
                        '<td>{}</td></tr>'.format(''.join('<span class="token">{}</span>'.format(
                    token) for token in tokens[row_ind]))

        html += '</table>'
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(html), base)

    def __mark_text(self, text):
        search_keyword = self.regexp_filter.strip('|')
        if not search_keyword:
            return text

        try:
            reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
        except sre_constants.error:
            return text

        matches = list(reg.finditer(text))
        if not matches:
            return text

        text = list(text)
        for m in matches[::-1]:
            text[m.start():m.end()] = list('<mark data-markjs="true">{}</mark>'\
                .format("".join(text[m.start():m.end()])))

        return "".join(text)

    def search_features_changed(self):
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        if self.corpus is not None:
            self.list_docs()
            self.reset_selection()
            self.update_info()
            self.commit()

    def update_info(self):
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(), self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [i for i in range(len(self.corpus)) if i not in output_mask]
            unmatched = self.corpus[unmatched_mask]
            self.Outputs.matching_docs.send(matched)
            self.Outputs.other_docs.send(unmatched)
        else:
            self.Outputs.matching_docs.send(None)
            self.Outputs.other_docs.send(None)

    def send_report(self):
        self.report_items((
            ("Query", self.regexp_filter),
            ("Matching documents", self.n_matching),
        ))
Example #14
0
class OWCorpusViewer(OWWidget):
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    search_indices = ContextSetting(
        [], exclude_metas=False)  # features included in search
    display_indices = ContextSetting(
        [], exclude_metas=False)  # features for display
    display_features = ContextSetting([], exclude_metas=False)
    regexp_filter = ContextSetting("")

    selection = [0]  # TODO: DataHashContextHandler

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        super().__init__()

        self.corpus = None  # Corpus
        self.corpus_docs = None  # Documents generated from Corpus
        self.output_mask = []  # Output corpus indices
        self.doc_webview = None  # WebView for showing content
        self.search_features = [
        ]  # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, '  ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, '  ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea,
            self,
            'search_indices',
            'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features',
            callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box,
            self,
            'display_list_indices',
            'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs,
            enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box,
                                                 self,
                                                 'show_tokens',
                                                 'Show Tokens && Tags',
                                                 callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data',
                        'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea,
                                         self,
                                         'regexp_filter',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(
                                             QSizePolicy.MinimumExpanding,
                                             QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)
        self.doc_webview.loadFinished.connect(self.highlight_docs)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                self.show_tokens_checkbox.setCheckState(False)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            self.selection = [0]
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """ List documents into the left scrolling area """
        if self.corpus_docs is None:
            return
        search_keyword = self.regexp_filter.strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except sre_constants.error:
            return

        def is_match(x):
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(
                zip(self.corpus, self.corpus.titles, self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

    def reset_selection(self):
        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)  # Select the first document
        else:
            self.doc_webview.setHtml('')

    def set_selection(self):
        view = self.doc_list
        if len(self.selection):
            selection = QItemSelection()

            for row in self.selection:
                selection.append(
                    QItemSelectionRange(view.model().index(row, 0),
                                        view.model().index(row, 0)))
            view.selectionModel().select(selection,
                                         QItemSelectionModel.ClearAndSelect)

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [
            f for i, f in enumerate(self.search_features)
            if i in self.search_indices
        ]

        html = '<table>'
        selection = [
            i.row() for i in self.doc_list.selectionModel().selectedRows()
        ]
        if selection != []:
            self.selection = selection
        for doc_count, index in enumerate(
                self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:  # add split
                html += '<tr class="line separator"><td/><td/></tr>' \
                        '<tr class="separator"><td/><td/></tr>'

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                mark = 'class="mark-area"' if feature in marked_search_features else ''
                value = str(index.data(Qt.UserRole)[feature.name])
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                html += '<tr><td class="variables"><strong>{}:</strong></td>' \
                        '<td {}>{}</td></tr>'.format(
                    feature.name, mark, value)

            if self.show_tokens:
                html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
                        '<td>{}</td></tr>'.format(''.join('<span class="token">{}</span>'.format(
                    token) for token in tokens[row_ind]))

        html += '</table>'
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(html), base)

    def search_features_changed(self):
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        if self.corpus is not None:
            self.list_docs()
            self.reset_selection()
            self.update_info()
            self.commit()

    @Slot()
    def highlight_docs(self):
        search_keyword = self.regexp_filter.\
            strip('|').replace('\\', '\\\\')    # escape one \ to  two for mark.js

        if search_keyword:
            # mark is undefined when clearing the view (`setHtml('')`). Maybe
            # set and template html with all the scripts, ... but no contents?
            self.doc_webview.runJavaScript('''
                    if (typeof mark !== "undefined") {{
                        mark("{}");
                    }}
                '''.format(search_keyword))

    def update_info(self):
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(),
                                             self.n_documents)
            self.n_tokens = sum(
                map(len,
                    self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(
                self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [
                i for i in range(len(self.corpus)) if i not in output_mask
            ]
            unmatched = self.corpus[unmatched_mask]
            self.Outputs.matching_docs.send(matched)
            self.Outputs.other_docs.send(unmatched)
        else:
            self.Outputs.matching_docs.send(None)
            self.Outputs.other_docs.send(None)
Example #15
0
    def __on_enrichment_finished(self, results):
        assert QThread.currentThread() is self.thread()
        self.__state &= ~OWSetEnrichment.RunningEnrichment

        query, reference, results = results

        if self.annotationsChartView.model():
            self.annotationsChartView.model().clear()

        nquery = len(query)
        nref = len(reference)
        maxcount = max((len(e.query_mapped) for _, e in results),
                       default=1)
        maxrefcount = max((len(e.reference_mapped) for _, e in results),
                          default=1)
        nspaces = int(math.ceil(math.log10(maxcount or 1)))
        refspaces = int(math.ceil(math.log(maxrefcount or 1)))
        query_fmt = "%" + str(nspaces) + "s  (%.2f%%)"
        ref_fmt = "%" + str(refspaces) + "s  (%.2f%%)"

        def fmt_count(fmt, count, total):
            return fmt % (count, 100.0 * count / (total or 1))

        fmt_query_count = partial(fmt_count, query_fmt)
        fmt_ref_count = partial(fmt_count, ref_fmt)

        linkFont = QFont(self.annotationsChartView.viewOptions().font)
        linkFont.setUnderline(True)

        def item(value=None, tooltip=None, user=None):
            si = QStandardItem()
            if value is not None:
                si.setData(value, Qt.DisplayRole)
            if tooltip is not None:
                si.setData(tooltip, Qt.ToolTipRole)
            if user is not None:
                si.setData(user, Qt.UserRole)
            else:
                si.setData(value, Qt.UserRole)
            return si

        model = QStandardItemModel()
        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels(
            ["Category", "Term", "Count", "Reference count", "p-value",
             "FDR", "Enrichment"])
        for i, (gset, enrich) in enumerate(results):
            if len(enrich.query_mapped) == 0:
                continue
            nquery_mapped = len(enrich.query_mapped)
            nref_mapped = len(enrich.reference_mapped)

            row = [
                item(", ".join(gset.hierarchy)),
                item(gsname(gset), tooltip=gset.link),
                item(fmt_query_count(nquery_mapped, nquery),
                     tooltip=nquery_mapped, user=nquery_mapped),
                item(fmt_ref_count(nref_mapped, nref),
                     tooltip=nref_mapped, user=nref_mapped),
                item(fmtp(enrich.p_value), user=enrich.p_value),
                item(),  # column 5, FDR, is computed in filterAnnotationsChartView
                item(enrich.enrichment_score,
                     tooltip="%.3f" % enrich.enrichment_score,
                     user=enrich.enrichment_score)
            ]
            row[0].geneset = gset
            row[0].enrichment = enrich
            row[1].setData(gset.link, gui.LinkRole)
            row[1].setFont(linkFont)
            row[1].setForeground(QColor(Qt.blue))

            model.appendRow(row)

        self.annotationsChartView.setModel(model)
        self.annotationsChartView.selectionModel().selectionChanged.connect(
            self.commit
        )

        if not model.rowCount():
            self.warning(0, "No enriched sets found.")
        else:
            self.warning(0)

        allnames = set(gsname(geneset)
                       for geneset, (count, _, _, _) in results if count)

        allnames |= reduce(operator.ior,
                           (set(word_split(name)) for name in allnames),
                           set())

        self.filterCompleter.setModel(None)
        self.completerModel = QStringListModel(sorted(allnames))
        self.filterCompleter.setModel(self.completerModel)

        if results:
            max_score = max((e.enrichment_score for _, e in results
                             if np.isfinite(e.enrichment_score)),
                            default=1)

            self.annotationsChartView.setItemDelegateForColumn(
                6, BarItemDelegate(self, scale=(0.0, max_score))
            )

        self.annotationsChartView.setItemDelegateForColumn(
            1, gui.LinkStyledItemDelegate(self.annotationsChartView)
        )

        header = self.annotationsChartView.header()
        for i in range(model.columnCount()):
            sh = self.annotationsChartView.sizeHintForColumn(i)
            sh = max(sh, header.sectionSizeHint(i))
            self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))
#             self.annotationsChartView.resizeColumnToContents(i)

        self.filterAnnotationsChartView()

        self.progressBarFinished()
        self.setStatusMessage("")
Example #16
0
class OWPreprocess(widget.OWWidget, openclass=True):
    name = "Preprocess"
    description = "Construct a data preprocessing pipeline."
    icon = "icons/Preprocess.svg"
    priority = 2105
    keywords = ["process"]

    settings_version = 2

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        preprocessor = Output("Preprocessor",
                              preprocess.preprocess.Preprocess,
                              dynamic=False)
        preprocessed_data = Output("Preprocessed Data", Orange.data.Table)

    storedsettings = Setting({})
    autocommit = Setting(True)
    PREPROCESSORS = PREPROCESS_ACTIONS
    CONTROLLER = Controller

    def __init__(self):
        super().__init__()

        self.data = None
        self._invalidated = False

        # List of available preprocessors (DescriptionRole : Description)
        self.preprocessors = QStandardItemModel()

        def mimeData(indexlist):
            assert len(indexlist) == 1
            index = indexlist[0]
            qname = index.data(DescriptionRole).qualname
            m = QMimeData()
            m.setData("application/x-qwidget-ref", qname.encode("utf-8"))
            return m

        # TODO: Fix this (subclass even if just to pass a function
        # for mimeData delegate)
        self.preprocessors.mimeData = mimeData

        box = gui.vBox(self.controlArea, "Preprocessors")
        gui.rubber(self.controlArea)

        # we define a class that lets us set the vertical sizeHint
        # based on the height and number of items in the list
        # see self.__update_list_sizeHint

        class ListView(QListView):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)
                self.vertical_hint = None

            def sizeHint(self):
                sh = super().sizeHint()
                if self.vertical_hint:
                    return QSize(sh.width(), self.vertical_hint)
                return sh

        self.preprocessorsView = view = ListView(
            selectionMode=QListView.SingleSelection,
            dragEnabled=True,
            dragDropMode=QListView.DragOnly)
        view.setModel(self.preprocessors)
        view.activated.connect(self.__activated)

        box.layout().addWidget(view)

        ####
        self._qname2ppdef = {
            ppdef.qualname: ppdef
            for ppdef in self.PREPROCESSORS
        }

        # List of 'selected' preprocessors and their parameters.
        self.preprocessormodel = None

        self.flow_view = SequenceFlow()
        self.controler = self.CONTROLLER(self.flow_view, parent=self)

        self.overlay = OverlayWidget(self)
        self.overlay.setAttribute(Qt.WA_TransparentForMouseEvents)
        self.overlay.setWidget(self.flow_view)
        self.overlay.setLayout(QVBoxLayout())
        self.overlay.layout().addWidget(
            QLabel("Drag items from the list on the left", wordWrap=True))

        self.scroll_area = QScrollArea(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn)
        self.scroll_area.viewport().setAcceptDrops(True)
        self.scroll_area.setWidget(self.flow_view)
        self.scroll_area.setWidgetResizable(True)
        self.mainArea.layout().addWidget(self.scroll_area)
        self.flow_view.installEventFilter(self)

        gui.auto_apply(self.buttonsArea, self, "autocommit")

        self._initialize()

    def _initialize(self):
        for pp_def in self.PREPROCESSORS:
            description = pp_def.description
            if description.icon:
                icon = QIcon(description.icon)
            else:
                icon = QIcon()
            item = QStandardItem(icon, description.title)
            item.setToolTip(description.summary or "")
            item.setData(pp_def, DescriptionRole)
            item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable
                          | Qt.ItemIsDragEnabled)
            self.preprocessors.appendRow([item])

        self.__update_list_sizeHint()

        model = self.load(self.storedsettings)

        self.set_model(model)

        if not model.rowCount():
            # enforce default width constraint if no preprocessors
            # are instantiated (if the model is not empty the constraints
            # will be triggered by LayoutRequest event on the `flow_view`)
            self.__update_size_constraint()

        self.apply()

    def __update_list_sizeHint(self):
        view = self.preprocessorsView

        h = view.sizeHintForRow(0)
        n = self.preprocessors.rowCount()
        view.vertical_hint = n * h + 2  # only on Mac?
        view.updateGeometry()

    def load(self, saved):
        """Load a preprocessor list from a dict."""
        preprocessors = saved.get("preprocessors", [])
        model = StandardItemModel()

        def dropMimeData(data, action, row, _column, _parent):
            if data.hasFormat("application/x-qwidget-ref") and \
                    action == Qt.CopyAction:
                qname = bytes(data.data("application/x-qwidget-ref")).decode()

                ppdef = self._qname2ppdef[qname]
                item = QStandardItem(ppdef.description.title)
                item.setData({}, ParametersRole)
                item.setData(ppdef.description.title, Qt.DisplayRole)
                item.setData(ppdef, DescriptionRole)
                self.preprocessormodel.insertRow(row, [item])
                return True
            else:
                return False

        model.dropMimeData = dropMimeData

        for qualname, params in preprocessors:
            pp_def = self._qname2ppdef[qualname]
            description = pp_def.description
            item = QStandardItem(description.title)
            if description.icon:
                icon = QIcon(description.icon)
            else:
                icon = QIcon()
            item.setIcon(icon)
            item.setToolTip(description.summary)
            item.setData(pp_def, DescriptionRole)
            item.setData(params, ParametersRole)

            model.appendRow(item)
        return model

    @staticmethod
    def save(model):
        """Save the preprocessor list to a dict."""
        d = {"name": ""}
        preprocessors = []
        for i in range(model.rowCount()):
            item = model.item(i)
            pp_def = item.data(DescriptionRole)
            params = item.data(ParametersRole)
            preprocessors.append((pp_def.qualname, params))

        d["preprocessors"] = preprocessors
        return d

    def set_model(self, ppmodel):
        if self.preprocessormodel:
            self.preprocessormodel.dataChanged.disconnect(
                self.__on_modelchanged)
            self.preprocessormodel.rowsInserted.disconnect(
                self.__on_modelchanged)
            self.preprocessormodel.rowsRemoved.disconnect(
                self.__on_modelchanged)
            self.preprocessormodel.rowsMoved.disconnect(self.__on_modelchanged)
            self.preprocessormodel.deleteLater()

        self.preprocessormodel = ppmodel
        self.controler.setModel(ppmodel)
        if ppmodel is not None:
            self.preprocessormodel.dataChanged.connect(self.__on_modelchanged)
            self.preprocessormodel.rowsInserted.connect(self.__on_modelchanged)
            self.preprocessormodel.rowsRemoved.connect(self.__on_modelchanged)
            self.preprocessormodel.rowsMoved.connect(self.__on_modelchanged)

        self.__update_overlay()

    def __update_overlay(self):
        if self.preprocessormodel is None or \
                self.preprocessormodel.rowCount() == 0:
            self.overlay.setWidget(self.flow_view)
            self.overlay.show()
        else:
            self.overlay.setWidget(None)
            self.overlay.hide()

    def __on_modelchanged(self):
        self.__update_overlay()
        self.commit()

    @Inputs.data
    @check_sql_input
    def set_data(self, data=None):
        """Set the input dataset."""
        self.data = data

    def handleNewSignals(self):
        self.apply()

    def __activated(self, index):
        item = self.preprocessors.itemFromIndex(index)
        action = item.data(DescriptionRole)
        item = QStandardItem()
        item.setData({}, ParametersRole)
        item.setData(action.description.title, Qt.DisplayRole)
        item.setData(action, DescriptionRole)
        self.preprocessormodel.appendRow([item])

    def buildpreproc(self):
        plist = []
        for i in range(self.preprocessormodel.rowCount()):
            item = self.preprocessormodel.item(i)
            desc = item.data(DescriptionRole)
            params = item.data(ParametersRole)

            if not isinstance(params, dict):
                params = {}

            create = desc.viewclass.createinstance
            plist.append(create(params))

        if len(plist) == 1:
            return plist[0]
        else:
            return preprocess.preprocess.PreprocessorList(plist)

    def apply(self):
        # Sync the model into storedsettings on every apply.
        self.storeSpecificSettings()
        preprocessor = self.buildpreproc()

        if self.data is not None:
            self.error()
            try:
                data = preprocessor(self.data)
            except (ValueError, ZeroDivisionError) as e:
                self.error(str(e))
                return
        else:
            data = None

        self.Outputs.preprocessor.send(preprocessor)
        self.Outputs.preprocessed_data.send(data)

    def commit(self):
        if not self._invalidated:
            self._invalidated = True
            QApplication.postEvent(self, QEvent(QEvent.User))

    def customEvent(self, event):
        if event.type() == QEvent.User and self._invalidated:
            self._invalidated = False
            self.apply()

    def eventFilter(self, receiver, event):
        if receiver is self.flow_view and event.type() == QEvent.LayoutRequest:
            QTimer.singleShot(0, self.__update_size_constraint)

        return super().eventFilter(receiver, event)

    def storeSpecificSettings(self):
        """Reimplemented."""
        self.storedsettings = self.save(self.preprocessormodel)
        super().storeSpecificSettings()

    def saveSettings(self):
        """Reimplemented."""
        self.storedsettings = self.save(self.preprocessormodel)
        super().saveSettings()

    @classmethod
    def migrate_settings(cls, settings, version):
        if version < 2:
            for action, params in settings["storedsettings"]["preprocessors"]:
                if action == "orange.preprocess.scale":
                    scale = center = None
                    if "center" in params:
                        center = params.pop("center").name
                    if "scale" in params:
                        scale = params.pop("scale").name
                    migratable = {
                        ("Mean", "NoScaling"): Scale.CenterByMean,
                        ("NoCentering", "Std"): Scale.ScaleBySD,
                        ("Mean", "Std"): Scale.NormalizeBySD,
                        ("NoCentering", "Span"):
                        Scale.NormalizeBySpan_ZeroBased
                    }
                    params["method"] = \
                        migratable.get((center, scale), Scale.NormalizeBySD)

    def onDeleteWidget(self):
        self.data = None
        self.set_model(None)
        super().onDeleteWidget()

    @Slot()
    def __update_size_constraint(self):
        # Update minimum width constraint on the scroll area containing
        # the 'instantiated' preprocessor list (to avoid the horizontal
        # scroll bar).
        sh = self.flow_view.minimumSizeHint()
        scroll_width = self.scroll_area.verticalScrollBar().width()
        self.scroll_area.setMinimumWidth(
            min(max(sh.width() + scroll_width + 2, self.controlArea.width()),
                520))

    def send_report(self):
        pp = [(self.controler.model().index(i, 0).data(Qt.DisplayRole), w)
              for i, w in enumerate(self.controler.view.widgets())]
        if pp:
            self.report_items("Settings", pp)
class OWBatchNorm(OWWidget):
    name = "Batch Effect Removal"
    description = "Batch effect normalization on Single Cell data set."
    icon = "icons/BatchEffectRemoval.svg"
    priority = 230

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)

    class Error(OWWidget.Error):
        general_error = Msg({})
        discrete_attributes = Msg("Data with discrete attributes "
                                  "can not be processed.")

    class Warning(OWWidget.Warning):
        missing_values = Msg("Missing values have been replaced with 0.")
        negative_values = Msg("Unable to use current settings due "
                              "to negative values in data.")

    resizing_enabled = False
    want_main_area = False

    settingsHandler = PerfectDomainContextHandler()
    batch_vars = ContextSetting([])
    link_method = Setting(LinkMethod.IDENTITY_LINK)
    skip_zeros = Setting(False)
    auto_commit = Setting(True)

    def __init__(self, parent=None):
        super().__init__(parent)
        self.data = None

        # Info
        infobox = gui.widgetBox(self.controlArea, "Info")
        self.info_label = gui.widgetLabel(infobox, "No data on input.")

        # Link method
        method_box = gui.widgetBox(self.controlArea, "Method")
        gui.comboBox(method_box,
                     self,
                     "link_method",
                     items=LinkMethod.items(),
                     callback=self.__link_method_changed)
        gui.separator(method_box)
        self.skip_zeros_check = gui.checkBox(
            method_box,
            self,
            "skip_zeros",
            "Skip zero expressions",
            enabled=self.link_method != LinkMethod.LOG_LINK,
            callback=lambda: self.commit())

        # Batch Variable Selection
        header_shema = (("selected", ""), ("variable", "Variable"),
                        ("count", "#"), ("score", "Score"))
        header_labels = labels = [label for _, label in header_shema]
        header = namedtuple("header", [tag for tag, _ in header_shema])
        self.Header = header(*[index for index, _ in enumerate(labels)])

        batch_box = gui.widgetBox(self.controlArea, "Batch Variable Selection")
        self.view = QTreeView()
        self.model = QStandardItemModel()
        self.model.itemChanged.connect(self.__selected_batch_vars_changed)
        self.model.setHorizontalHeaderLabels(header_labels)
        batch_box.layout().addWidget(self.view)
        self._setup_view()

        gui.auto_commit(self.controlArea, self, "auto_commit", "Apply",
                        "Apply Automatically")

    def __link_method_changed(self):
        enable = self.link_method != LinkMethod.LOG_LINK
        self.skip_zeros_check.setEnabled(enable)
        if not enable:
            self.skip_zeros_check.setChecked(True)
        self.commit()

    def __selected_batch_vars_changed(self, item):
        if item.checkState():
            self.batch_vars.append(item.data(VariableRole))
        else:
            self.batch_vars.remove(item.data(VariableRole))
        self.commit()

    def _setup_view(self):
        self.view.setModel(self.model)
        self.view.setSelectionMode(QTreeView.NoSelection)
        self.view.setSortingEnabled(True)
        self.view.setRootIsDecorated(False)
        self.view.setItemDelegateForColumn(self.Header.count,
                                           IntegralDelegate(self))
        self.view.setItemDelegateForColumn(self.Header.score,
                                           RealDelegate(self))
        self.view.header().setSectionResizeMode(QHeaderView.ResizeToContents)
        self.view.header().setStretchLastSection(False)
        self.view.header().setSectionResizeMode(self.Header.variable,
                                                QHeaderView.Stretch)
        self.view.setFocus()

    @Inputs.data
    def set_data(self, data):
        self.closeContext()
        self.clear()
        self.data = data
        self._setup_info_label()
        self._check_data()
        self.openContext(data)
        if self.data is not None:
            self.batch_vars = [data.domain[v.name] for v in self.batch_vars]
            self._setup_model()
        self.commit()

    def clear(self):
        self.batch_vars = []
        if self.model:
            n_rows = self.model.rowCount()
            self.model.removeRows(0, n_rows)

    def _setup_info_label(self):
        text = "No data on input."
        if self.data is not None:
            domain, attrs = self.data.domain, self.data.domain.attributes
            text = "{} cells, {} genes\n".format(len(self.data), len(attrs))
            text += "{} meta features".format(len(domain.metas)) \
                if len(domain.metas) else "(no meta features)"
        self.info_label.setText(text)

    def _check_data(self):
        self.clear_messages()
        if self.data and self.data.domain.has_discrete_attributes():
            self.data = None
            self.Error.discrete_attributes()
        if self.data and np.isnan(self.data.X).any():
            self.data.X = np.nan_to_num(self.data.X)
            self.Warning.missing_values()

    def _setup_model(self):
        estimator = ScBatchScorer()
        for var in self.data.domain.class_vars + self.data.domain.metas:
            if not var.is_primitive():
                continue
            try:
                score = float(estimator.score_data(self.data, var))
            except Exception:
                score = np.nan
            self.model.appendRow([
                self.__selected_item(var),
                self.__variable_item(var),
                self.__count_item(var),
                self.__score_item(score)
            ])

    def __selected_item(self, var):
        item = QStandardItem()
        item.setData(var, VariableRole)
        item.setCheckable(True)
        select = var in self.batch_vars
        item.setCheckState(Qt.Checked if select else Qt.Unchecked)
        item.setEditable(False)
        return item

    def __variable_item(self, var):
        item = QStandardItem()
        item.setData(var.name, Qt.DisplayRole)
        item.setData(gui.attributeIconDict[var], Qt.DecorationRole)
        item.setEditable(False)
        return item

    def __count_item(self, var):
        item = QStandardItem()
        if var.is_discrete:
            item.setData(len(var.values), Qt.DisplayRole)
        item.setEditable(False)
        return item

    def __score_item(self, score):
        item = QStandardItem()
        item.setData(score, Qt.DisplayRole)
        item.setEditable(False)
        return item

    def commit(self):
        data = None
        self.Error.general_error.clear()
        self.Warning.negative_values.clear()
        if self.data is not None:
            if (self.data.X < 0).any() and self.skip_zeros:
                self.Warning.negative_values()
                data = self.data
            else:
                try:
                    data = SCBatchNormalizer(
                        LinkMethod.items()[self.link_method], self.skip_zeros,
                        self.batch_vars)(self.data)
                except Exception as e:
                    self.Error.general_error(str(e))
                    data = None
        self.Outputs.data.send(data)

    def send_report(self):
        method = LinkMethod.items()[self.link_method]
        if self.skip_zeros:
            method += " (Skip zero expressions)"
        variables = ", ".join([v.name for v in self.batch_vars]) \
            if self.batch_vars else "None"
        self.report_items("", [("Method", method),
                               ("Batch variable selection", variables)])