def test_deprecated_str_as_var(self):
        if LooseVersion(Orange.__version__) >= LooseVersion("3.26"):
            # pragma: no cover
            self.fail("Remove support for variables stored as string settings "
                      "and this test.")

        context = Mock()
        context.attributes = {"foo": 2}
        context.metas = {}
        setting = ContextSetting("")
        setting.name = "setting_name"
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            DomainContextHandler.encode_setting(context, setting, "foo")
            self.assertIn("setting_name", w[0].message.args[0])
    def test_filter_value(self):
        setting = ContextSetting([])
        setting.name = "value"

        def test_filter(before_value, after_value):
            data = dict(value=before_value)
            self.handler.filter_value(setting, data, *self.args)
            self.assertEqual(data.get("value", None), after_value)

        # filter list values
        test_filter([], [])
        # When list contains attributes asa tuple of (name, type),
        # Attributes not present in domain should be filtered out
        test_filter([("d1", Discrete), ("d1", Continuous),
                     ("c1", Continuous), ("c1", Discrete)],
                    [("d1", Discrete), ("c1", Continuous)])
        # All other values in list should remain
        test_filter([0, [1, 2, 3], "abcd", 5.4], [0, [1, 2, 3], "abcd", 5.4])
Beispiel #3
0
class OWLinearProjection(OWAnchorProjectionWidget):
    name = "Linear Projection"
    description = "A multi-axis projection of data onto " \
                  "a two-dimensional plane."
    icon = "icons/LinearProjection.svg"
    priority = 240
    keywords = []

    Placement = Enum("Placement", dict(Circular=0, LDA=1, PCA=2),
                     type=int, qualname="OWLinearProjection.Placement")

    Projection_name = {Placement.Circular: "Circular Placement",
                       Placement.LDA: "Linear Discriminant Analysis",
                       Placement.PCA: "Principal Component Analysis"}

    settings_version = 5

    placement = Setting(Placement.Circular)
    selected_vars = ContextSetting([])
    vizrank = SettingProvider(LinearProjectionVizRank)
    GRAPH_CLASS = OWLinProjGraph
    graph = SettingProvider(OWLinProjGraph)

    class Error(OWAnchorProjectionWidget.Error):
        no_cont_features = Msg("Plotting requires numeric features")

    def __init__(self):
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_selected.removed.connect(self.__model_selected_changed)
        self.model_other = VariableListModel(enable_dnd=True)

        self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank(
            None, self, "Suggest Features", self.__vizrank_set_attrs)

        super().__init__()

    def _add_controls(self):
        self._add_controls_variables()
        self._add_controls_placement()
        super()._add_controls()
        self.gui.add_control(
            self._effects_box, gui.hSlider, "Hide radius:", master=self.graph,
            value="hide_radius", minValue=0, maxValue=100, step=10,
            createLabel=False, callback=self.__radius_slider_changed
        )
        self.controlArea.layout().removeWidget(self.control_area_stretch)
        self.control_area_stretch.setParent(None)

    def _add_controls_variables(self):
        self.variables_selection = VariablesSelection(
            self, self.model_selected, self.model_other, self.controlArea
        )
        self.variables_selection.added.connect(self.__model_selected_changed)
        self.variables_selection.removed.connect(self.__model_selected_changed)
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank
        )

    def _add_controls_placement(self):
        box = gui.widgetBox(
            self.controlArea, True,
            sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum)
        )
        self.radio_placement = gui.radioButtonsInBox(
            box, self, "placement",
            btnLabels=[self.Projection_name[x] for x in self.Placement],
            callback=self.__placement_radio_changed
        )

    @property
    def continuous_variables(self):
        if self.data is None or self.data.domain is None:
            return []
        dom = self.data.domain
        return [v for v in chain(dom.variables, dom.metas) if v.is_continuous]

    @property
    def effective_variables(self):
        return self.model_selected[:]

    def __vizrank_set_attrs(self, attrs):
        if not attrs:
            return
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [var for var in self.continuous_variables
                               if var not in attrs]
        self.__model_selected_changed()

    def __model_selected_changed(self):
        self.selected_vars = [(var.name, vartype(var)) for var
                              in self.model_selected]
        self.projection = None
        self._check_options()
        self.init_projection()
        self.setup_plot()
        self.commit()

    def __placement_radio_changed(self):
        self.controls.graph.hide_radius.setEnabled(
            self.placement != self.Placement.Circular)
        self.projection = self.projector = None
        self._init_vizrank()
        self.init_projection()
        self.setup_plot()
        self.commit()

    def __radius_slider_changed(self):
        self.graph.update_radius()

    def colors_changed(self):
        super().colors_changed()
        self._init_vizrank()

    def set_data(self, data):
        super().set_data(data)
        self._check_options()
        self._init_vizrank()
        self.init_projection()

    def use_context(self):
        self.model_selected.clear()
        self.model_other.clear()
        if self.data is not None and len(self.selected_vars):
            d, selected = self.data.domain, [v[0] for v in self.selected_vars]
            self.model_selected[:] = [d[attr] for attr in selected]
            self.model_other[:] = [d[attr.name] for attr in
                                   self.continuous_variables
                                   if attr.name not in selected]
        elif self.data is not None:
            self.model_selected[:] = self.continuous_variables[:3]
            self.model_other[:] = self.continuous_variables[3:]

    def _check_options(self):
        buttons = self.radio_placement.buttons
        for btn in buttons:
            btn.setEnabled(True)

        if self.data is not None:
            has_discrete_class = self.data.domain.has_discrete_class
            if not has_discrete_class or len(np.unique(self.data.Y)) < 2:
                buttons[self.Placement.LDA].setEnabled(False)
                if self.placement == self.Placement.LDA:
                    self.placement = self.Placement.Circular

        self.controls.graph.hide_radius.setEnabled(
            self.placement != self.Placement.Circular)

    def _init_vizrank(self):
        is_enabled, msg = False, ""
        if self.data is None:
            msg = "There is no data."
        elif self.attr_color is None:
            msg = "Color variable has to be selected"
        elif self.attr_color.is_continuous and \
                self.placement == self.Placement.LDA:
            msg = "Suggest Features does not work for Linear " \
                  "Discriminant Analysis Projection when " \
                  "continuous color variable is selected."
        elif len([v for v in self.continuous_variables
                  if v is not self.attr_color]) < 3:
            msg = "Not enough available continuous variables"
        elif len(self.data[self.valid_data]) < 2:
            msg = "Not enough valid data instances"
        else:
            is_enabled = not np.isnan(self.data.get_column_view(
                self.attr_color)[0].astype(float)).all()
        self.btn_vizrank.setToolTip(msg)
        self.btn_vizrank.setEnabled(is_enabled)
        if is_enabled:
            self.vizrank.initialize()

    def check_data(self):
        def error(err):
            err()
            self.data = None

        super().check_data()
        if self.data is not None:
            if not len(self.continuous_variables):
                error(self.Error.no_cont_features)

    def init_attr_values(self):
        super().init_attr_values()
        self.selected_vars = []

    def init_projection(self):
        if self.placement == self.Placement.Circular:
            self.projector = CircularPlacement()
        elif self.placement == self.Placement.LDA:
            self.projector = LDA(solver="eigen", n_components=2)
        elif self.placement == self.Placement.PCA:
            self.projector = PCA(n_components=2)
            self.projector.component = 2
            self.projector.preprocessors = PCA.preprocessors + [Normalize()]

        super().init_projection()

    def get_coordinates_data(self):
        def normalized(a):
            span = np.max(a, axis=0) - np.min(a, axis=0)
            span[span == 0] = 1
            return (a - np.mean(a, axis=0)) / span

        embedding = self.get_embedding()
        if embedding is None:
            return None, None
        norm_emb = normalized(embedding[self.valid_data])
        return (norm_emb.ravel(), np.zeros(len(norm_emb), dtype=float)) \
            if embedding.shape[1] == 1 else norm_emb.T

    def _get_send_report_caption(self):
        def projection_name():
            return self.Projection_name[self.placement]

        return report.render_items_vert((
            ("Projection", projection_name()),
            ("Color", self._get_caption_var_name(self.attr_color)),
            ("Label", self._get_caption_var_name(self.attr_label)),
            ("Shape", self._get_caption_var_name(self.attr_shape)),
            ("Size", self._get_caption_var_name(self.attr_size)),
            ("Jittering", self.graph.jitter_size != 0 and
             "{} %".format(self.graph.jitter_size))))

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            settings_["point_width"] = settings_["point_size"]
        if version < 3:
            settings_graph = {}
            settings_graph["jitter_size"] = settings_["jitter_value"]
            settings_graph["point_width"] = settings_["point_width"]
            settings_graph["alpha_value"] = settings_["alpha_value"]
            settings_graph["class_density"] = settings_["class_density"]
            settings_["graph"] = settings_graph
        if version < 4:
            if "radius" in settings_:
                settings_["graph"]["hide_radius"] = settings_["radius"]
            if "selection_indices" in settings_ and \
                    settings_["selection_indices"] is not None:
                selection = settings_["selection_indices"]
                settings_["selection"] = [(i, 1) for i, selected in
                                          enumerate(selection) if selected]
        if version < 5:
            if "placement" in settings_ and \
                    settings_["placement"] not in cls.Placement:
                settings_["placement"] = cls.Placement.Circular

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            domain = context.ordered_domain
            c_domain = [t for t in context.ordered_domain if t[1] == 2]
            d_domain = [t for t in context.ordered_domain if t[1] == 1]
            for d, old_val, new_val in ((domain, "color_index", "attr_color"),
                                        (d_domain, "shape_index", "attr_shape"),
                                        (c_domain, "size_index", "attr_size")):
                index = context.values[old_val][0] - 1
                context.values[new_val] = (d[index][0], d[index][1] + 100) \
                    if 0 <= index < len(d) else None
        if version < 3:
            context.values["graph"] = {
                "attr_color": context.values["attr_color"],
                "attr_shape": context.values["attr_shape"],
                "attr_size": context.values["attr_size"]
            }
        if version == 3:
            values = context.values
            values["attr_color"] = values["graph"]["attr_color"]
            values["attr_size"] = values["graph"]["attr_size"]
            values["attr_shape"] = values["graph"]["attr_shape"]
            values["attr_label"] = values["graph"]["attr_label"]
Beispiel #4
0
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget"""

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001
    keywords = []

    class Inputs:
        evaluation_results = Input("Evaluation Results",
                                   Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data",
                               Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    quantities = [
        "Number of instances", "Proportion of predicted",
        "Proportion of actual"
    ]

    settings_version = 1
    settingsHandler = ClassValuesContextHandler()

    selected_learner = Setting([0], schema_only=True)
    selection = ContextSetting(set())
    selected_quantity = Setting(0)
    append_predictions = Setting(True)
    append_probabilities = Setting(False)
    autocommit = Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances", "click_cell")
    ]

    class Error(widget.OWWidget.Error):
        no_regression = Msg("Confusion Matrix cannot show regression results.")
        invalid_values = Msg(
            "Evaluation Results input contains invalid values")

    def __init__(self):
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        self.learners_box = gui.listBox(self.controlArea,
                                        self,
                                        "selected_learner",
                                        "learners",
                                        box=True,
                                        callback=self._learner_changed)

        self.outputbox = gui.vBox(self.controlArea, "Output")
        box = gui.hBox(self.outputbox)
        gui.checkBox(box,
                     self,
                     "append_predictions",
                     "Predictions",
                     callback=self._invalidate)
        gui.checkBox(box,
                     self,
                     "append_probabilities",
                     "Probabilities",
                     callback=self._invalidate)

        gui.auto_apply(self.outputbox, self, "autocommit", box=False)

        self.mainArea.layout().setContentsMargins(0, 0, 0, 0)

        box = gui.vBox(self.mainArea, box=True)

        sbox = gui.hBox(box)
        gui.rubber(sbox)
        gui.comboBox(sbox,
                     self,
                     "selected_quantity",
                     items=self.quantities,
                     label="Show: ",
                     orientation=Qt.Horizontal,
                     callback=self._update)

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        view.clicked.connect(self.cell_clicked)
        box.layout().addWidget(view)

        selbox = gui.hBox(box)
        gui.button(selbox,
                   self,
                   "Select Correct",
                   callback=self.select_correct,
                   autoDefault=False)
        gui.button(selbox,
                   self,
                   "Select Misclassified",
                   callback=self.select_wrong,
                   autoDefault=False)
        gui.button(selbox,
                   self,
                   "Clear Selection",
                   callback=self.select_none,
                   autoDefault=False)

    @staticmethod
    def sizeHint():
        """Initial size"""
        return QSize(750, 340)

    def _item(self, i, j):
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        item = self._item(0, 2)
        item.setData("Predicted", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)

        self._set_item(0, 2, item)
        item = self._item(2, 0)
        item.setData("Actual", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

        for p, label in enumerate(self.headers):
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < len(self.headers) - 1:
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        if len(' '.join(self.headers)) < 120:
            hor_header.setSectionResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        self.tablemodel.setRowCount(nclasses + 3)
        self.tablemodel.setColumnCount(nclasses + 3)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Set the input results."""
        # false positive, pylint: disable=no-member
        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        else:
            self.Error.no_regression.clear()

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            if np.any(np.isnan(results.actual)) or \
                    np.any(np.isnan(results.predicted)):
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        self.Error.invalid_values(shown=nan_values)

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
            return

        self.report_button.setDisabled(False)

        nmodels = results.predicted.shape[0]
        self.headers = class_values + \
                       [unicodedata.lookup("N-ARY SUMMATION")]

        # NOTE: The 'learner_names' is set in 'Test Learners' widget.
        self.learners = getattr(results, "learner_names",
                                [f"Learner #{i + 1}" for i in range(nmodels)])

        self._init_table(len(class_values))
        self.openContext(data.domain.class_var)
        if not prev_sel_learner or prev_sel_learner[0] >= len(self.learners):
            if self.learners:
                self.selected_learner[:] = [0]
        else:
            self.selected_learner[:] = prev_sel_learner
        self._update()
        self._set_selection()
        self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Handle cell click event"""
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def _prepare_data(self):
        indices = self.tableview.selectedIndexes()
        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        actual = self.results.actual
        learner_name = self.learners[self.selected_learner[0]]
        predicted = self.results.predicted[self.selected_learner[0]]
        selected = [
            i for i, t in enumerate(zip(actual, predicted)) if t in indices
        ]

        extra = []
        class_var = self.data.domain.class_var
        metas = self.data.domain.metas

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            var = Orange.data.DiscreteVariable(
                "{}({})".format(class_var.name, learner_name),
                class_var.values)
            metas = metas + (var, )

        if self.append_probabilities and \
                        self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner[0]]
            extra.append(np.array(probs, dtype=object))
            pvars = [
                Orange.data.ContinuousVariable("p({})".format(value))
                for value in class_var.values
            ]
            metas = metas + tuple(pvars)

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars, metas)
        data = self.data.transform(domain)
        if extra:
            data.metas[:, len(self.data.domain.metas):] = \
                np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data

    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            data, annotated_data = self._prepare_data()
        else:
            data = None
            annotated_data = None

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        self.commit()

    def _set_selection(self):
        selection = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        self._update()
        self._set_selection()
        self.commit()

    def _update(self):
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner:
            cmatrix = confusion_matrix(self.results, self.selected_learner[0])
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = np.diag_indices(n)

            colors = cmatrix.astype(np.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                normalized = cmatrix.astype(np.int)
                formatstr = "{}"
                div = np.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    normalized = 100 * cmatrix / rowsum[:, np.newaxis]
                    div = colors.max(axis=1)[:, np.newaxis]
                formatstr = "{:2.1f} %"
            div[div == 0] = 1
            colors /= div
            maxval = normalized[diag].max()
            if maxval > 0:
                colors[diag] = normalized[diag] / maxval

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val) else int(255 -
                                                            30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner:
            self.report_table(
                "Confusion matrix for {} (showing {})".format(
                    self.learners[self.selected_learner[0]],
                    self.quantities[self.selected_quantity].lower()),
                self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        if not version:
            # For some period of time the 'selected_learner' property was
            # changed from List[int] -> int
            # (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back
            # again (commit 8a492d79a2e17154a0881e24a05843406c8892c0)
            if "selected_learner" in settings and \
                    isinstance(settings["selected_learner"], int):
                settings["selected_learner"] = [settings["selected_learner"]]
Beispiel #5
0
class OWFile(widget.OWWidget, RecentPathsWComboMixin):
    name = "File"
    id = "orange.widgets.data.file"
    description = "Read data from an input file or network " \
                  "and send a data table to the output."
    icon = "icons/File.svg"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    outputs = [
        widget.OutputSignal(
            "Data",
            Table,
            doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False

    SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())]
    SIZE_LIMIT = 1e7
    LOCAL_FILE, URL = range(2)

    settingsHandler = PerfectDomainContextHandler()

    # Overload RecentPathsWidgetMixin.recent_paths to set defaults
    recent_paths = Setting([
        RecentPath("", "sample-datasets", "iris.tab"),
        RecentPath("", "sample-datasets", "titanic.tab"),
        RecentPath("", "sample-datasets", "housing.tab"),
        RecentPath("", "sample-datasets", "heart_disease.tab"),
    ])
    recent_urls = Setting([])
    source = Setting(LOCAL_FILE)
    xls_sheet = ContextSetting("")
    sheet_names = Setting({})
    url = Setting("")

    variables = ContextSetting([])

    dlg_formats = ("All readable files ({});;".format(
        '*' + ' *'.join(FileFormat.readers.keys())) + ";;".join(
            "{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS))
            for f in sorted(set(FileFormat.readers.values()),
                            key=list(FileFormat.readers.values()).index)))

    class Warning(widget.OWWidget.Warning):
        file_too_big = widget.Msg(
            "The file is too large to load automatically."
            " Press Reload to load.")

    def __init__(self):
        super().__init__()
        RecentPathsWComboMixin.__init__(self)
        self.domain = None
        self.data = None
        self.loaded_file = ""
        self.reader = None

        layout = QGridLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)
        vbox = gui.radioButtons(None,
                                self,
                                "source",
                                box=True,
                                addSpace=True,
                                callback=self.load_data,
                                addToLayout=False)

        rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.activated[int].connect(self.select_file)
        box.layout().addWidget(self.file_combo)
        layout.addWidget(box, 0, 1)

        file_button = gui.button(None,
                                 self,
                                 '...',
                                 callback=self.browse_file,
                                 autoDefault=False)
        file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon))
        file_button.setSizePolicy(Policy.Maximum, Policy.Fixed)
        layout.addWidget(file_button, 0, 2)

        reload_button = gui.button(None,
                                   self,
                                   "Reload",
                                   callback=self.load_data,
                                   autoDefault=False)
        reload_button.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed)
        layout.addWidget(reload_button, 0, 3)

        self.sheet_box = gui.hBox(None, addToLayout=False, margin=0)
        self.sheet_combo = gui.comboBox(None,
                                        self,
                                        "xls_sheet",
                                        callback=self.select_sheet,
                                        sendSelectedValue=True)
        self.sheet_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_label = QLabel()
        self.sheet_label.setText('Sheet')
        self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft)
        self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter)
        layout.addWidget(self.sheet_box, 2, 1)
        self.sheet_box.hide()

        rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        self.url_combo = url_combo = QComboBox()
        url_model = NamedURLModel(self.sheet_names)
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        l, t, r, b = url_edit.getTextMargins()
        url_edit.setTextMargins(l + 5, t, r, b)
        layout.addWidget(url_combo, 3, 1, 3, 3)
        url_combo.activated.connect(self._url_set)

        box = gui.vBox(self.controlArea, "Info")
        self.info = gui.widgetLabel(box, 'No data loaded.')
        self.warnings = gui.widgetLabel(box, '')

        box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)")
        domain_editor = DomainEditor(self.variables)
        self.editor_model = domain_editor.model()
        box.layout().addWidget(domain_editor)

        box = gui.hBox(self.controlArea)
        gui.button(box,
                   self,
                   "Browse documentation data sets",
                   callback=lambda: self.browse_file(True),
                   autoDefault=False)
        gui.rubber(box)
        box.layout().addWidget(self.report_button)
        self.report_button.setFixedWidth(170)

        self.apply_button = gui.button(box,
                                       self,
                                       "Apply",
                                       callback=self.apply_domain_edit)
        self.apply_button.setEnabled(False)
        self.apply_button.setFixedWidth(170)
        self.editor_model.dataChanged.connect(
            lambda: self.apply_button.setEnabled(True))

        self.set_file_list()
        # Must not call open_file from within __init__. open_file
        # explicitly re-enters the event loop (by a progress bar)

        self.setAcceptDrops(True)

        if self.source == self.LOCAL_FILE and \
                        os.path.getsize(self.last_path()) > self.SIZE_LIMIT:
            self.Warning.file_too_big()
            return

        QTimer.singleShot(0, self.load_data)

    def sizeHint(self):
        return QSize(600, 550)

    def select_file(self, n):
        assert n < len(self.recent_paths)
        super().select_file(n)
        if self.recent_paths:
            self.source = self.LOCAL_FILE
            self.load_data()
            self.set_file_list()

    def select_sheet(self):
        self.recent_paths[0].sheet = self.sheet_combo.currentText()
        self.load_data()

    def _url_set(self):
        self.source = self.URL
        self.load_data()

    def browse_file(self, in_demos=False):
        if in_demos:
            start_file = get_sample_datasets_dir()
            if not os.path.exists(start_file):
                QMessageBox.information(
                    None, "File",
                    "Cannot find the directory with documentation data sets")
                return
        else:
            start_file = self.last_path() or os.path.expanduser("~/")

        filename, _ = QFileDialog.getOpenFileName(self,
                                                  'Open Orange Data File',
                                                  start_file, self.dlg_formats)
        if not filename:
            return
        self.loaded_file = filename
        self.add_path(filename)
        self.source = self.LOCAL_FILE
        self.load_data()

    # Open a file, create data from it and send it over the data channel
    def load_data(self):
        # We need to catch any exception type since anything can happen in
        # file readers
        # pylint: disable=broad-except
        self.editor_model.set_domain(None)
        self.apply_button.setEnabled(False)
        self.Warning.file_too_big.clear()

        error = None
        try:
            self.reader = self._get_reader()
            if self.reader is None:
                self.data = None
                self.send("Data", None)
                self.info.setText("No data.")
                self.sheet_box.hide()
                return
        except Exception as ex:
            error = ex

        if not error:
            self._update_sheet_combo()
            with catch_warnings(record=True) as warnings:
                try:
                    data = self.reader.read()
                except Exception as ex:
                    error = ex
                self.warning(warnings[-1].message.args[0] if warnings else '')

        if error:
            self.data = None
            self.send("Data", None)
            self.info.setText("An error occurred:\n{}".format(error))
            self.editor_model.reset()
            self.sheet_box.hide()
            return

        self.info.setText(self._describe(data))

        add_origin(data, self.loaded_file or self.last_path())
        self.send("Data", data)
        self.editor_model.set_domain(data.domain)
        self.data = data

    def _get_reader(self):
        """

        Returns
        -------
        FileFormat
        """
        if self.source == self.LOCAL_FILE:
            reader = FileFormat.get_reader(self.last_path())
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)
            return reader
        elif self.source == self.URL:
            url = self.url_combo.currentText().strip()
            if url:
                return UrlReader(url)

    def _update_sheet_combo(self):
        if len(self.reader.sheets) < 2:
            self.sheet_box.hide()
            self.reader.select_sheet(None)
            return

        self.sheet_combo.clear()
        self.sheet_combo.addItems(self.reader.sheets)
        self._select_active_sheet()
        self.sheet_box.show()

    def _select_active_sheet(self):
        if self.reader.sheet:
            try:
                idx = self.reader.sheets.index(self.reader.sheet)
                self.sheet_combo.setCurrentIndex(idx)
            except ValueError:
                # Requested sheet does not exist in this file
                self.reader.select_sheet(None)
        else:
            self.sheet_combo.setCurrentIndex(0)

    def _describe(self, table):
        domain = table.domain
        text = ""

        attrs = getattr(table, "attributes", {})
        descs = [
            attrs[desc] for desc in ("Name", "Description") if desc in attrs
        ]
        if len(descs) == 2:
            descs[0] = "<b>{}</b>".format(descs[0])
        if descs:
            text += "<p>{}</p>".format("<br/>".join(descs))

        text += "<p>{} instance(s), {} feature(s), {} meta attribute(s)".\
            format(len(table), len(domain.attributes), len(domain.metas))
        if domain.has_continuous_class:
            text += "<br/>Regression; numerical class."
        elif domain.has_discrete_class:
            text += "<br/>Classification; discrete class with {} values.".\
                format(len(domain.class_var.values))
        elif table.domain.class_vars:
            text += "<br/>Multi-target; {} target variables.".format(
                len(table.domain.class_vars))
        else:
            text += "<br/>Data has no target variable."
        text += "</p>"

        if 'Timestamp' in table.domain:
            # Google Forms uses this header to timestamp responses
            text += '<p>First entry: {}<br/>Last entry: {}</p>'.format(
                table[0, 'Timestamp'], table[-1, 'Timestamp'])
        return text

    def storeSpecificSettings(self):
        self.current_context.modified_variables = self.variables[:]

    def retrieveSpecificSettings(self):
        if hasattr(self.current_context, "modified_variables"):
            self.variables[:] = self.current_context.modified_variables

    def apply_domain_edit(self):
        attributes = []
        class_vars = []
        metas = []
        places = [attributes, class_vars, metas]
        X, y, m = [], [], []
        cols = [X, y, m]  # Xcols, Ycols, Mcols

        def is_missing(x):
            return str(x) in ("nan", "")

        for column, (name, tpe, place, vals, is_con), (orig_var, orig_plc) in \
            zip(count(), self.editor_model.variables,
                chain([(at, 0) for at in self.data.domain.attributes],
                      [(cl, 1) for cl in self.data.domain.class_vars],
                      [(mt, 2) for mt in self.data.domain.metas])):
            if place == 3:
                continue
            if orig_plc == 2:
                col_data = list(chain(*self.data[:, orig_var].metas))
            else:
                col_data = list(chain(*self.data[:, orig_var]))
            if name == orig_var.name and tpe == type(orig_var):
                var = orig_var
            elif tpe == DiscreteVariable:
                values = list(
                    str(i) for i in set(col_data) if not is_missing(i))
                var = tpe(name, values)
                col_data = [
                    np.nan if is_missing(x) else values.index(str(x))
                    for x in col_data
                ]
            elif tpe == StringVariable and type(orig_var) == DiscreteVariable:
                var = tpe(name)
                col_data = [
                    orig_var.repr_val(x) if not np.isnan(x) else ""
                    for x in col_data
                ]
            else:
                var = tpe(name)
            places[place].append(var)
            cols[place].append(col_data)
        domain = Domain(attributes, class_vars, metas)
        X = np.array(X).T if len(X) else np.empty((len(self.data), 0))
        y = np.array(y).T if len(y) else None
        dtpe = object if any(
            isinstance(m, StringVariable) for m in domain.metas) else float
        m = np.array(m, dtype=dtpe).T if len(m) else None
        table = Table.from_numpy(domain, X, y, m, self.data.W)
        self.send("Data", table)
        self.apply_button.setEnabled(False)

    def get_widget_name_extension(self):
        _, name = os.path.split(self.loaded_file)
        return os.path.splitext(name)[0]

    def send_report(self):
        def get_ext_name(filename):
            try:
                return FileFormat.names[os.path.splitext(filename)[1]]
            except KeyError:
                return "unknown"

        if self.data is None:
            self.report_paragraph("File", "No file.")
            return

        if self.source == self.LOCAL_FILE:
            home = os.path.expanduser("~")
            if self.loaded_file.startswith(home):
                # os.path.join does not like ~
                name = "~/" + \
                       self.loaded_file[len(home):].lstrip("/").lstrip("\\")
            else:
                name = self.loaded_file
            if self.sheet_combo.isVisible():
                name += " ({})".format(self.sheet_combo.currentText())
            self.report_items("File", [("File name", name),
                                       ("Format", get_ext_name(name))])
        else:
            self.report_items("Data", [("Resource", self.url),
                                       ("Format", get_ext_name(self.url))])

        self.report_data("Data", self.data)

    def dragEnterEvent(self, event):
        """Accept drops of valid file urls"""
        urls = event.mimeData().urls()
        if urls:
            try:
                FileFormat.get_reader(
                    OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile())
                event.acceptProposedAction()
            except IOError:
                pass

    def dropEvent(self, event):
        """Handle file drops"""
        urls = event.mimeData().urls()
        if urls:
            self.add_path(
                OSX_NSURL_toLocalFile(urls[0])
                or urls[0].toLocalFile())  # add first file
            self.source = self.LOCAL_FILE
            self.load_data()
class OWFeatureStatistics(widget.OWWidget):
    name = 'Feature Statistics'
    description = 'Show basic statistics for data features.'
    icon = 'icons/FeatureStatistics.svg'

    class Inputs:
        data = Input('Data', Table, default=True)

    class Outputs:
        reduced_data = Output('Reduced Data', Table, default=True)
        statistics = Output('Statistics', Table)

    want_main_area = False

    settingsHandler = DomainContextHandler()
    settings_version = 2

    auto_commit = Setting(True)
    color_var = ContextSetting(None)  # type: Optional[Variable]
    # filter_string = ContextSetting('')

    sorting = Setting((0, Qt.AscendingOrder))
    selected_vars = ContextSetting([], schema_only=True)

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Table]

        # TODO: Implement filtering on the model
        # filter_box = gui.vBox(self.controlArea, 'Filter')
        # self.filter_text = gui.lineEdit(
        #     filter_box, self, value='filter_string',
        #     placeholderText='Filter variables by name',
        #     callback=self._filter_table_variables, callbackOnType=True,
        # )
        # shortcut = QShortcut(QKeySequence('Ctrl+f'), self, self.filter_text.setFocus)
        # shortcut.setWhatsThis('Filter variables by name')

        # Main area
        self.model = FeatureStatisticsTableModel(parent=self)
        self.table_view = FeatureStatisticsTableView(self.model, parent=self)
        self.table_view.selectionModel().selectionChanged.connect(self.on_select)
        self.table_view.horizontalHeader().sectionClicked.connect(self.on_header_click)

        self.controlArea.layout().addWidget(self.table_view)

        self.color_var_model = DomainModel(
            valid_types=(ContinuousVariable, DiscreteVariable),
            placeholder='None',
        )
        self.cb_color_var = gui.comboBox(
            self.buttonsArea, master=self, value='color_var', model=self.color_var_model,
            label='Color:', orientation=Qt.Horizontal, contentsLength=13,
            searchable=True
        )
        self.cb_color_var.activated.connect(self.__color_var_changed)

        gui.rubber(self.buttonsArea)
        gui.auto_send(self.buttonsArea, self, "auto_commit")

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    @staticmethod
    def sizeHint():
        return QSize(1050, 500)

    def _filter_table_variables(self):
        regex = QRegExp(self.filter_string)
        # If the user explicitly types different cases, we assume they know
        # what they are searching for and account for letter case in filter
        different_case = (
            any(c.islower() for c in self.filter_string) and
            any(c.isupper() for c in self.filter_string)
        )
        if not different_case:
            regex.setCaseSensitivity(Qt.CaseInsensitive)

    @Inputs.data
    def set_data(self, data):
        # Clear outputs and reset widget state
        self.closeContext()
        self.selected_vars = []
        self.model.resetSorting()
        self.Outputs.reduced_data.send(None)
        self.Outputs.statistics.send(None)

        # Setup widget state for new data and restore settings
        self.data = data

        if data is not None:
            self.info.set_input_summary(len(data),
                                        format_summary_details(data))
            self.color_var_model.set_domain(data.domain)
            self.color_var = None
            if self.data.domain.class_vars:
                self.color_var = self.data.domain.class_vars[0]
        else:
            self.info.set_input_summary(self.info.NoInput)
            self.color_var_model.set_domain(None)
            self.color_var = None
        self.model.set_data(data)

        self.openContext(self.data)
        self.__restore_selection()
        self.__restore_sorting()
        # self._filter_table_variables()
        self.__color_var_changed()

        self.commit()

    def __restore_selection(self):
        """Restore the selection on the table view from saved settings."""
        selection_model = self.table_view.selectionModel()
        selection = QItemSelection()
        if self.selected_vars:
            var_indices = {var: i for i, var in enumerate(self.model.variables)}
            selected_indices = [var_indices[var] for var in self.selected_vars]
            for row in self.model.mapFromSourceRows(selected_indices):
                selection.append(QItemSelectionRange(
                    self.model.index(row, 0),
                    self.model.index(row, self.model.columnCount() - 1)
                ))
        selection_model.select(selection, QItemSelectionModel.ClearAndSelect)

    def __restore_sorting(self):
        """Restore the sort column and order from saved settings."""
        sort_column, sort_order = self.sorting
        if self.model.n_attributes and sort_column < self.model.columnCount():
            self.model.sort(sort_column, sort_order)
            self.table_view.horizontalHeader().setSortIndicator(sort_column, sort_order)

    @pyqtSlot(int)
    def on_header_click(self, *_):
        # Store the header states
        sort_order = self.model.sortOrder()
        sort_column = self.model.sortColumn()
        self.sorting = sort_column, sort_order

    @pyqtSlot(int)
    def __color_var_changed(self, *_):
        if self.model is not None:
            self.model.set_target_var(self.color_var)

    def on_select(self):
        selection_indices = list(self.model.mapToSourceRows([
            i.row() for i in self.table_view.selectionModel().selectedRows()
        ]))
        self.selected_vars = list(self.model.variables[selection_indices])
        self.commit()

    def commit(self):
        if not self.selected_vars:
            self.info.set_output_summary(self.info.NoOutput)
            self.Outputs.reduced_data.send(None)
            self.Outputs.statistics.send(None)
            return

        # Send a table with only selected columns to output
        variables = self.selected_vars
        self.info.set_output_summary(len(self.data[:, variables]),
                                     format_summary_details(self.data[:, variables]))
        self.Outputs.reduced_data.send(self.data[:, variables])

        # Send the statistics of the selected variables to ouput
        labels, data = self.model.get_statistics_matrix(variables, return_labels=True)
        var_names = np.atleast_2d([var.name for var in variables]).T
        domain = Domain(
            attributes=[ContinuousVariable(name) for name in labels],
            metas=[StringVariable('Feature')]
        )
        statistics = Table(domain, data, metas=var_names)
        statistics.name = '%s (Feature Statistics)' % self.data.name
        self.Outputs.statistics.send(statistics)

    def send_report(self):
        view = self.table_view
        self.report_table(view)

    @classmethod
    def migrate_context(cls, context, version):
        if not version or version < 2:
            selected_rows = context.values.pop("selected_rows", None)
            if not selected_rows:
                selected_vars = []
            else:
                # This assumes that dict was saved by Python >= 3.6 so dict is
                # ordered; if not, context hasn't had worked anyway.
                all_vars = [
                    (var, tpe)
                    for (var, tpe) in chain(context.attributes.items(),
                                            context.metas.items())
                    # it would be nicer to use cls.HIDDEN_VAR_TYPES, but there
                    # is no suitable conversion function, and StringVariable (3)
                    # was the only hidden var when settings_version < 2, so:
                    if tpe != 3]
                selected_vars = [all_vars[i] for i in selected_rows]
            context.values["selected_vars"] = selected_vars, -3
Beispiel #7
0
class LineScanPlot(QWidget, OWComponent, SelectionGroupMixin,
                   ImageColorSettingMixin, ImageZoomMixin):

    attr_x = ContextSetting(None)
    gamma = Setting(0)

    selection_changed = Signal()

    def __init__(self, parent):
        QWidget.__init__(self)
        OWComponent.__init__(self, parent)
        SelectionGroupMixin.__init__(self)
        ImageColorSettingMixin.__init__(self)

        self.parent = parent

        self.selection_type = SELECTMANY
        self.saving_enabled = True
        self.selection_enabled = True
        self.viewtype = INDIVIDUAL  # required bt InteractiveViewBox
        self.highlighted = None
        self.data_points = None
        self.data_imagepixels = None

        self.plotview = pg.GraphicsLayoutWidget()
        self.plotview.show()

        self.plot = pg.PlotItem(background="w",
                                viewBox=InteractiveViewBox(self))
        self.plotview.addItem(self.plot)

        self.legend = ImageColorLegend()
        self.plotview.addItem(self.legend)

        self.plot.scene().installEventFilter(
            HelpEventDelegate(self.help_event, self))

        layout = QVBoxLayout()
        self.setLayout(layout)
        self.layout().setContentsMargins(0, 0, 0, 0)
        self.layout().addWidget(self.plotview)

        self.img = ImageItemNan()
        self.img.setOpts(axisOrder='row-major')
        self.plot.addItem(self.img)
        self.plot.scene().sigMouseMoved.connect(self.plot.vb.mouseMovedEvent)

        layout = QGridLayout()
        self.plotview.setLayout(layout)
        self.button = QPushButton("Menu", self.plotview)
        self.button.setAutoDefault(False)

        layout.setRowStretch(1, 1)
        layout.setColumnStretch(1, 1)
        layout.addWidget(self.button, 0, 0)
        view_menu = MenuFocus(self)
        self.button.setMenu(view_menu)

        # prepare interface according to the new context
        self.parent.contextAboutToBeOpened.connect(
            lambda x: self.init_interface_data(x[0]))

        self.add_zoom_actions(view_menu)

        common_options = dict(labelWidth=50,
                              orientation=Qt.Horizontal,
                              sendSelectedValue=True,
                              valueType=str)

        choose_xy = QWidgetAction(self)
        box = gui.vBox(self)
        box.setFocusPolicy(Qt.TabFocus)
        self.xy_model = DomainModel(DomainModel.METAS | DomainModel.CLASSES,
                                    valid_types=DomainModel.PRIMITIVE,
                                    placeholder="Position (index)")
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self.update_attr,
                                      model=self.xy_model,
                                      **common_options)

        box.setFocusProxy(self.cb_attr_x)

        box.layout().addWidget(self.color_settings_box())

        choose_xy.setDefaultWidget(box)
        view_menu.addAction(choose_xy)

        self.lsx = None  # info about the X axis
        self.lsy = None  # info about the Y axis

        self.data = None
        self.data_ids = {}

    def init_interface_data(self, data):
        same_domain = (self.data and data and data.domain == self.data.domain)
        if not same_domain:
            self.init_attr_values(data)

    def help_event(self, ev):
        pos = self.plot.vb.mapSceneToView(ev.scenePos())
        sel, wavenumber_ind = self._points_at_pos(pos)
        prepared = []
        if sel is not None:
            prepared.append(str(self.wavenumbers[wavenumber_ind]))
            for d in self.data[sel]:
                variables = [
                    v for v in self.data.domain.metas +
                    self.data.domain.class_vars if v not in [self.attr_x]
                ]
                features = [
                    '{} = {}'.format(attr.name, d[attr]) for attr in variables
                ]
                features.append('value = {}'.format(d[wavenumber_ind]))
                prepared.append("\n".join(features))
        text = "\n\n".join(prepared)
        if text:
            text = ('<span style="white-space:pre">{}</span>'.format(
                escape(text)))
            QToolTip.showText(ev.screenPos(), text, widget=self.plotview)
            return True
        else:
            return False

    def update_attr(self):
        self.update_view()

    def init_attr_values(self, data):
        domain = data.domain if data is not None else None
        self.xy_model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None

    def set_data(self, data):
        if data:
            self.data = data
            self.data_ids = {e: i for i, e in enumerate(data.ids)}
            self.restore_selection_settings()
        else:
            self.data = None
            self.data_ids = {}

    def update_view(self):
        self.img.clear()
        self.img.setSelection(None)
        self.legend.set_colors(None)
        self.lsx = None
        self.lsy = None
        self.wavenumbers = None
        self.data_xs = None
        self.data_imagepixels = None
        if self.data and len(self.data.domain.attributes):
            if self.attr_x is not None:
                xat = self.data.domain[self.attr_x]
                ndom = Domain([xat])
                datam = Table(ndom, self.data)
                coorx = datam.X[:, 0]
            else:
                coorx = np.arange(len(self.data))
            self.lsx = lsx = values_to_linspace(coorx)
            self.data_xs = coorx

            self.wavenumbers = wavenumbers = getx(self.data)
            self.lsy = lsy = values_to_linspace(wavenumbers)

            # set data
            imdata = np.ones((lsy[2], lsx[2])) * float("nan")
            xindex = index_values(coorx, lsx)
            yindex = index_values(wavenumbers, lsy)
            for xind, d in zip(xindex, self.data.X):
                imdata[yindex, xind] = d

            self.data_imagepixels = xindex

            self.img.setImage(imdata, autoLevels=False)
            self.update_levels()
            self.update_color_schema()

            # shift centres of the pixels so that the axes are useful
            shiftx = _shift(lsx)
            shifty = _shift(lsy)
            left = lsx[0] - shiftx
            bottom = lsy[0] - shifty
            width = (lsx[1] - lsx[0]) + 2 * shiftx
            height = (lsy[1] - lsy[0]) + 2 * shifty
            self.img.setRect(QRectF(left, bottom, width, height))

            self.refresh_img_selection()

    def refresh_img_selection(self):
        selected_px = np.zeros((self.lsy[2], self.lsx[2]), dtype=np.uint8)
        selected_px[:, self.data_imagepixels] = self.selection_group
        self.img.setSelection(selected_px)

    def make_selection(self, selected, add):
        """Add selected indices to the selection."""
        add_to_group, add_group, remove = selection_modifiers()
        if self.data and self.lsx and self.lsy:
            if add_to_group:  # both keys - need to test it before add_group
                selnum = np.max(self.selection_group)
            elif add_group:
                selnum = np.max(self.selection_group) + 1
            elif remove:
                selnum = 0
            else:
                self.selection_group *= 0
                selnum = 1
            if selected is not None:
                self.selection_group[selected] = selnum
            self.refresh_img_selection()
        self.prepare_settings_for_saving()
        self.selection_changed.emit()

    def _points_at_pos(self, pos):
        if self.data and self.lsx and self.lsy:
            x, y = pos.x(), pos.y()
            x_distance = np.abs(self.data_xs - x)
            sel = (x_distance < _shift(self.lsx))
            wavenumber_distance = np.abs(self.wavenumbers - y)
            wavenumber_ind = np.argmin(wavenumber_distance)
            return sel, wavenumber_ind
        return None, None

    def select_by_click(self, pos, add):
        sel, _ = self._points_at_pos(pos)
        self.make_selection(sel, add)
Beispiel #8
0
class OWScatterPlot(OWDataProjectionWidget):
    """Scatterplot visualization with explorative analysis and intelligent
    data visualization enhancements."""

    name = 'Scatter Plot'
    description = "Interactive scatter plot visualization with " \
                  "intelligent data visualization enhancements."
    icon = "icons/ScatterPlot.svg"
    priority = 140
    keywords = []

    class Inputs(OWDataProjectionWidget.Inputs):
        features = Input("Features", AttributeList)

    class Outputs(OWDataProjectionWidget.Outputs):
        features = Output("Features", AttributeList, dynamic=False)

    settings_version = 4
    auto_sample = Setting(True)
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    tooltip_shows_all = Setting(True)

    GRAPH_CLASS = OWScatterPlotGraph
    graph = SettingProvider(OWScatterPlotGraph)
    embedding_variables_names = None

    xy_changed_manually = Signal(Variable, Variable)

    class Warning(OWDataProjectionWidget.Warning):
        missing_coords = Msg("Plot cannot be displayed because '{}' or '{}' "
                             "is missing for all data points")
        no_continuous_vars = Msg("Data has no continuous variables")

    class Information(OWDataProjectionWidget.Information):
        sampled_sql = Msg("Large SQL table; showing a sample.")
        missing_coords = Msg(
            "Points with missing '{}' or '{}' are not displayed")

    def __init__(self):
        self.sql_data = None  # Orange.data.sql.table.SqlTable
        self.attribute_selection_list = None  # list of Orange.data.Variable
        self.__timer = QTimer(self, interval=1200)
        self.__timer.timeout.connect(self.add_data)
        super().__init__()

        # manually register Matplotlib file writers
        self.graph_writers = self.graph_writers.copy()
        for w in [MatplotlibFormat, MatplotlibPDFFormat]:
            self.graph_writers.append(w)

    def _add_controls(self):
        self._add_controls_axis()
        self._add_controls_sampling()
        super()._add_controls()
        self.gui.add_widgets([
            self.gui.ShowGridLines, self.gui.ToolTipShowsAll,
            self.gui.RegressionLine
        ], self._plot_box)
        gui.checkBox(
            gui.indentedBox(self._plot_box),
            self,
            value="graph.orthonormal_regression",
            label="Treat variables as independent",
            callback=self.graph.update_regression_line,
            tooltip=
            "If checked, fit line to group (minimize distance from points);\n"
            "otherwise fit y as a function of x (minimize vertical distances)")

    def _add_controls_axis(self):
        common_options = dict(labelWidth=50,
                              orientation=Qt.Horizontal,
                              sendSelectedValue=True,
                              valueType=str,
                              contentsLength=14)
        box = gui.vBox(self.controlArea, True)
        dmod = DomainModel
        self.xy_model = DomainModel(dmod.MIXED, valid_types=ContinuousVariable)
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self.set_attr_from_combo,
                                      model=self.xy_model,
                                      **common_options)
        self.cb_attr_y = gui.comboBox(box,
                                      self,
                                      "attr_y",
                                      label="Axis y:",
                                      callback=self.set_attr_from_combo,
                                      model=self.xy_model,
                                      **common_options)
        vizrank_box = gui.hBox(box)
        self.vizrank, self.vizrank_button = ScatterPlotVizRank.add_vizrank(
            vizrank_box, self, "Find Informative Projections", self.set_attr)

    def _add_controls_sampling(self):
        self.sampling = gui.auto_commit(self.controlArea,
                                        self,
                                        "auto_sample",
                                        "Sample",
                                        box="Sampling",
                                        callback=self.switch_sampling,
                                        commit=lambda: self.add_data(1))
        self.sampling.setVisible(False)

    @property
    def effective_variables(self):
        return [self.attr_x, self.attr_y]

    def _vizrank_color_change(self):
        self.vizrank.initialize()
        is_enabled = self.data is not None and not self.data.is_sparse() and \
            len(self.xy_model) > 2 and len(self.data[self.valid_data]) > 1 \
            and np.all(np.nan_to_num(np.nanstd(self.data.X, 0)) != 0)
        self.vizrank_button.setEnabled(
            is_enabled and self.attr_color is not None and not np.isnan(
                self.data.get_column_view(
                    self.attr_color)[0].astype(float)).all())
        text = "Color variable has to be selected." \
            if is_enabled and self.attr_color is None else ""
        self.vizrank_button.setToolTip(text)

    def set_data(self, data):
        super().set_data(data)

        def findvar(name, iterable):
            """Find a Orange.data.Variable in `iterable` by name"""
            for el in iterable:
                if isinstance(el, Variable) and el.name == name:
                    return el
            return None

        # handle restored settings from  < 3.3.9 when attr_* were stored
        # by name
        if isinstance(self.attr_x, str):
            self.attr_x = findvar(self.attr_x, self.xy_model)
        if isinstance(self.attr_y, str):
            self.attr_y = findvar(self.attr_y, self.xy_model)
        if isinstance(self.attr_label, str):
            self.attr_label = findvar(self.attr_label, self.gui.label_model)
        if isinstance(self.attr_color, str):
            self.attr_color = findvar(self.attr_color, self.gui.color_model)
        if isinstance(self.attr_shape, str):
            self.attr_shape = findvar(self.attr_shape, self.gui.shape_model)
        if isinstance(self.attr_size, str):
            self.attr_size = findvar(self.attr_size, self.gui.size_model)

    def check_data(self):
        super().check_data()
        self.__timer.stop()
        self.sampling.setVisible(False)
        self.sql_data = None
        if isinstance(self.data, SqlTable):
            if self.data.approx_len() < 4000:
                self.data = Table(self.data)
            else:
                self.Information.sampled_sql()
                self.sql_data = self.data
                data_sample = self.data.sample_time(0.8, no_cache=True)
                data_sample.download_data(2000, partial=True)
                self.data = Table(data_sample)
                self.sampling.setVisible(True)
                if self.auto_sample:
                    self.__timer.start()

        if self.data is not None:
            if not self.data.domain.has_continuous_attributes(True, True):
                self.Warning.no_continuous_vars()
                self.data = None

        if self.data is not None and (len(self.data) == 0
                                      or len(self.data.domain) == 0):
            self.data = None

    def get_embedding(self):
        self.valid_data = None
        if self.data is None:
            return None

        x_data = self.get_column(self.attr_x, filter_valid=False)
        y_data = self.get_column(self.attr_y, filter_valid=False)
        if x_data is None or y_data is None:
            return None

        self.Warning.missing_coords.clear()
        self.Information.missing_coords.clear()
        self.valid_data = np.isfinite(x_data) & np.isfinite(y_data)
        if self.valid_data is not None and not np.all(self.valid_data):
            msg = self.Information if np.any(self.valid_data) else self.Warning
            msg.missing_coords(self.attr_x.name, self.attr_y.name)
        return np.vstack((x_data, y_data)).T

    # Tooltip
    def _point_tooltip(self, point_id, skip_attrs=()):
        point_data = self.data[point_id]
        xy_attrs = (self.attr_x, self.attr_y)
        text = "<br/>".join(
            escape('{} = {}'.format(var.name, point_data[var]))
            for var in xy_attrs)
        if self.tooltip_shows_all:
            others = super()._point_tooltip(point_id, skip_attrs=xy_attrs)
            if others:
                text = "<b>{}</b><br/><br/>{}".format(text, others)
        return text

    def add_data(self, time=0.4):
        if self.data and len(self.data) > 2000:
            self.__timer.stop()
            return
        data_sample = self.sql_data.sample_time(time, no_cache=True)
        if data_sample:
            data_sample.download_data(2000, partial=True)
            data = Table(data_sample)
            self.data = Table.concatenate((self.data, data), axis=0)
            self.handleNewSignals()

    def init_attr_values(self):
        super().init_attr_values()
        data = self.data
        domain = data.domain if data and len(data) else None
        self.xy_model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None
        self.attr_y = self.xy_model[1] if len(self.xy_model) >= 2 \
            else self.attr_x

    def switch_sampling(self):
        self.__timer.stop()
        if self.auto_sample and self.sql_data:
            self.add_data()
            self.__timer.start()

    def set_subset_data(self, subset_data):
        self.warning()
        if isinstance(subset_data, SqlTable):
            if subset_data.approx_len() < AUTO_DL_LIMIT:
                subset_data = Table(subset_data)
            else:
                self.warning("Data subset does not support large Sql tables")
                subset_data = None
        super().set_subset_data(subset_data)

    # called when all signals are received, so the graph is updated only once
    def handleNewSignals(self):
        if self.attribute_selection_list and self.data is not None and \
                self.data.domain is not None and \
                all(attr in self.data.domain for attr
                        in self.attribute_selection_list):
            self.attr_x, self.attr_y = self.attribute_selection_list[:2]
            self.attribute_selection_list = None
        super().handleNewSignals()
        self._vizrank_color_change()

    @Inputs.features
    def set_shown_attributes(self, attributes):
        if attributes and len(attributes) >= 2:
            self.attribute_selection_list = attributes[:2]
            self._invalidated = self._invalidated \
                or self.attr_x != attributes[0] \
                or self.attr_y != attributes[1]
        else:
            self.attribute_selection_list = None

    def set_attr(self, attr_x, attr_y):
        if attr_x != self.attr_x or attr_y != self.attr_y:
            self.attr_x, self.attr_y = attr_x, attr_y
            self.attr_changed()

    def set_attr_from_combo(self):
        self.attr_changed()
        self.xy_changed_manually.emit(self.attr_x, self.attr_y)

    def attr_changed(self):
        self.setup_plot()
        self.commit()

    def get_axes(self):
        return {"bottom": self.attr_x, "left": self.attr_y}

    def colors_changed(self):
        super().colors_changed()
        self._vizrank_color_change()

    def commit(self):
        super().commit()
        self.send_features()

    def send_features(self):
        features = [attr for attr in [self.attr_x, self.attr_y] if attr]
        self.Outputs.features.send(features or None)

    def get_widget_name_extension(self):
        if self.data is not None:
            return "{} vs {}".format(self.attr_x.name, self.attr_y.name)
        return None

    @classmethod
    def migrate_settings(cls, settings, version):
        if version < 2 and "selection" in settings and settings["selection"]:
            settings["selection_group"] = [(a, 1)
                                           for a in settings["selection"]]
        if version < 3:
            if "auto_send_selection" in settings:
                settings["auto_commit"] = settings["auto_send_selection"]
            if "selection_group" in settings:
                settings["selection"] = settings["selection_group"]

    @classmethod
    def migrate_context(cls, context, version):
        values = context.values
        if version < 3:
            values["attr_color"] = values["graph"]["attr_color"]
            values["attr_size"] = values["graph"]["attr_size"]
            values["attr_shape"] = values["graph"]["attr_shape"]
            values["attr_label"] = values["graph"]["attr_label"]
        if version < 4:
            if values["attr_x"][1] % 100 == 1 or values["attr_y"][1] % 100 == 1:
                raise IncompatibleContext()
Beispiel #9
0
class OWSelectAttributes(widget.OWWidget):
    # pylint: disable=too-many-instance-attributes
    name = "Select Columns"
    description = "Select columns from the data table and assign them to " \
                  "data features, classes or meta variables."
    icon = "icons/SelectColumns.svg"
    priority = 100
    keywords = ["filter", "attributes", "target", "variable"]

    class Inputs:
        data = Input("Data", Table, default=True)
        features = Input("Features", AttributeList)

    class Outputs:
        data = Output("Data", Table)
        features = Output("Features", AttributeList, dynamic=False)

    want_main_area = False
    want_control_area = True

    settingsHandler = SelectAttributesDomainContextHandler(first_match=False)
    domain_role_hints = ContextSetting({})
    use_input_features = Setting(False)
    ignore_new_features = Setting(False)
    auto_commit = Setting(True)

    class Warning(widget.OWWidget.Warning):
        mismatching_domain = Msg("Features and data domain do not match")
        multiple_targets = Msg("Most widgets do not support multiple targets")

    def __init__(self):
        super().__init__()
        self.data = None
        self.features = None

        # Schedule interface updates (enabled buttons) using a coalescing
        # single shot timer (complex interactions on selection and filtering
        # updates in the 'available_attrs_view')
        self.__interface_update_timer = QTimer(self,
                                               interval=0,
                                               singleShot=True)
        self.__interface_update_timer.timeout.connect(
            self.__update_interface_state)
        # The last view that has the selection for move operation's source
        self.__last_active_view = None  # type: Optional[QListView]

        def update_on_change(view):
            # Schedule interface state update on selection change in `view`
            self.__last_active_view = view
            self.__interface_update_timer.start()

        self.controlArea = QWidget(self.controlArea)
        self.layout().addWidget(self.controlArea)
        layout = QGridLayout()
        self.controlArea.setLayout(layout)
        layout.setContentsMargins(4, 4, 4, 4)
        box = gui.vBox(self.controlArea,
                       "Available Variables",
                       addToLayout=False)

        self.available_attrs = VariablesListItemModel()
        filter_edit, self.available_attrs_view = variables_filter(
            parent=self, model=self.available_attrs)
        box.layout().addWidget(filter_edit)

        def dropcompleted(action):
            if action == Qt.MoveAction:
                self.commit()

        self.available_attrs_view.selectionModel().selectionChanged.connect(
            partial(update_on_change, self.available_attrs_view))
        self.available_attrs_view.dragDropActionDidComplete.connect(
            dropcompleted)

        box.layout().addWidget(self.available_attrs_view)
        layout.addWidget(box, 0, 0, 3, 1)

        box = gui.vBox(self.controlArea, "Features", addToLayout=False)
        self.used_attrs = VariablesListItemModel()
        filter_edit, self.used_attrs_view = variables_filter(
            parent=self,
            model=self.used_attrs,
            accepted_type=(Orange.data.DiscreteVariable,
                           Orange.data.ContinuousVariable))
        self.used_attrs.rowsInserted.connect(self.__used_attrs_changed)
        self.used_attrs.rowsRemoved.connect(self.__used_attrs_changed)
        self.used_attrs_view.selectionModel().selectionChanged.connect(
            partial(update_on_change, self.used_attrs_view))
        self.used_attrs_view.dragDropActionDidComplete.connect(dropcompleted)
        self.use_features_box = gui.auto_commit(
            self.controlArea,
            self,
            "use_input_features",
            "Use input features",
            "Always use input features",
            box=False,
            commit=self.__use_features_clicked,
            callback=self.__use_features_changed,
            addToLayout=False)
        self.enable_use_features_box()
        box.layout().addWidget(self.use_features_box)
        box.layout().addWidget(filter_edit)
        box.layout().addWidget(self.used_attrs_view)
        layout.addWidget(box, 0, 2, 1, 1)

        box = gui.vBox(self.controlArea, "Target Variable", addToLayout=False)
        self.class_attrs = VariablesListItemModel()
        self.class_attrs_view = VariablesListItemView(
            acceptedType=(Orange.data.DiscreteVariable,
                          Orange.data.ContinuousVariable))
        self.class_attrs_view.setModel(self.class_attrs)
        self.class_attrs_view.selectionModel().selectionChanged.connect(
            partial(update_on_change, self.class_attrs_view))
        self.class_attrs_view.dragDropActionDidComplete.connect(dropcompleted)
        self.class_attrs_view.setMaximumHeight(72)
        box.layout().addWidget(self.class_attrs_view)
        layout.addWidget(box, 1, 2, 1, 1)

        box = gui.vBox(self.controlArea, "Meta Attributes", addToLayout=False)
        self.meta_attrs = VariablesListItemModel()
        self.meta_attrs_view = VariablesListItemView(
            acceptedType=Orange.data.Variable)
        self.meta_attrs_view.setModel(self.meta_attrs)
        self.meta_attrs_view.selectionModel().selectionChanged.connect(
            partial(update_on_change, self.meta_attrs_view))
        self.meta_attrs_view.dragDropActionDidComplete.connect(dropcompleted)
        box.layout().addWidget(self.meta_attrs_view)
        layout.addWidget(box, 2, 2, 1, 1)

        bbox = gui.vBox(self.controlArea, addToLayout=False, margin=0)
        layout.addWidget(bbox, 0, 1, 1, 1)

        self.up_attr_button = gui.button(bbox,
                                         self,
                                         "Up",
                                         callback=partial(
                                             self.move_up,
                                             self.used_attrs_view))
        self.move_attr_button = gui.button(bbox,
                                           self,
                                           ">",
                                           callback=partial(
                                               self.move_selected,
                                               self.used_attrs_view))
        self.down_attr_button = gui.button(bbox,
                                           self,
                                           "Down",
                                           callback=partial(
                                               self.move_down,
                                               self.used_attrs_view))

        bbox = gui.vBox(self.controlArea, addToLayout=False, margin=0)
        layout.addWidget(bbox, 1, 1, 1, 1)

        self.up_class_button = gui.button(bbox,
                                          self,
                                          "Up",
                                          callback=partial(
                                              self.move_up,
                                              self.class_attrs_view))
        self.move_class_button = gui.button(bbox,
                                            self,
                                            ">",
                                            callback=partial(
                                                self.move_selected,
                                                self.class_attrs_view))
        self.down_class_button = gui.button(bbox,
                                            self,
                                            "Down",
                                            callback=partial(
                                                self.move_down,
                                                self.class_attrs_view))

        bbox = gui.vBox(self.controlArea, addToLayout=False)
        layout.addWidget(bbox, 2, 1, 1, 1)
        self.up_meta_button = gui.button(bbox,
                                         self,
                                         "Up",
                                         callback=partial(
                                             self.move_up,
                                             self.meta_attrs_view))
        self.move_meta_button = gui.button(bbox,
                                           self,
                                           ">",
                                           callback=partial(
                                               self.move_selected,
                                               self.meta_attrs_view))
        self.down_meta_button = gui.button(bbox,
                                           self,
                                           "Down",
                                           callback=partial(
                                               self.move_down,
                                               self.meta_attrs_view))

        bbox = gui.vBox(self.controlArea,
                        "Additional settings",
                        addToLayout=False)
        gui.checkBox(
            widget=bbox,
            master=self,
            value="ignore_new_features",
            label="Ignore new variables by default",
            tooltip="When the widget receives data with additional columns "
            "they are added to the available attributes column if "
            "<i>Ignore new variables by default</i> is checked.")
        layout.addWidget(bbox, 3, 0, 1, 3)

        autobox = gui.auto_send(None, self, "auto_commit")
        layout.addWidget(autobox, 4, 0, 1, 3)
        reset = gui.button(None, self, "Reset", callback=self.reset, width=120)
        autobox.layout().insertWidget(0, reset)
        autobox.layout().insertStretch(1, 20)

        layout.setRowStretch(0, 4)
        layout.setRowStretch(1, 0)
        layout.setRowStretch(2, 2)
        layout.setHorizontalSpacing(0)
        self.controlArea.setLayout(layout)

        self.output_data = None
        self.original_completer_items = []

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        self.resize(600, 600)

    @property
    def features_from_data_attributes(self):
        if self.data is None or self.features is None:
            return []
        domain = self.data.domain
        return [
            domain[feature.name] for feature in self.features
            if feature.name in domain
            and domain[feature.name] in domain.attributes
        ]

    def can_use_features(self):
        return bool(self.features_from_data_attributes) and \
               self.features_from_data_attributes != self.used_attrs[:]

    def __use_features_changed(self):  # Use input features check box
        # Needs a check since callback is invoked before object is created
        if not hasattr(self, "use_features_box"):
            return
        self.enable_used_attrs(not self.use_input_features)
        if self.use_input_features and self.can_use_features():
            self.use_features()
        if not self.use_input_features:
            self.enable_use_features_box()

    def __use_features_clicked(self):  # Use input features button
        self.use_features()

    def __used_attrs_changed(self):
        self.enable_use_features_box()

    @Inputs.data
    def set_data(self, data=None):
        self.update_domain_role_hints()
        self.closeContext()
        self.domain_role_hints = {}

        self.data = data
        if data is None:
            self.used_attrs[:] = []
            self.class_attrs[:] = []
            self.meta_attrs[:] = []
            self.available_attrs[:] = []
            self.info.set_input_summary(self.info.NoInput)
            return

        self.openContext(data)
        all_vars = data.domain.variables + data.domain.metas

        def attrs_for_role(role):
            selected_attrs = [
                attr for attr in all_vars if domain_hints[attr][0] == role
            ]
            return sorted(selected_attrs,
                          key=lambda attr: domain_hints[attr][1])

        domain_hints = self.restore_hints(data.domain)
        self.used_attrs[:] = attrs_for_role("attribute")
        self.class_attrs[:] = attrs_for_role("class")
        self.meta_attrs[:] = attrs_for_role("meta")
        self.available_attrs[:] = attrs_for_role("available")
        self.info.set_input_summary(len(data), format_summary_details(data))

    def restore_hints(self, domain: Domain) -> Dict[Variable, Tuple[str, int]]:
        """
        Define hints for selected/unselected features.
        Rules:
        - if context available, restore new features based on checked/unchecked
          ignore_new_features, context hint should be took into account
        - in no context, restore features based on the domain (as selected)

        Parameters
        ----------
        domain
            Data domain

        Returns
        -------
        Dictionary with hints about order and model in which each feature
        should appear
        """
        domain_hints = {}
        if not self.ignore_new_features or len(self.domain_role_hints) == 0:
            # select_new_features selected or no context - restore based on domain
            domain_hints.update(
                self._hints_from_seq("attribute", domain.attributes))
            domain_hints.update(self._hints_from_seq("meta", domain.metas))
            domain_hints.update(
                self._hints_from_seq("class", domain.class_vars))
        else:
            # if context restored and ignore_new_features selected - restore
            # new features as available
            d = domain.attributes + domain.metas + domain.class_vars
            domain_hints.update(self._hints_from_seq("available", d))

        domain_hints.update(self.domain_role_hints)
        return domain_hints

    def update_domain_role_hints(self):
        """ Update the domain hints to be stored in the widgets settings.
        """
        hints = {}
        hints.update(self._hints_from_seq("available", self.available_attrs))
        hints.update(self._hints_from_seq("attribute", self.used_attrs))
        hints.update(self._hints_from_seq("class", self.class_attrs))
        hints.update(self._hints_from_seq("meta", self.meta_attrs))
        self.domain_role_hints = hints

    @staticmethod
    def _hints_from_seq(role, model):
        return [(attr, (role, i)) for i, attr in enumerate(model)]

    @Inputs.features
    def set_features(self, features):
        self.features = features

    def handleNewSignals(self):
        self.check_data()
        self.enable_used_attrs()
        self.enable_use_features_box()
        if self.use_input_features and self.features_from_data_attributes:
            self.enable_used_attrs(False)
            self.use_features()
        self.unconditional_commit()

    def check_data(self):
        self.Warning.mismatching_domain.clear()
        if self.data is not None and self.features is not None and \
                not self.features_from_data_attributes:
            self.Warning.mismatching_domain()

    def enable_used_attrs(self, enable=True):
        self.up_attr_button.setEnabled(enable)
        self.move_attr_button.setEnabled(enable)
        self.down_attr_button.setEnabled(enable)
        self.used_attrs_view.setEnabled(enable)
        self.used_attrs_view.repaint()

    def enable_use_features_box(self):
        self.use_features_box.button.setEnabled(self.can_use_features())
        enable_checkbox = bool(self.features_from_data_attributes)
        self.use_features_box.setHidden(not enable_checkbox)
        self.use_features_box.repaint()

    def use_features(self):
        attributes = self.features_from_data_attributes
        available, used = self.available_attrs[:], self.used_attrs[:]
        self.available_attrs[:] = [
            attr for attr in used + available if attr not in attributes
        ]
        self.used_attrs[:] = attributes
        self.commit()

    @staticmethod
    def selected_rows(view):
        """ Return the selected rows in the view.
        """
        rows = view.selectionModel().selectedRows()
        model = view.model()
        if isinstance(model, QSortFilterProxyModel):
            rows = [model.mapToSource(r) for r in rows]
        return [r.row() for r in rows]

    def move_rows(self, view: QListView, offset: int, roles=(Qt.EditRole, )):
        rows = [idx.row() for idx in view.selectionModel().selectedRows()]
        model = view.model()  # type: QAbstractItemModel
        rowcount = model.rowCount()
        newrows = [min(max(0, row + offset), rowcount - 1) for row in rows]

        def itemData(index):
            return {role: model.data(index, role) for role in roles}

        for row, newrow in sorted(zip(rows, newrows), reverse=offset > 0):
            d1 = itemData(model.index(row, 0))
            d2 = itemData(model.index(newrow, 0))
            model.setItemData(model.index(row, 0), d2)
            model.setItemData(model.index(newrow, 0), d1)

        selection = QItemSelection()
        for nrow in newrows:
            index = model.index(nrow, 0)
            selection.select(index, index)
        view.selectionModel().select(selection,
                                     QItemSelectionModel.ClearAndSelect)

        self.commit()

    def move_up(self, view: QListView):
        self.move_rows(view, -1)

    def move_down(self, view: QListView):
        self.move_rows(view, 1)

    def move_selected(self, view):
        if self.selected_rows(view):
            self.move_selected_from_to(view, self.available_attrs_view)
        elif self.selected_rows(self.available_attrs_view):
            self.move_selected_from_to(self.available_attrs_view, view)

    def move_selected_from_to(self, src, dst):
        self.move_from_to(src, dst, self.selected_rows(src))

    def move_from_to(self, src, dst, rows):
        src_model = source_model(src)
        attrs = [src_model[r] for r in rows]

        for s1, s2 in reversed(list(slices(rows))):
            del src_model[s1:s2]

        dst_model = source_model(dst)

        dst_model.extend(attrs)

        self.commit()

    def __update_interface_state(self):
        last_view = self.__last_active_view
        if last_view is not None:
            self.update_interface_state(last_view)

    def update_interface_state(self, focus=None):
        for view in [
                self.available_attrs_view, self.used_attrs_view,
                self.class_attrs_view, self.meta_attrs_view
        ]:
            if view is not focus and not view.hasFocus() \
                    and view.selectionModel().hasSelection():
                view.selectionModel().clear()

        def selected_vars(view):
            model = source_model(view)
            return [model[i] for i in self.selected_rows(view)]

        available_selected = selected_vars(self.available_attrs_view)
        attrs_selected = selected_vars(self.used_attrs_view)
        class_selected = selected_vars(self.class_attrs_view)
        meta_selected = selected_vars(self.meta_attrs_view)

        available_types = set(map(type, available_selected))
        all_primitive = all(var.is_primitive() for var in available_types)

        move_attr_enabled = \
            ((available_selected and all_primitive) or attrs_selected) and \
            self.used_attrs_view.isEnabled()

        self.move_attr_button.setEnabled(bool(move_attr_enabled))
        if move_attr_enabled:
            self.move_attr_button.setText(">" if available_selected else "<")

        move_class_enabled = bool(all_primitive
                                  and available_selected) or class_selected

        self.move_class_button.setEnabled(bool(move_class_enabled))
        if move_class_enabled:
            self.move_class_button.setText(">" if available_selected else "<")
        move_meta_enabled = available_selected or meta_selected

        self.move_meta_button.setEnabled(bool(move_meta_enabled))
        if move_meta_enabled:
            self.move_meta_button.setText(">" if available_selected else "<")

        self.__last_active_view = None
        self.__interface_update_timer.stop()

    def commit(self):
        self.update_domain_role_hints()
        self.Warning.multiple_targets.clear()
        if self.data is not None:
            attributes = list(self.used_attrs)
            class_var = list(self.class_attrs)
            metas = list(self.meta_attrs)

            domain = Orange.data.Domain(attributes, class_var, metas)
            newdata = self.data.transform(domain)
            self.output_data = newdata
            self.Outputs.data.send(newdata)
            self.Outputs.features.send(AttributeList(attributes))
            self.info.set_output_summary(len(newdata),
                                         format_summary_details(newdata))
            self.Warning.multiple_targets(shown=len(class_var) > 1)
        else:
            self.output_data = None
            self.Outputs.data.send(None)
            self.Outputs.features.send(None)
            self.info.set_output_summary(self.info.NoOutput)

    def reset(self):
        self.enable_used_attrs()
        self.use_features_box.checkbox.setChecked(False)
        if self.data is not None:
            self.available_attrs[:] = []
            self.used_attrs[:] = self.data.domain.attributes
            self.class_attrs[:] = self.data.domain.class_vars
            self.meta_attrs[:] = self.data.domain.metas
            self.update_domain_role_hints()
            self.commit()

    def send_report(self):
        if not self.data or not self.output_data:
            return
        in_domain, out_domain = self.data.domain, self.output_data.domain
        self.report_domain("Input data", self.data.domain)
        if (in_domain.attributes, in_domain.class_vars,
                in_domain.metas) == (out_domain.attributes,
                                     out_domain.class_vars, out_domain.metas):
            self.report_paragraph("Output data", "No changes.")
        else:
            self.report_domain("Output data", self.output_data.domain)
            diff = list(
                set(in_domain.variables + in_domain.metas) -
                set(out_domain.variables + out_domain.metas))
            if diff:
                text = "%i (%s)" % (len(diff), ", ".join(x.name for x in diff))
                self.report_items((("Removed", text), ))
Beispiel #10
0
class OWSOM(OWWidget):
    name = "Self-Organizing Map"
    description = "Computation of self-organizing map."
    icon = "icons/SOM.svg"
    keywords = ["SOM"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    auto_dimension = Setting(True)
    size_x = Setting(10)
    size_y = Setting(10)
    hexagonal = Setting(1)
    initialization = Setting(0)

    attr_color = ContextSetting(None)
    size_by_instances = Setting(True)
    pie_charts = Setting(False)
    selection = Setting(None, schema_only=True)

    graph_name = "view"

    _grid_pen = QPen(QBrush(QColor(224, 224, 224)), 2)
    _grid_pen.setCosmetic(True)

    OptControls = namedtuple(
        "OptControls",
        ("shape", "auto_dim", "spin_x", "spin_y", "initialization", "start")
    )

    class Warning(OWWidget.Warning):
        ignoring_disc_variables = Msg("SOM ignores discrete variables.")
        missing_colors = \
            Msg("Some data instances have undefined value of '{}'.")
        missing_values = \
            Msg("{} data instance{} with undefined value(s) {} not shown.")

    class Error(OWWidget.Error):
        no_numeric_variables = Msg("Data contains no numeric columns.")
        no_defined_rows = Msg("All rows contain at least one undefined value.")

    def __init__(self):
        super().__init__()
        self.__pending_selection = self.selection
        self._optimizer = None
        self._optimizer_thread = None
        self.stop_optimization = False

        self.data = self.cont_x = None
        self.cells = self.member_data = None
        self.selection = None
        self.colors = self.thresholds = None

        box = gui.vBox(self.controlArea, box="SOM")
        shape = gui.comboBox(
            box, self, "", items=("Hexagonal grid", "Square grid"))
        shape.setCurrentIndex(1 - self.hexagonal)

        box2 = gui.indentedBox(box, 10)
        auto_dim = gui.checkBox(
            box2, self, "auto_dimension", "Set dimensions automatically",
            callback=self.on_auto_dimension_changed)
        self.manual_box = box3 = gui.hBox(box2)
        spinargs = dict(
            value="", widget=box3, master=self, minv=5, maxv=100, step=5,
            alignment=Qt.AlignRight)
        spin_x = gui.spin(**spinargs)
        spin_x.setValue(self.size_x)
        gui.widgetLabel(box3, "×")
        spin_y = gui.spin(**spinargs)
        spin_y.setValue(self.size_y)
        gui.rubber(box3)
        self.manual_box.setEnabled(not self.auto_dimension)

        initialization = gui.comboBox(
            box, self, "initialization",
            items=("Initialize with PCA", "Random initialization",
                   "Replicable random"))

        start = gui.button(
            box, self, "Restart", callback=self.restart_som_pressed,
            sizePolicy=(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed))

        self.opt_controls = self.OptControls(
            shape, auto_dim, spin_x, spin_y, initialization, start)

        box = gui.vBox(self.controlArea, "Color")
        gui.comboBox(
            box, self, "attr_color", maximumContentsLength=15,
            callback=self.on_attr_color_change,
            model=DomainModel(placeholder="(Same color)",
                              valid_types=DomainModel.PRIMITIVE))
        gui.checkBox(
            box, self, "pie_charts", label="Show pie charts",
            callback=self.on_pie_chart_change)
        gui.checkBox(
            box, self, "size_by_instances", label="Size by number of instances",
            callback=self.on_attr_size_change)

        gui.rubber(self.controlArea)

        self.scene = QGraphicsScene(self)

        self.view = SomView(self.scene)
        self.view.setMinimumWidth(400)
        self.view.setMinimumHeight(400)
        self.view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.view.setRenderHint(QPainter.Antialiasing)
        self.view.selection_changed.connect(self.on_selection_change)
        self.view.selection_moved.connect(self.on_selection_move)
        self.view.selection_mark_changed.connect(self.on_selection_mark_change)
        self.mainArea.layout().addWidget(self.view)

        self.elements = None
        self.grid = None
        self.grid_cells = None
        self.legend = None

    @Inputs.data
    def set_data(self, data):
        def prepare_data():
            if len(cont_attrs) < len(attrs):
                self.Warning.ignoring_disc_variables()
            x = Table.from_table(Domain(cont_attrs), data).X
            if sp.issparse(x):
                self.data = data
                self.cont_x = x.tocsr()
            else:
                mask = np.all(np.isfinite(x), axis=1)
                if not np.any(mask):
                    self.Error.no_defined_rows()
                else:
                    if np.all(mask):
                        self.data = data
                        self.cont_x = x.copy()
                    else:
                        self.data = data[mask]
                        self.cont_x = x[mask]
                    self.cont_x -= np.min(self.cont_x, axis=0)[None, :]
                    sums = np.sum(self.cont_x, axis=0)[None, :]
                    sums[sums == 0] = 1
                    self.cont_x /= sums

        def set_warnings():
            missing = len(data) - len(self.data)
            if missing == 1:
                self.Warning.missing_values(1, "", "is")
            elif missing > 1:
                self.Warning.missing_values(missing, "s", "are")

        self.stop_optimization_and_wait()

        self.closeContext()
        self.clear()
        self.Error.clear()
        self.Warning.clear()

        if data is not None:
            attrs = data.domain.attributes
            cont_attrs = [var for var in attrs if var.is_continuous]
            if not cont_attrs:
                self.Error.no_numeric_variables()
            else:
                prepare_data()

        if self.data is not None:
            self.controls.attr_color.model().set_domain(data.domain)
            self.attr_color = data.domain.class_var
            set_warnings()

        self.openContext(self.data)
        self.set_color_bins()
        self.create_legend()
        self.recompute_dimensions()
        self._set_input_summary(data and len(data))
        self.start_som()

    def _set_input_summary(self, n_tot):
        if self.data is None:
            self.info.set_input_summary(self.info.NoInput)
            return

        n = len(self.data)
        inst = str(n)
        nvars = f"{self.cont_x.shape[1]} numeric variables"
        if n < n_tot:
            inst += f" ({n_tot})"
            details = f"{n_tot - n} out of {n_tot} instances ignored " \
                      f"because of missing values;\n{nvars}"
        else:
            details = f"{n} instances; {nvars}"

        self.info.set_input_summary(inst, details)

    def clear(self):
        self.data = self.cont_x = None
        self.cells = self.member_data = None
        self.attr_color = None
        self.colors = self.thresholds = None
        if self.elements is not None:
            self.scene.removeItem(self.elements)
            self.elements = None
        self.clear_selection()
        self.controls.attr_color.model().set_domain(None)
        self.Warning.clear()
        self.Error.clear()

    def recompute_dimensions(self):
        if not self.auto_dimension or self.cont_x is None:
            return
        dim = max(5, int(np.ceil(np.sqrt(5 * np.sqrt(self.cont_x.shape[0])))))
        self.opt_controls.spin_x.setValue(dim)
        self.opt_controls.spin_y.setValue(dim)

    def on_auto_dimension_changed(self):
        self.manual_box.setEnabled(not self.auto_dimension)
        if self.auto_dimension:
            self.recompute_dimensions()
        else:
            spin_x = self.opt_controls.spin_x
            spin_y = self.opt_controls.spin_y
            dimx = int(5 * np.round(spin_x.value() / 5))
            dimy = int(5 * np.round(spin_y.value() / 5))
            spin_x.setValue(dimx)
            spin_y.setValue(dimy)

    def on_attr_color_change(self):
        self.controls.pie_charts.setEnabled(self.attr_color is not None)
        self.set_color_bins()
        self.create_legend()
        self.rescale()
        self._redraw()

    def on_attr_size_change(self):
        self._redraw()

    def on_pie_chart_change(self):
        self._redraw()

    def clear_selection(self):
        self.selection = None
        self.redraw_selection()

    def on_selection_change(self, selection, action=SomView.SelectionSet):
        if self.selection is None:
            self.selection = np.zeros(self.grid_cells.T.shape, dtype=np.int16)
        if action == SomView.SelectionSet:
            self.selection[:] = 0
            self.selection[selection] = 1
        elif action == SomView.SelectionAddToGroup:
            self.selection[selection] = max(1, np.max(self.selection))
        elif action == SomView.SelectionNewGroup:
            self.selection[selection] = 1 + np.max(self.selection)
        elif action & SomView.SelectionRemove:
            self.selection[selection] = 0
        self.redraw_selection()
        self.update_output()

    def on_selection_move(self, event: QKeyEvent):
        if self.selection is None or not np.any(self.selection):
            if event.key() in (Qt.Key_Right, Qt.Key_Down):
                x = y = 0
            else:
                x = self.size_x - 1
                y = self.size_y - 1
        else:
            x, y = np.nonzero(self.selection)
            if len(x) > 1:
                return
            if event.key() == Qt.Key_Up and y > 0:
                y -= 1
            if event.key() == Qt.Key_Down and y < self.size_y - 1:
                y += 1
            if event.key() == Qt.Key_Left and x:
                x -= 1
            if event.key() == Qt.Key_Right and x < self.size_x - 1:
                x += 1
            x -= self.hexagonal and x == self.size_x - 1 and y % 2

        if self.selection is not None and self.selection[x, y]:
            return
        selection = np.zeros(self.grid_cells.shape, dtype=bool)
        selection[x, y] = True
        self.on_selection_change(selection)

    def on_selection_mark_change(self, marks):
        self.redraw_selection(marks=marks)

    def redraw_selection(self, marks=None):
        if self.grid_cells is None:
            return

        sel_pen = QPen(QBrush(QColor(128, 128, 128)), 2)
        sel_pen.setCosmetic(True)
        mark_pen = QPen(QBrush(QColor(128, 128, 128)), 4)
        mark_pen.setCosmetic(True)
        pens = [self._grid_pen, sel_pen]

        mark_brush = QBrush(QColor(224, 255, 255))
        sels = self.selection is not None and np.max(self.selection)
        palette = ColorPaletteGenerator(number_of_colors=sels + 1)
        brushes = [QBrush(Qt.NoBrush)] + \
                  [QBrush(palette[i].lighter(165)) for i in range(sels)]

        for y in range(self.size_y):
            for x in range(self.size_x - (y % 2) * self.hexagonal):
                cell = self.grid_cells[y, x]
                marked = marks is not None and marks[x, y]
                sel_group = self.selection is not None and self.selection[x, y]
                if marked:
                    cell.setBrush(mark_brush)
                    cell.setPen(mark_pen)
                else:
                    cell.setBrush(brushes[sel_group])
                    cell.setPen(pens[bool(sel_group)])
                cell.setZValue(marked or sel_group)

    def restart_som_pressed(self):
        if self._optimizer_thread is not None:
            self.stop_optimization = True
        else:
            self.start_som()

    def start_som(self):
        self.read_controls()
        self.update_layout()
        self.clear_selection()
        if self.cont_x is not None:
            self.enable_controls(False)
            self._recompute_som()
        else:
            self.update_output()

    def read_controls(self):
        c = self.opt_controls
        self.hexagonal = c.shape.currentIndex() == 0
        self.size_x = c.spin_x.value()
        self.size_y = c.spin_y.value()

    def enable_controls(self, enable):
        c = self.opt_controls
        c.shape.setEnabled(enable)
        c.auto_dim.setEnabled(enable)
        c.start.setText("Start" if enable else "Stop")

    def update_layout(self):
        self.set_legend_pos()
        if self.elements:  # Prevent having redrawn grid but with old elements
            self.scene.removeItem(self.elements)
            self.elements = None
        self.redraw_grid()
        self.rescale()

    def _redraw(self):
        self.Warning.missing_colors.clear()
        if self.elements:
            self.scene.removeItem(self.elements)
            self.elements = None
        self.view.set_dimensions(self.size_x, self.size_y, self.hexagonal)

        if self.cells is None:
            return
        sizes = self.cells[:, :, 1] - self.cells[:, :, 0]
        sizes = sizes.astype(float)
        if not self.size_by_instances:
            sizes[sizes != 0] = 0.8
        else:
            sizes *= 0.8 / np.max(sizes)

        self.elements = QGraphicsItemGroup()
        self.scene.addItem(self.elements)
        if self.attr_color is None:
            self._draw_same_color(sizes)
        elif self.pie_charts:
            self._draw_pie_charts(sizes)
        else:
            self._draw_colored_circles(sizes)

    @property
    def _grid_factors(self):
        return (0.5, sqrt3_2) if self.hexagonal else (0, 1)

    def _draw_same_color(self, sizes):
        fx, fy = self._grid_factors
        color = QColor(64, 64, 64)
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                n = len(self.get_member_indices(x, y))
                if not r:
                    continue
                ellipse = ColoredCircle(r / 2, color, 0)
                ellipse.setPos(x + (y % 2) * fx, y * fy)
                ellipse.setToolTip(f"{n} instances")
                self.elements.addToGroup(ellipse)

    def _get_color_column(self):
        color_column = \
            self.data.get_column_view(self.attr_color)[0].astype(float,
                                                                 copy=False)
        if self.attr_color.is_discrete:
            with np.errstate(invalid="ignore"):
                int_col = color_column.astype(int)
            int_col[np.isnan(color_column)] = len(self.colors)
        else:
            int_col = np.zeros(len(color_column), dtype=int)
            # The following line is not necessary because rows with missing
            # numeric data are excluded. Uncomment it if you change SOM to
            # tolerate missing values.
            # int_col[np.isnan(color_column)] = len(self.colors)
            for i, thresh in enumerate(self.thresholds, start=1):
                int_col[color_column >= thresh] = i
        return int_col

    def _tooltip(self, colors, distribution):
        if self.attr_color.is_discrete:
            values = self.attr_color.values
        else:
            values = self._bin_names()
        tot = np.sum(distribution)
        nbhp = "\N{NON-BREAKING HYPHEN}"
        return '<table style="white-space: nowrap">' + "".join(f"""
            <tr>
                <td>
                    <font color={color.name()}>■</font>
                    <b>{escape(val).replace("-", nbhp)}</b>:
                </td>
                <td>
                    {n} ({n / tot * 100:.1f}&nbsp;%)
                </td>
            </tr>
            """ for color, val, n in zip(colors, values, distribution) if n) \
            + "</table>"

    def _draw_pie_charts(self, sizes):
        fx, fy = self._grid_factors
        color_column = self._get_color_column()
        colors = self.colors + [Qt.gray]
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                if not r:
                    self.grid_cells[y, x].setToolTip("")
                    continue
                members = self.get_member_indices(x, y)
                color_dist = np.bincount(color_column[members],
                                         minlength=len(colors))
                rel_color_dist = color_dist.astype(float) / len(members)
                pie = PieChart(rel_color_dist, r / 2, colors)
                pie.setToolTip(self._tooltip(colors, color_dist))
                self.elements.addToGroup(pie)
                pie.setPos(x + (y % 2) * fx, y * fy)

    def _draw_colored_circles(self, sizes):
        fx, fy = self._grid_factors
        color_column = self._get_color_column()
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                if not r:
                    continue
                members = self.get_member_indices(x, y)
                color_dist = color_column[members]
                color_dist = color_dist[color_dist < len(self.colors)]
                if len(color_dist) != len(members):
                    self.Warning.missing_colors(self.attr_color.name)
                bc = np.bincount(color_dist, minlength=len(self.colors))
                color = self.colors[np.argmax(bc)]
                ellipse = ColoredCircle(r / 2, color, np.max(bc) / len(members))
                ellipse.setPos(x + (y % 2) * fx, y * fy)
                ellipse.setToolTip(self._tooltip(self.colors, bc))
                self.elements.addToGroup(ellipse)

    def redraw_grid(self):
        if self.grid is not None:
            self.scene.removeItem(self.grid)
        self.grid = QGraphicsItemGroup()
        self.grid.setZValue(-200)
        self.grid_cells = np.full((self.size_y, self.size_x), None)
        for y in range(self.size_y):
            for x in range(self.size_x - (y % 2) * self.hexagonal):
                if self.hexagonal:
                    cell = QGraphicsPathItem(_hexagon_path)
                    cell.setPos(x + (y % 2) / 2, y * sqrt3_2)
                else:
                    cell = QGraphicsRectItem(x - 0.5, y - 0.5, 1, 1)
                self.grid_cells[y, x] = cell
                cell.setPen(self._grid_pen)
                self.grid.addToGroup(cell)
        self.scene.addItem(self.grid)

    def get_member_indices(self, x, y):
        i, j = self.cells[x, y]
        return self.member_data[i:j]

    def _recompute_som(self):
        if self.cont_x is None:
            return

        class Optimizer(QObject):
            update = Signal(float, np.ndarray, np.ndarray)
            done = Signal(SOM)
            stopped = Signal()

            def __init__(self, data, widget):
                super().__init__()
                self.som = SOM(
                    widget.size_x, widget.size_y,
                    hexagonal=widget.hexagonal,
                    pca_init=widget.initialization == 0,
                    random_seed=0 if widget.initialization == 2 else None)
                self.data = data
                self.widget = widget

            def callback(self, progress):
                self.update.emit(
                    progress,
                    self.som.weights.copy(), self.som.ssum_weights.copy())
                return not self.widget.stop_optimization

            def run(self):
                try:
                    self.som.fit(self.data, N_ITERATIONS,
                                 callback=self.callback)
                    # Report an exception, but still remove the thread
                finally:
                    self.done.emit(self.som)
                    self.stopped.emit()

        def update(_progress, weights, ssum_weights):
            progressbar.advance()
            self._assign_instances(weights, ssum_weights)
            self._redraw()

        def done(som):
            self.enable_controls(True)
            progressbar.finish()
            self._assign_instances(som.weights, som.ssum_weights)
            self._redraw()
            # This is the first time we know what was selected (assuming that
            # initialization is not set to random)
            if self.__pending_selection is not None:
                self.on_selection_change(self.__pending_selection)
                self.__pending_selection = None
            self.update_output()

        def thread_finished():
            self._optimizer = None
            self._optimizer_thread = None

        progressbar = gui.ProgressBar(self, N_ITERATIONS)

        self._optimizer = Optimizer(self.cont_x, self)
        self._optimizer_thread = QThread()
        self._optimizer_thread.setStackSize(5 * 2 ** 20)
        self._optimizer.update.connect(update)
        self._optimizer.done.connect(done)
        self._optimizer.stopped.connect(self._optimizer_thread.quit)
        self._optimizer.moveToThread(self._optimizer_thread)
        self._optimizer_thread.started.connect(self._optimizer.run)
        self._optimizer_thread.finished.connect(thread_finished)
        self.stop_optimization = False
        self._optimizer_thread.start()

    def stop_optimization_and_wait(self):
        if self._optimizer_thread is not None:
            self.stop_optimization = True
            self._optimizer_thread.quit()
            self._optimizer_thread.wait()
            self._optimizer_thread = None

    def onDeleteWidget(self):
        self.stop_optimization_and_wait()
        self.clear()
        super().onDeleteWidget()

    def _assign_instances(self, weights, ssum_weights):
        if self.cont_x is None:
            return  # the widget is shutting down while signals still processed
        assignments = SOM.winner_from_weights(
            self.cont_x, weights, ssum_weights, self.hexagonal)
        members = defaultdict(list)
        for i, (x, y) in enumerate(assignments):
            members[(x, y)].append(i)
        members.pop(None, None)
        self.cells = np.empty((self.size_x, self.size_y, 2), dtype=int)
        self.member_data = np.empty(self.cont_x.shape[0], dtype=int)
        index = 0
        for x in range(self.size_x):
            for y in range(self.size_y):
                nmembers = len(members[(x, y)])
                self.member_data[index:index + nmembers] = members[(x, y)]
                self.cells[x, y] = [index, index + nmembers]
                index += nmembers

    def resizeEvent(self, event):
        super().resizeEvent(event)
        self.create_legend()  # re-wrap lines if necessary
        self.rescale()

    def rescale(self):
        if self.legend:
            leg_height = self.legend.boundingRect().height()
            leg_extra = 1.5
        else:
            leg_height = 0
            leg_extra = 1

        vw, vh = self.view.width(), self.view.height() - leg_height
        scale = min(vw / (self.size_x + 1),
                    vh / ((self.size_y + leg_extra) * self._grid_factors[1]))
        self.view.setTransform(QTransform.fromScale(scale, scale))
        if self.hexagonal:
            self.view.setSceneRect(
                0, -1, self.size_x - 1,
                (self.size_y + leg_extra) * sqrt3_2 + leg_height / scale)
        else:
            self.view.setSceneRect(
                -0.25, -0.25, self.size_x - 0.5,
                self.size_y - 0.5 + leg_height / scale)

    def update_output(self):
        if self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return

        indices = np.zeros(len(self.data), dtype=int)
        if self.selection is not None and np.any(self.selection):
            for y in range(self.size_y):
                for x in range(self.size_x):
                    rows = self.get_member_indices(x, y)
                    indices[rows] = self.selection[x, y]

        if np.any(indices):
            sel_data = create_groups_table(self.data, indices, False, "Group")
            self.Outputs.selected_data.send(sel_data)
            self.info.set_output_summary(str(len(sel_data)))
        else:
            self.Outputs.selected_data.send(None)
            self.info.set_output_summary(self.info.NoOutput)

        if np.max(indices) > 1:
            annotated = create_groups_table(self.data, indices)
        else:
            annotated = create_annotated_table(
                self.data, np.flatnonzero(indices))
        self.Outputs.annotated_data.send(annotated)

    def set_color_bins(self):
        if self.attr_color is None:
            self.thresholds = self.colors = None
        elif self.attr_color.is_discrete:
            self.thresholds = None
            self.colors = [QColor(*color) for color in self.attr_color.colors]
        else:
            col = self.data.get_column_view(self.attr_color)[0].astype(float)
            self.thresholds = decimal_binnings(col, min_bins=4)[0][1:-1]
            palette = ContinuousPaletteGenerator(*self.attr_color.colors)
            nbins = len(self.thresholds) + 1
            self.colors = [palette[i / (nbins - 1)] for i in range(nbins)]

    def create_legend(self):
        if self.legend is not None:
            self.scene.removeItem(self.legend)
            self.legend = None
        if self.attr_color is None:
            return

        if self.attr_color.is_discrete:
            names = self.attr_color.values
        else:
            names = self._bin_names()

        items = []
        size = 8
        for name, color in zip(names, self.colors):
            item = QGraphicsItemGroup()
            item.addToGroup(
                CanvasRectangle(None, -size / 2, -size / 2, size, size,
                                Qt.gray, color))
            item.addToGroup(CanvasText(None, name, size, 0, Qt.AlignVCenter))
            items.append(item)

        self.legend = wrap_legend_items(
            items, hspacing=20, vspacing=16 + size,
            max_width=self.view.width() - 25)
        self.legend.setFlags(self.legend.ItemIgnoresTransformations)
        self.legend.setTransform(
            QTransform.fromTranslate(-self.legend.boundingRect().width() / 2,
                                     0))
        self.scene.addItem(self.legend)
        self.set_legend_pos()

    def _bin_names(self):
        sval = self.attr_color.repr_val
        return \
            [f"< {sval(self.thresholds[0])}"] \
            + [f"{sval(x)} - {sval(y)}"
               for x, y in zip(self.thresholds, self.thresholds[1:])] \
            + [f"≥ {sval(self.thresholds[-1])}"]

    def set_legend_pos(self):
        if self.legend is None:
            return
        self.legend.setPos(
            self.size_x / 2,
            (self.size_y + 0.2 + 0.3 * self.hexagonal) * self._grid_factors[1])

    def send_report(self):
        self.report_plot()
        if self.attr_color:
            self.report_caption(
                f"Self-organizing map colored by '{self.attr_color.name}'")
Beispiel #11
0
class OWDataTable(widget.OWWidget):
    name = "Data Table"
    description = "View the dataset in a spreadsheet."
    icon = "icons/Table.svg"
    priority = 50

    buttons_area_orientation = Qt.Vertical

    class Inputs:
        data = Input("Data", Table, multiple=True)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    show_distributions = Setting(False)
    dist_color_RGB = Setting((220, 220, 220, 255))
    show_attribute_labels = Setting(True)
    select_rows = Setting(True)
    auto_commit = Setting(True)

    color_by_class = Setting(True)
    settingsHandler = DomainContextHandler(
        match_values=DomainContextHandler.MATCH_VALUES_ALL)
    selected_rows = ContextSetting([])
    selected_cols = ContextSetting([])

    def __init__(self):
        super().__init__()

        self._inputs = OrderedDict()

        self.dist_color = QColor(*self.dist_color_RGB)

        info_box = gui.vBox(self.controlArea, "Info")
        self.info_ex = gui.widgetLabel(info_box, 'No data on input.', )
        self.info_ex.setWordWrap(True)
        self.info_attr = gui.widgetLabel(info_box, ' ')
        self.info_attr.setWordWrap(True)
        self.info_class = gui.widgetLabel(info_box, ' ')
        self.info_class.setWordWrap(True)
        self.info_meta = gui.widgetLabel(info_box, ' ')
        self.info_meta.setWordWrap(True)
        info_box.setMinimumWidth(200)
        gui.separator(self.controlArea)

        box = gui.vBox(self.controlArea, "Variables")
        self.c_show_attribute_labels = gui.checkBox(
            box, self, "show_attribute_labels",
            "Show variable labels (if present)",
            callback=self._on_show_variable_labels_changed)

        gui.checkBox(box, self, "show_distributions",
                     'Visualize numeric values',
                     callback=self._on_distribution_color_changed)
        gui.checkBox(box, self, "color_by_class", 'Color by instance classes',
                     callback=self._on_distribution_color_changed)

        box = gui.vBox(self.controlArea, "Selection")

        gui.checkBox(box, self, "select_rows", "Select full rows",
                     callback=self._on_select_rows_changed)

        gui.rubber(self.controlArea)

        reset = gui.button(
            None, self, "Restore Original Order", callback=self.restore_order,
            tooltip="Show rows in the original order", autoDefault=False)
        self.buttonsArea.layout().insertWidget(0, reset)
        gui.auto_commit(self.buttonsArea, self, "auto_commit",
                        "Send Selected Rows", "Send Automatically")

        # GUI with tabs
        self.tabs = gui.tabWidget(self.mainArea)
        self.tabs.currentChanged.connect(self._on_current_tab_changed)

    def copy_to_clipboard(self):
        self.copy()

    def sizeHint(self):
        return QSize(800, 500)

    @Inputs.data
    def set_dataset(self, data, tid=None):
        """Set the input dataset."""
        self.closeContext()
        if data is not None:
            if tid in self._inputs:
                # update existing input slot
                slot = self._inputs[tid]
                view = slot.view
                # reset the (header) view state.
                view.setModel(None)
                view.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)
            else:
                view = QTableView()
                view.setSortingEnabled(True)
                view.setHorizontalScrollMode(QTableView.ScrollPerPixel)

                if self.select_rows:
                    view.setSelectionBehavior(QTableView.SelectRows)

                header = view.horizontalHeader()
                header.setSectionsMovable(True)
                header.setSectionsClickable(True)
                header.setSortIndicatorShown(True)
                header.setSortIndicator(-1, Qt.AscendingOrder)

                # QHeaderView does not 'reset' the model sort column,
                # because there is no guaranty (requirement) that the
                # models understand the -1 sort column.
                def sort_reset(index, order):
                    if view.model() is not None and index == -1:
                        view.model().sort(index, order)

                header.sortIndicatorChanged.connect(sort_reset)

            view.dataset = data
            self.tabs.addTab(view, getattr(data, "name", "Data"))

            self._setup_table_view(view, data)
            slot = TableSlot(tid, data, table_summary(data), view)
            view._input_slot = slot
            self._inputs[tid] = slot

            self.tabs.setCurrentIndex(self.tabs.indexOf(view))

            self.set_info(slot.summary)

            if isinstance(slot.summary.len, concurrent.futures.Future):
                def update(f):
                    QMetaObject.invokeMethod(
                        self, "_update_info", Qt.QueuedConnection)

                slot.summary.len.add_done_callback(update)

        elif tid in self._inputs:
            slot = self._inputs.pop(tid)
            view = slot.view
            view.hide()
            view.deleteLater()
            self.tabs.removeTab(self.tabs.indexOf(view))

            current = self.tabs.currentWidget()
            if current is not None:
                self.set_info(current._input_slot.summary)

        self.tabs.tabBar().setVisible(self.tabs.count() > 1)
        self.selected_rows = []
        self.selected_cols = []
        self.openContext(data)
        self.set_selection()
        self.commit()

    def _setup_table_view(self, view, data):
        """Setup the `view` (QTableView) with `data` (Orange.data.Table)
        """
        if data is None:
            view.setModel(None)
            return

        datamodel = TableModel(data)
        datamodel = RichTableDecorator(datamodel)

        rowcount = data.approx_len()

        if self.color_by_class and data.domain.has_discrete_class:
            color_schema = [
                QColor(*c) for c in data.domain.class_var.colors]
        else:
            color_schema = None
        if self.show_distributions:
            view.setItemDelegate(
                gui.TableBarItem(
                    self, color=self.dist_color, color_schema=color_schema)
            )
        else:
            view.setItemDelegate(QStyledItemDelegate(self))

        # Enable/disable view sorting based on data's type
        view.setSortingEnabled(is_sortable(data))
        header = view.horizontalHeader()
        header.setSectionsClickable(is_sortable(data))
        header.setSortIndicatorShown(is_sortable(data))

        view.setModel(datamodel)

        vheader = view.verticalHeader()
        option = view.viewOptions()
        size = view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option,
            QSize(20, 20), view)

        vheader.setDefaultSectionSize(size.height() + 2)
        vheader.setMinimumSectionSize(5)
        vheader.setSectionResizeMode(QHeaderView.Fixed)

        # Limit the number of rows displayed in the QTableView
        # (workaround for QTBUG-18490 / QTBUG-28631)
        maxrows = (2 ** 31 - 1) // (vheader.defaultSectionSize() + 2)
        if rowcount > maxrows:
            sliceproxy = TableSliceProxy(
                parent=view, rowSlice=slice(0, maxrows))
            sliceproxy.setSourceModel(datamodel)
            # First reset the view (without this the header view retains
            # it's state - at this point invalid/broken)
            view.setModel(None)
            view.setModel(sliceproxy)

        assert view.model().rowCount() <= maxrows
        assert vheader.sectionSize(0) > 1 or datamodel.rowCount() == 0

        # update the header (attribute names)
        self._update_variable_labels(view)

        selmodel = BlockSelectionModel(
            view.model(), parent=view, selectBlocks=not self.select_rows)
        view.setSelectionModel(selmodel)
        view.selectionModel().selectionChanged.connect(self.update_selection)

    #noinspection PyBroadException
    def set_corner_text(self, table, text):
        """Set table corner text."""
        # As this is an ugly hack, do everything in
        # try - except blocks, as it may stop working in newer Qt.

        if not hasattr(table, "btn") and not hasattr(table, "btnfailed"):
            try:
                btn = table.findChild(QAbstractButton)

                class efc(QObject):
                    def eventFilter(self, o, e):
                        if (isinstance(o, QAbstractButton) and
                                e.type() == QEvent.Paint):
                            # paint by hand (borrowed from QTableCornerButton)
                            btn = o
                            opt = QStyleOptionHeader()
                            opt.initFrom(btn)
                            state = QStyle.State_None
                            if btn.isEnabled():
                                state |= QStyle.State_Enabled
                            if btn.isActiveWindow():
                                state |= QStyle.State_Active
                            if btn.isDown():
                                state |= QStyle.State_Sunken
                            opt.state = state
                            opt.rect = btn.rect()
                            opt.text = btn.text()
                            opt.position = QStyleOptionHeader.OnlyOneSection
                            painter = QStylePainter(btn)
                            painter.drawControl(QStyle.CE_Header, opt)
                            return True     # eat event
                        return False
                table.efc = efc()
                btn.installEventFilter(table.efc)
                table.btn = btn

                if sys.platform == "darwin":
                    btn.setAttribute(Qt.WA_MacSmallSize)

            except Exception:
                table.btnfailed = True

        if hasattr(table, "btn"):
            try:
                btn = table.btn
                btn.setText(text)
                opt = QStyleOptionHeader()
                opt.text = btn.text()
                s = btn.style().sizeFromContents(
                    QStyle.CT_HeaderSection,
                    opt, QSize(),
                    btn).expandedTo(QApplication.globalStrut())
                if s.isValid():
                    table.verticalHeader().setMinimumWidth(s.width())
            except Exception:
                pass

    def _on_current_tab_changed(self, index):
        """Update the info box on current tab change"""
        view = self.tabs.widget(index)
        if view is not None and view.model() is not None:
            self.set_info(view._input_slot.summary)
        else:
            self.set_info(None)

    def _update_variable_labels(self, view):
        "Update the variable labels visibility for `view`"
        model = view.model()
        if isinstance(model, TableSliceProxy):
            model = model.sourceModel()

        if self.show_attribute_labels:
            model.setRichHeaderFlags(
                RichTableDecorator.Labels | RichTableDecorator.Name)

            labelnames = set()
            for a in model.source.domain.variables:
                labelnames.update(a.attributes.keys())
            labelnames = sorted(
                [label for label in labelnames if not label.startswith("_")])
            self.set_corner_text(view, "\n".join([""] + labelnames))
        else:
            model.setRichHeaderFlags(RichTableDecorator.Name)
            self.set_corner_text(view, "")

    def _on_show_variable_labels_changed(self):
        """The variable labels (var.attribues) visibility was changed."""
        for slot in self._inputs.values():
            self._update_variable_labels(slot.view)

    def _on_distribution_color_changed(self):
        for ti in range(self.tabs.count()):
            widget = self.tabs.widget(ti)
            model = widget.model()
            while isinstance(model, QAbstractProxyModel):
                model = model.sourceModel()
            data = model.source
            class_var = data.domain.class_var
            if self.color_by_class and class_var and class_var.is_discrete:
                color_schema = [QColor(*c) for c in class_var.colors]
            else:
                color_schema = None
            if self.show_distributions:
                delegate = gui.TableBarItem(self, color=self.dist_color,
                                            color_schema=color_schema)
            else:
                delegate = QStyledItemDelegate(self)
            widget.setItemDelegate(delegate)
        tab = self.tabs.currentWidget()
        if tab:
            tab.reset()

    def _on_select_rows_changed(self):
        for slot in self._inputs.values():
            selection_model = slot.view.selectionModel()
            selection_model.setSelectBlocks(not self.select_rows)
            if self.select_rows:
                slot.view.setSelectionBehavior(QTableView.SelectRows)
                # Expand the current selection to full row selection.
                selection_model.select(
                    selection_model.selection(),
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
            else:
                slot.view.setSelectionBehavior(QTableView.SelectItems)

    def restore_order(self):
        """Restore the original data order of the current view."""
        table = self.tabs.currentWidget()
        if table is not None:
            table.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)

    def set_info(self, summary):
        if summary is None:
            self.info_ex.setText("No data on input.")
            self.info_attr.setText("")
            self.info_class.setText("")
            self.info_meta.setText("")
        else:
            info_len, info_attr, info_class, info_meta = \
                format_summary(summary)

            self.info_ex.setText(info_len)
            self.info_attr.setText(info_attr)
            self.info_class.setText(info_class)
            self.info_meta.setText(info_meta)

    @Slot()
    def _update_info(self):
        current = self.tabs.currentWidget()
        if current is not None and current.model() is not None:
            self.set_info(current._input_slot.summary)

    def update_selection(self, *_):
        self.commit()

    def set_selection(self):
        if len(self.selected_rows) and len(self.selected_cols):
            view = self.tabs.currentWidget()
            model = view.model()
            if model.rowCount() <= self.selected_rows[-1] or \
                    model.columnCount() <= self.selected_cols[-1]:
                return

            selection = QItemSelection()
            rowranges = list(ranges(self.selected_rows))
            colranges = list(ranges(self.selected_cols))

            for rowstart, rowend in rowranges:
                for colstart, colend in colranges:
                    selection.append(
                        QItemSelectionRange(
                            view.model().index(rowstart, colstart),
                            view.model().index(rowend - 1, colend - 1)
                        )
                    )
            view.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def get_selection(self, view):
        """
        Return the selected row and column indices of the selection in view.
        """
        selection = view.selectionModel().selection()
        model = view.model()
        # map through the proxies into input table.
        while isinstance(model, QAbstractProxyModel):
            selection = model.mapSelectionToSource(selection)
            model = model.sourceModel()

        assert isinstance(model, TableModel)

        indexes = selection.indexes()

        rows = numpy.unique([ind.row() for ind in indexes])
        # map the rows through the applied sorting (if any)
        rows = model.mapToSourceRows(rows)
        rows.sort()
        rows = rows.tolist()

        cols = sorted(set(ind.column() for ind in indexes))
        return rows, cols

    @staticmethod
    def _get_model(view):
        model = view.model()
        while isinstance(model, QAbstractProxyModel):
            model = model.sourceModel()
        return model

    def commit(self):
        """
        Commit/send the current selected row/column selection.
        """
        selected_data = table = rowsel = None
        view = self.tabs.currentWidget()
        if view and view.model() is not None:
            model = self._get_model(view)
            table = model.source  # The input data table

            # Selections of individual instances are not implemented
            # for SqlTables
            if isinstance(table, SqlTable):
                self.Outputs.selected_data.send(selected_data)
                self.Outputs.annotated_data.send(None)
                return

            rowsel, colsel = self.get_selection(view)
            self.selected_rows, self.selected_cols = rowsel, colsel

            def select(data, rows, domain):
                """
                Select the data subset with specified rows and domain subsets.

                If either rows or domain is None they mean select all.
                """
                if rows is not None and domain is not None:
                    return data.from_table(domain, data, rows)
                elif rows is not None:
                    return data.from_table(data.domain, rows)
                elif domain is not None:
                    return data.from_table(domain, data)
                else:
                    return data

            domain = table.domain

            if len(colsel) < len(domain) + len(domain.metas):
                # only a subset of the columns is selected
                allvars = domain.class_vars + domain.metas + domain.attributes
                columns = [(c, model.headerData(c, Qt.Horizontal,
                                                TableModel.DomainRole))
                           for c in colsel]
                assert all(role is not None for _, role in columns)

                def select_vars(role):
                    """select variables for role (TableModel.DomainRole)"""
                    return [allvars[c] for c, r in columns if r == role]

                attrs = select_vars(TableModel.Attribute)
                if attrs and issparse(table.X):
                    # for sparse data you can only select all attributes
                    attrs = table.domain.attributes
                class_vars = select_vars(TableModel.ClassVar)
                metas = select_vars(TableModel.Meta)
                domain = Orange.data.Domain(attrs, class_vars, metas)

            # Avoid a copy if all/none rows are selected.
            if not rowsel:
                selected_data = None
            elif len(rowsel) == len(table):
                selected_data = select(table, None, domain)
            else:
                selected_data = select(table, rowsel, domain)

        self.Outputs.selected_data.send(selected_data)
        self.Outputs.annotated_data.send(create_annotated_table(table, rowsel))

    def copy(self):
        """
        Copy current table selection to the clipboard.
        """
        view = self.tabs.currentWidget()
        if view is not None:
            mime = table_selection_to_mime_data(view)
            QApplication.clipboard().setMimeData(
                mime, QClipboard.Clipboard
            )

    def send_report(self):
        view = self.tabs.currentWidget()
        if not view or not view.model():
            return
        model = self._get_model(view)
        self.report_data_brief(model.source)
        self.report_table(view)
Beispiel #12
0
class OWVisWidget(OWWidget):
    _shown_attributes = ContextSetting(default=[],
                                       required=ContextSetting.REQUIRED,
                                       selected='selected_shown',
                                       reservoir="_hidden_attributes")
    # Setting above will override these fields
    _hidden_attributes = ()
    selected_shown = ()
    selected_hidden = ()

    @property
    def shown_attributes(self):
        return [a[0] for a in self._shown_attributes]

    @shown_attributes.setter
    def shown_attributes(self, value):
        shown = []
        hidden = []

        domain = self.get_data_domain()
        attr_info = lambda a: (a.name, vartype(a))
        if domain:
            if value:
                shown = value if isinstance(value[0], tuple) else [
                    attr_info(domain[a]) for a in value
                ]
                hidden = [
                    x
                    for x in [attr_info(domain[a]) for a in domain.attributes]
                    if x not in shown
                ]
            else:
                shown = [attr_info(a) for a in domain.attributes]
                if not self.show_all_attributes:
                    hidden = shown[10:]
                    shown = shown[:10]

            if domain.class_var and attr_info(domain.class_var) not in shown:
                hidden += [attr_info(domain.class_var)]

        self._shown_attributes = shown
        self._hidden_attributes = hidden
        self.selected_hidden = []
        self.selected_shown = []

        self.trigger_attributes_changed()

    @property
    def hidden_attributes(self):
        return [a[0] for a in self._hidden_attributes]

    __attribute_selection_area_initialized = False

    #noinspection PyAttributeOutsideInit
    def add_attribute_selection_area(self, parent):
        self.add_shown_attributes(parent)
        self.add_hidden_attributes(parent)
        self.__attribute_selection_area_initialized = True

        self.trigger_attributes_changed()

    #noinspection PyAttributeOutsideInit
    def add_shown_attributes(self, parent):
        self.shown_attributes_area = gui.widgetBox(parent,
                                                   " Shown attributes ")
        self.shown_attributes_listbox = gui.listBox(
            self.shown_attributes_area,
            self,
            "selected_shown",
            "_shown_attributes",
            dragDropCallback=self.trigger_attributes_changed,
            enableDragDrop=True,
            selectionMode=QListWidget.ExtendedSelection)

    #noinspection PyAttributeOutsideInit
    def add_hidden_attributes(self, parent):
        self.hidden_attributes_area = gui.widgetBox(parent,
                                                    " Hidden attributes ")
        self.hidden_attributes_listbox = gui.listBox(
            self.hidden_attributes_area,
            self,
            "selected_hidden",
            "_hidden_attributes",
            dragDropCallback=self.trigger_attributes_changed,
            enableDragDrop=True,
            selectionMode=QListWidget.ExtendedSelection)

    def get_data_domain(self):
        if hasattr(self, "data") and self.data:
            return self.data.domain
        else:
            return None

    def trigger_attributes_changed(self):
        if not self.__attribute_selection_area_initialized:
            # Some components trigger this event during the initialization.
            # We ignore those requests, a separate event will be triggered
            # manually when everything is initialized.
            return

        self.attributes_changed()

    def closeContext(self):
        super().closeContext()

        self.data = None
        self.shown_attributes = None

    # "Events"
    def attributes_changed(self):
        pass
class OWDifference(widget.OWWidget):
    name = 'Difference'
    description = 'Make the time series stationary by replacing it with ' \
                  '1st or 2nd order discrete difference along its values. '
    icon = 'icons/Difference.svg'
    priority = 570
    keywords = ['difference', 'derivative', 'quotient', 'percent change']

    class Inputs:
        time_series = Input("Time series", Table)

    class Outputs:
        time_series = Output("Time series", Timeseries)

    settingsHandler = DomainContextHandler()
    selected = ContextSetting([], schema_only=True)

    class Operation(str, Enum):
        DIFF = 'Difference'
        QUOT = 'Quotient'
        PERC = 'Percentage change'

    want_main_area = False
    resizing_enabled = False

    chosen_operation = settings.Setting(Operation.DIFF)
    diff_order = settings.Setting(1)
    shift_period = settings.Setting(1)
    invert_direction = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            'Series can be differentiated up to the 2nd order. '
            'However, if the series is shifted by other than 1 '
            'step, a differencing order of 1 is always assumed.', 'diff-shift')
    ]

    def __init__(self):
        self.data = None

        box = gui.vBox(self.controlArea, 'Differencing')

        gui.comboBox(box,
                     self,
                     'chosen_operation',
                     orientation=Qt.Horizontal,
                     items=[el.value for el in self.Operation],
                     label='Compute:',
                     callback=self.on_changed,
                     sendSelectedValue=True)

        self.order_spin = gui.spin(
            box,
            self,
            'diff_order',
            1,
            2,
            label='Differencing order:',
            callback=self.on_changed,
            tooltip='The value corresponds to n-th order numerical '
            'derivative of the series. \nThe order is fixed to 1 '
            'if the shift period is other than 1.')
        gui.spin(box,
                 self,
                 'shift_period',
                 1,
                 100,
                 label='Shift:',
                 callback=self.on_changed,
                 tooltip='Set this to other than 1 if you don\'t want to '
                 'compute differences for subsequent values but for '
                 'values shifted number of spaces apart. \n'
                 'If this value is different from 1, differencing '
                 'order is fixed to 1.')
        gui.checkBox(box,
                     self,
                     'invert_direction',
                     label='Invert differencing direction',
                     callback=self.on_changed,
                     tooltip='Influences where the series is padded with nan '
                     'values — at the beginning or at the end.')
        self.view = view = QListView(self,
                                     selectionMode=QListView.ExtendedSelection)
        self.model = model = VariableListModel(parent=self)
        view.setModel(model)
        view.selectionModel().selectionChanged.connect(self.on_changed)
        box.layout().addWidget(view)
        gui.auto_commit(box, self, 'autocommit', '&Apply')

    @Inputs.time_series
    def set_data(self, data):
        self.closeContext()
        self.data = data = None if data is None else Timeseries.from_data_table(
            data)
        if data is not None:
            self.model[:] = [
                var for var in data.domain.variables
                if var.is_continuous and var is not data.time_variable
            ]
            self.select_default_variable()
            self.openContext(self.data)
            self._restore_selection()
        else:
            self.reset_model()
        self.on_changed()

    def _restore_selection(self):
        def restore(view, selection):
            with signal_blocking(view.selectionModel()):
                # gymnastics for transforming variable names back to indices
                var_list = [
                    var for var in self.data.domain.variables
                    if var.is_continuous and var is not self.data.time_variable
                ]
                indices = [var_list.index(i) for i in selection]
                select_rows(view, indices)

        restore(self.view, self.selected)

    def select_default_variable(self):
        self.selected = [0]
        select_rows(self.view, self.selected)

    def reset_model(self):
        self.model.wrap([])

    def on_changed(self):
        var_names = [
            i.row() for i in self.view.selectionModel().selectedRows()
        ]
        self.order_spin.setEnabled(
            self.shift_period == 1
            and self.chosen_operation == self.Operation.DIFF)
        self.selected = [self.model[v] for v in var_names]
        self.commit()

    def commit(self):
        data = self.data
        if not data or not len(self.selected):
            self.Outputs.time_series.send(None)
            return

        X = []
        attrs = []
        invert = self.invert_direction
        shift = self.shift_period
        order = self.diff_order
        op = self.chosen_operation

        for var in self.selected:
            col = np.ravel(data[:, var])

            if invert:
                col = col[::-1]

            out = np.empty(len(col))
            if op == self.Operation.DIFF and shift == 1:
                out[order:] = np.diff(col, order)
                out[:order] = np.nan
            else:
                if op == self.Operation.DIFF:
                    out[shift:] = col[shift:] - col[:-shift]
                else:
                    out[shift:] = np.divide(col[shift:], col[:-shift])
                    if op == self.Operation.PERC:
                        out = (out - 1) * 100
                out[:shift] = np.nan

            if invert:
                out = out[::-1]

            X.append(out)

            if op == self.Operation.DIFF and shift == 1:
                details = f'order={order}'
            else:
                details = f'shift={shift}'

            template = f'{var} ({op[:4].lower()}; {details})'
            name = available_name(data.domain, template)
            attrs.append(ContinuousVariable(name))

        ts = Timeseries(
            Domain(data.domain.attributes + tuple(attrs),
                   data.domain.class_vars, data.domain.metas),
            np.column_stack((data.X, np.column_stack(X))), data.Y, data.metas)
        ts.time_variable = data.time_variable
        self.Outputs.time_series.send(ts)
Beispiel #14
0
class OWCreateClass(widget.OWWidget):
    name = "Create Class"
    description = "Create class attribute from a string attribute"
    icon = "icons/CreateClass.svg"
    category = "Data"
    keywords = []

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)

    want_main_area = False
    buttons_area_orientation = Qt.Vertical

    settingsHandler = DomainContextHandler()
    attribute = ContextSetting(None)
    class_name = ContextSetting("class")
    rules = ContextSetting({})
    match_beginning = ContextSetting(False)
    case_sensitive = ContextSetting(False)

    TRANSFORMERS = {
        StringVariable: ValueFromStringSubstring,
        DiscreteVariable: ValueFromDiscreteSubstring
    }

    # Cached variables are used so that two instances of the widget with the
    # same settings will create the same variable. The usual `make` wouldn't
    # work here because variables with `compute_value` are not reused.
    cached_variables = {}

    class Warning(widget.OWWidget.Warning):
        no_nonnumeric_vars = Msg("Data contains only numeric variables.")

    class Error(widget.OWWidget.Error):
        class_name_duplicated = Msg("Class name duplicated.")
        class_name_empty = Msg("Class name should not be empty.")

    def __init__(self):
        super().__init__()
        self.data = None

        # The following lists are of the same length as self.active_rules

        #: list of pairs with counts of matches for each patter when the
        #     patterns are applied in order and when applied on the entire set,
        #     disregarding the preceding patterns
        self.match_counts = []

        #: list of list of QLineEdit: line edit pairs for each pattern
        self.line_edits = []
        #: list of QPushButton: list of remove buttons
        self.remove_buttons = []
        #: list of list of QLabel: pairs of labels with counts
        self.counts = []

        gui.lineEdit(self.controlArea,
                     self,
                     "class_name",
                     orientation=Qt.Horizontal,
                     box="New Class Name")

        variable_select_box = gui.vBox(self.controlArea, "Match by Substring")

        combo = gui.comboBox(variable_select_box,
                             self,
                             "attribute",
                             label="From column:",
                             orientation=Qt.Horizontal,
                             searchable=True,
                             callback=self.update_rules,
                             model=DomainModel(valid_types=(StringVariable,
                                                            DiscreteVariable)))
        # Don't use setSizePolicy keyword argument here: it applies to box,
        # not the combo
        combo.setSizePolicy(QSizePolicy.MinimumExpanding,
                            QSizePolicy.Preferred)

        patternbox = gui.vBox(variable_select_box)
        #: QWidget: the box that contains the remove buttons, line edits and
        #    count labels. The lines are added and removed dynamically.
        self.rules_box = rules_box = QGridLayout()
        rules_box.setSpacing(4)
        rules_box.setContentsMargins(4, 4, 4, 4)
        self.rules_box.setColumnMinimumWidth(1, 70)
        self.rules_box.setColumnMinimumWidth(0, 10)
        self.rules_box.setColumnStretch(0, 1)
        self.rules_box.setColumnStretch(1, 1)
        self.rules_box.setColumnStretch(2, 100)
        rules_box.addWidget(QLabel("Name"), 0, 1)
        rules_box.addWidget(QLabel("Substring"), 0, 2)
        rules_box.addWidget(QLabel("Count"), 0, 3, 1, 2)
        self.update_rules()

        widget = QWidget(patternbox)
        widget.setLayout(rules_box)
        patternbox.layout().addWidget(widget)

        box = gui.hBox(patternbox)
        gui.rubber(box)
        gui.button(box,
                   self,
                   "+",
                   callback=self.add_row,
                   autoDefault=False,
                   width=34,
                   sizePolicy=(QSizePolicy.Maximum, QSizePolicy.Maximum))

        optionsbox = gui.vBox(self.controlArea, "Options")
        gui.checkBox(optionsbox,
                     self,
                     "match_beginning",
                     "Match only at the beginning",
                     callback=self.options_changed)
        gui.checkBox(optionsbox,
                     self,
                     "case_sensitive",
                     "Case sensitive",
                     callback=self.options_changed)

        gui.rubber(self.controlArea)

        gui.button(self.buttonsArea, self, "Apply", callback=self.apply)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        # TODO: Resizing upon changing the number of rules does not work
        self.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum)

    @property
    def active_rules(self):
        """
        Returns the class names and patterns corresponding to the currently
            selected attribute. If the attribute is not yet in the dictionary,
            set the default.
        """
        return self.rules.setdefault(self.attribute and self.attribute.name,
                                     [["", ""], ["", ""]])

    def rules_to_edits(self):
        """Fill the line edites with the rules from the current settings."""
        for editr, textr in zip(self.line_edits, self.active_rules):
            for edit, text in zip(editr, textr):
                edit.setText(text)

    @Inputs.data
    def set_data(self, data):
        """Input data signal handler."""
        self.closeContext()
        self.rules = {}
        self.data = data
        summary = len(data) if data else self.info.NoInput
        details = format_summary_details(data) if data else ""
        self.info.set_input_summary(summary, details)
        model = self.controls.attribute.model()
        model.set_domain(data.domain if data is not None else None)
        self.Warning.no_nonnumeric_vars(shown=data is not None and not model)
        if not model:
            self.attribute = None
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return
        self.attribute = model[0]
        self.openContext(data)
        self.update_rules()
        self.apply()

    def update_rules(self):
        """Called when the rules are changed: adjust the number of lines in
        the form and fill them, update the counts. The widget does not have
        auto-apply."""
        self.adjust_n_rule_rows()
        self.rules_to_edits()
        self.update_counts()
        # TODO: Indicator that changes need to be applied

    def options_changed(self):
        self.update_counts()

    def adjust_n_rule_rows(self):
        """Add or remove lines if needed and fix the tab order."""
        def _add_line():
            self.line_edits.append([])
            n_lines = len(self.line_edits)
            for coli in range(1, 3):
                edit = QLineEdit()
                self.line_edits[-1].append(edit)
                self.rules_box.addWidget(edit, n_lines, coli)
                edit.textChanged.connect(self.sync_edit)
            button = gui.button(None,
                                self,
                                label='×',
                                width=33,
                                autoDefault=False,
                                callback=self.remove_row,
                                sizePolicy=(QSizePolicy.Maximum,
                                            QSizePolicy.Maximum))
            self.remove_buttons.append(button)
            self.rules_box.addWidget(button, n_lines, 0)
            self.counts.append([])
            for coli, kwargs in enumerate(
                (dict(), dict(styleSheet="color: gray"))):
                label = QLabel(alignment=Qt.AlignCenter, **kwargs)
                self.counts[-1].append(label)
                self.rules_box.addWidget(label, n_lines, 3 + coli)

        def _remove_line():
            for edit in self.line_edits.pop():
                edit.deleteLater()
            self.remove_buttons.pop().deleteLater()
            for label in self.counts.pop():
                label.deleteLater()

        def _fix_tab_order():
            prev = None
            for row, rule in zip(self.line_edits, self.active_rules):
                for col_idx, edit in enumerate(row):
                    edit.row, edit.col_idx = rule, col_idx
                    if prev is not None:
                        self.setTabOrder(prev, edit)
                    prev = edit

        n = len(self.active_rules)
        while n > len(self.line_edits):
            _add_line()
        while len(self.line_edits) > n:
            _remove_line()
        _fix_tab_order()

    def add_row(self):
        """Append a new row at the end."""
        self.active_rules.append(["", ""])
        self.adjust_n_rule_rows()
        self.update_counts()

    def remove_row(self):
        """Remove a row."""
        remove_idx = self.remove_buttons.index(self.sender())
        del self.active_rules[remove_idx]
        self.update_rules()
        self.update_counts()

    def sync_edit(self, text):
        """Handle changes in line edits: update the active rules and counts"""
        edit = self.sender()
        edit.row[edit.col_idx] = text
        self.update_counts()

    def class_labels(self):
        """Construct a list of class labels. Empty labels are replaced with
        C1, C2, C3. If C<n> already appears in the list of values given by
        the user, the labels start at C<n+1> instead.
        """
        largest_c = max(
            (int(label[1:])
             for label, _ in self.active_rules if re.match("^C\\d+", label)),
            default=0)
        class_count = count(largest_c + 1)
        return [
            label_edit.text() or "C{}".format(next(class_count))
            for label_edit, _ in self.line_edits
        ]

    def update_counts(self):
        """Recompute and update the counts of matches."""
        def _matcher(strings, pattern):
            """Return indices of strings into patterns; consider case
            sensitivity and matching at the beginning. The given strings are
            assumed to be in lower case if match is case insensitive. Patterns
            are fixed on the fly."""
            if not self.case_sensitive:
                pattern = pattern.lower()
            indices = np.char.find(strings, pattern.strip())
            return indices == 0 if self.match_beginning else indices != -1

        def _lower_if_needed(strings):
            return strings if self.case_sensitive else np.char.lower(strings)

        def _string_counts():
            """
            Generate pairs of arrays for each rule until running out of data
            instances. np.sum over the two arrays in each pair gives the
            number of matches of the remaining instances (considering the
            order of patterns) and of the original data.

            For _string_counts, the arrays contain bool masks referring to the
            original data
            """
            nonlocal data
            data = data.astype(str)
            data = data[~np.char.equal(data, "")]
            data = _lower_if_needed(data)
            remaining = np.array(data)
            for _, pattern in self.active_rules:
                matching = _matcher(remaining, pattern)
                total_matching = _matcher(data, pattern)
                yield matching, total_matching
                remaining = remaining[~matching]
                if not remaining.size:
                    break

        def _discrete_counts():
            """
            Generate pairs similar to _string_counts, except that the arrays
            contain bin counts for the attribute's values matching the pattern.
            """
            attr_vals = np.array(attr.values)
            attr_vals = _lower_if_needed(attr_vals)
            bins = bincount(data, max_val=len(attr.values) - 1)[0]
            remaining = np.array(bins)
            for _, pattern in self.active_rules:
                matching = _matcher(attr_vals, pattern)
                yield remaining[matching], bins[matching]
                remaining[matching] = 0
                if not np.any(remaining):
                    break

        def _clear_labels():
            """Clear all labels"""
            for lab_matched, lab_total in self.counts:
                lab_matched.setText("")
                lab_total.setText("")

        def _set_labels():
            """Set the labels to show the counts"""
            for (n_matched, n_total), (lab_matched, lab_total), (lab, patt) in \
                    zip(self.match_counts, self.counts, self.active_rules):
                n_before = n_total - n_matched
                lab_matched.setText("{}".format(n_matched))
                if n_before and (lab or patt):
                    lab_total.setText("+ {}".format(n_before))
                    if n_matched:
                        tip = "{} of the {} matching instances are already " \
                              "covered above".format(n_before, n_total)
                    else:
                        tip = "All matching instances are already covered above"
                    lab_total.setToolTip(tip)
                    lab_matched.setToolTip(tip)

        def _set_placeholders():
            """Set placeholders for empty edit lines"""
            matches = [n for n, _ in self.match_counts] + \
                      [0] * len(self.line_edits)
            for n_matched, (_, patt) in zip(matches, self.line_edits):
                if not patt.text():
                    patt.setPlaceholderText(
                        "(remaining instances)" if n_matched else "(unused)")

            labels = self.class_labels()
            for label, (lab_edit, _) in zip(labels, self.line_edits):
                if not lab_edit.text():
                    lab_edit.setPlaceholderText(label)

        _clear_labels()
        attr = self.attribute
        if attr is None:
            return
        counters = {
            StringVariable: _string_counts,
            DiscreteVariable: _discrete_counts
        }
        data = self.data.get_column_view(attr)[0]
        self.match_counts = [[int(np.sum(x)) for x in matches]
                             for matches in counters[type(attr)]()]
        _set_labels()
        _set_placeholders()

    def apply(self):
        """Output the transformed data."""
        self.Error.clear()
        self.class_name = self.class_name.strip()
        if not self.attribute:
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return
        domain = self.data.domain
        if not self.class_name:
            self.Error.class_name_empty()
        if self.class_name in domain:
            self.Error.class_name_duplicated()
        if not self.class_name or self.class_name in domain:
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return
        new_class = self._create_variable()
        new_domain = Domain(domain.attributes, new_class,
                            domain.metas + domain.class_vars)
        new_data = self.data.transform(new_domain)
        summary = len(new_data) if new_data is not None else self.info.NoOutput
        details = format_summary_details(
            new_data) if new_data is not None else ""
        self.info.set_output_summary(summary, details)
        self.Outputs.data.send(new_data)

    def _create_variable(self):
        rules = self.active_rules
        # Transposition + stripping
        valid_rules = [
            label or pattern or n_matches
            for (label, pattern), n_matches in zip(rules, self.match_counts)
        ]
        patterns = tuple(pattern
                         for (_, pattern), valid in zip(rules, valid_rules)
                         if valid)
        names = tuple(name
                      for name, valid in zip(self.class_labels(), valid_rules)
                      if valid)
        transformer = self.TRANSFORMERS[type(self.attribute)]

        # join patters with the same names
        names, map_values = unique_in_order_mapping(names)
        names = tuple(str(a) for a in names)
        map_values = tuple(map_values)

        var_key = (self.attribute, self.class_name, names, patterns,
                   self.case_sensitive, self.match_beginning, map_values)
        if var_key in self.cached_variables:
            return self.cached_variables[var_key]

        compute_value = transformer(self.attribute, patterns,
                                    self.case_sensitive, self.match_beginning,
                                    map_values)
        new_var = DiscreteVariable(self.class_name,
                                   names,
                                   compute_value=compute_value)
        self.cached_variables[var_key] = new_var
        return new_var

    def send_report(self):
        # Pylint gives false positives: these functions are always called from
        # within the loop
        # pylint: disable=undefined-loop-variable
        def _cond_part():
            rule = "<b>{}</b> ".format(class_name)
            if patt:
                rule += "if <b>{}</b> contains <b>{}</b>".format(
                    self.attribute.name, patt)
            else:
                rule += "otherwise"
            return rule

        def _count_part():
            if not n_matched:
                return "all {} matching instances are already covered " \
                       "above".format(n_total)
            elif n_matched < n_total and patt:
                return "{} matching instances (+ {} that are already " \
                       "covered above".format(n_matched, n_total - n_matched)
            else:
                return "{} matching instances".format(n_matched)

        if not self.attribute:
            return
        self.report_items("Input", [("Source attribute", self.attribute.name)])
        output = ""
        names = self.class_labels()
        for (n_matched, n_total), class_name, (lab, patt) in \
                zip(self.match_counts, names, self.active_rules):
            if lab or patt or n_total:
                output += "<li>{}; {}</li>".format(_cond_part(), _count_part())
        if output:
            self.report_items("Output", [("Class name", self.class_name)])
            self.report_raw("<ol>{}</ol>".format(output))
class OWPivot(OWWidget):
    name = "Pivot Table"
    description = "Reshape data table based on column values."
    icon = "icons/Pivot.svg"
    priority = 1000
    keywords = ["pivot", "group", "aggregate"]

    class Inputs:
        data = Input("Data", Table, default=True)

    class Outputs:
        pivot_table = Output("Pivot Table", Table, default=True)
        filtered_data = Output("Filtered Data", Table)
        grouped_data = Output("Grouped Data", Table)

    class Warning(OWWidget.Warning):
        # TODO - inconsistent for different variable types
        no_col_feature = Msg("Column feature should be selected.")
        cannot_aggregate = Msg("Some aggregations ({}) cannot be computed.")
        renamed_vars = Msg("Some variables have been renamed in some tables"
                           "to avoid duplicates.\n{}")
        too_many_values = Msg("Selected variable has too many values.")

    settingsHandler = DomainContextHandler()
    row_feature = ContextSetting(None)
    col_feature = ContextSetting(None)
    val_feature = ContextSetting(None)
    sel_agg_functions = Setting(set([Pivot.Count]))
    selection = Setting(set(), schema_only=True)
    auto_commit = Setting(True)

    AGGREGATIONS = (Pivot.Count,
                    Pivot.Count_defined,
                    None,  # separator
                    Pivot.Sum,
                    Pivot.Mean,
                    Pivot.Var,
                    Pivot.Median,
                    2,  # column break
                    Pivot.Mode,
                    Pivot.Min,
                    Pivot.Max,
                    None,
                    Pivot.Majority)

    MAX_VALUES = 100

    def __init__(self):
        super().__init__()
        self.data = None  # type: Table
        self.pivot = None  # type: Pivot
        self.__pending_selection = self.selection  # type: Set
        self._add_control_area_controls()
        self._add_main_area_controls()

    def _add_control_area_controls(self):
        gui.comboBox(gui.vBox(self.controlArea, box="Rows"),
                     self, "row_feature",
                     contentsLength=14,
                     searchable=True,
                     model=DomainModel(valid_types=DomainModel.PRIMITIVE),
                     callback=self.__feature_changed,
                     orientation=Qt.Horizontal)
        gui.comboBox(gui.vBox(self.controlArea, box="Columns"),
                     self, "col_feature",
                     contentsLength=14,
                     searchable=True,
                     model=DomainModel(placeholder="(Same as rows)",
                                       valid_types=DiscreteVariable),
                     callback=self.__feature_changed,
                     orientation=Qt.Horizontal)
        gui.comboBox(gui.vBox(self.controlArea, box="Values"),
                     self, "val_feature",
                     contentsLength=14,
                     searchable=True,
                     model=DomainModel(placeholder="(None)"),
                     callback=self.__val_feature_changed,
                     orientation=Qt.Horizontal)
        self.__add_aggregation_controls()
        gui.rubber(self.controlArea)
        gui.auto_apply(self.buttonsArea, self, "auto_commit")

        self.set_input_summary()
        self.set_output_summary(None, None, None)

    def __add_aggregation_controls(self):
        def new_inbox():
            nonlocal row, col, inbox
            inbox = QWidget()
            layout = QGridLayout()
            inbox.setLayout(layout)
            layout.setContentsMargins(0, 0, 0, 0)
            box.layout().addWidget(inbox)
            row = col = 0

        box = gui.vBox(self.controlArea, "Aggregations")
        row = col = 0
        inbox = None
        new_inbox()
        self.aggregation_checkboxes = []  # for test purposes
        for agg in self.AGGREGATIONS:
            if agg is None:
                line = QFrame()
                line.setFrameShape(QFrame.HLine)
                line.setLineWidth(1)
                line.setFrameShadow(QFrame.Sunken)
                box.layout().addWidget(line)
                new_inbox()
                continue
            elif agg == 2:
                col += 1
                row = 0
                continue
            check_box = QCheckBox(str(agg), inbox)
            check_box.setChecked(agg in self.sel_agg_functions)
            check_box.clicked.connect(lambda *args, a=agg:
                                      self.__aggregation_cb_clicked(a, args[0]))
            inbox.layout().addWidget(check_box, row, col)
            self.aggregation_checkboxes.append(check_box)
            row += 1

    def _add_main_area_controls(self):
        self.table_view = PivotTableView()
        self.table_view.selection_changed.connect(self.__invalidate_filtered)
        self.mainArea.layout().addWidget(self.table_view)

    @property
    def no_col_feature(self):
        return self.col_feature is None and self.row_feature is not None \
            and self.row_feature.is_continuous

    @property
    def skipped_aggs(self):
        def add(fun):
            data, var = self.data, self.val_feature
            return data and not var and fun not in Pivot.AutonomousFunctions \
                or var and var.is_discrete and fun in Pivot.ContVarFunctions \
                or var and var.is_continuous and fun in Pivot.DiscVarFunctions
        skipped = [str(fun) for fun in self.sel_agg_functions if add(fun)]
        return ", ".join(sorted(skipped))

    def __feature_changed(self):
        self.selection = set()
        self.pivot = None
        self.commit()

    def __val_feature_changed(self):
        self.selection = set()
        if self.no_col_feature:
            return
        self.pivot.update_pivot_table(self.val_feature)
        self.commit()

    def __aggregation_cb_clicked(self, agg_fun: Pivot.Functions, checked: bool):
        self.selection = set()
        if checked:
            self.sel_agg_functions.add(agg_fun)
        else:
            self.sel_agg_functions.remove(agg_fun)
        if self.no_col_feature or not self.pivot or not self.data:
            return
        self.pivot.update_group_table(self.sel_agg_functions, self.val_feature)
        self.commit()

    def __invalidate_filtered(self):
        self.selection = self.table_view.get_selection()
        self.commit()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.closeContext()
        self.selection = set()
        self.data = data
        self.pivot = None
        self.check_data()
        self.init_attr_values()
        self.openContext(self.data)
        self.unconditional_commit()

    def check_data(self):
        self.clear_messages()
        self.set_input_summary()

    def init_attr_values(self):
        domain = self.data.domain if self.data and len(self.data) else None
        for attr in ("row_feature", "col_feature", "val_feature"):
            getattr(self.controls, attr).model().set_domain(domain)
            setattr(self, attr, None)
        model = self.controls.row_feature.model()
        if model:
            self.row_feature = model[0]
        model = self.controls.val_feature.model()
        if model and len(model) > 2:
            self.val_feature = domain.variables[0] \
                if domain.variables[0] in model else model[2]

    def commit(self):
        def send_outputs(pivot_table, filtered_data, grouped_data):
            self.Outputs.grouped_data.send(grouped_data)
            self.Outputs.pivot_table.send(pivot_table)
            self.Outputs.filtered_data.send(filtered_data)
            self.set_output_summary(pivot_table, filtered_data, grouped_data)

        self.Warning.renamed_vars.clear()
        self.Warning.too_many_values.clear()
        self.Warning.cannot_aggregate.clear()
        self.Warning.no_col_feature.clear()

        if self.pivot is None:
            if self.no_col_feature:
                self.table_view.clear()
                self.Warning.no_col_feature()
                send_outputs(None, None, None)
                return

            if self.data:
                col_var = self.col_feature or self.row_feature
                col = self.data.get_column_view(col_var)[0].astype(np.float)
                if len(nanunique(col)) >= self.MAX_VALUES:
                    self.table_view.clear()
                    self.Warning.too_many_values()
                    send_outputs(None, None, None)
                    return

            self.pivot = Pivot(self.data, self.sel_agg_functions,
                               self.row_feature,
                               self.col_feature, self.val_feature)

        if self.skipped_aggs:
            self.Warning.cannot_aggregate(self.skipped_aggs)
        self._update_graph()

        send_outputs(self.pivot.pivot_table,
                     self.get_filtered_data(),
                     self.pivot.group_table)

        if self.pivot.renamed:
            self.Warning.renamed_vars(self.pivot.renamed)

    def set_input_summary(self):
        summary = len(self.data) if self.data else self.info.NoInput
        details = format_summary_details(self.data) if self.data else ""
        self.info.set_input_summary(summary, details)

    def set_output_summary(self, pivot: Table, filtered: Table, grouped: Table):
        summary, detail, kwargs = self.info.NoOutput, "", {}
        if pivot or filtered or grouped:
            n_pivot = len(pivot) if pivot else 0
            n_filtered = len(filtered) if filtered else 0
            n_grouped = len(grouped) if grouped else 0
            summary = f"{self.info.format_number(n_pivot)}, " \
                      f"{self.info.format_number(n_filtered)}, " \
                      f"{self.info.format_number(n_grouped)}"
            detail = format_multiple_summaries([
                ("Pivot table", pivot),
                ("Filtered data", filtered),
                ("Grouped data", grouped)
            ])
            kwargs = {"format": Qt.RichText}
        self.info.set_output_summary(summary, detail, **kwargs)

    def _update_graph(self):
        self.table_view.clear()
        if self.pivot.pivot_table:
            col_feature = self.col_feature or self.row_feature
            self.table_view.update_table(col_feature.name,
                                         self.row_feature.name,
                                         *self.pivot.pivot_tables)
            selection = self.__pending_selection or self.selection
            self.table_view.set_selection(selection)
            self.selection = self.table_view.get_selection()
            self.__pending_selection = set()

    def get_filtered_data(self):
        if not self.data or not self.selection or not self.pivot.pivot_table:
            return None

        cond = []
        for i, j in self.selection:
            f = []
            for at, val in [(self.row_feature, self.pivot.pivot_table.X[i, 0]),
                            (self.col_feature, j)]:
                if isinstance(at, DiscreteVariable):
                    f.append(FilterDiscrete(at, [val]))
                elif isinstance(at, ContinuousVariable):
                    f.append(FilterContinuous(at, FilterContinuous.Equal, val))
            cond.append(Values(f))
        return Values(cond, conjunction=False)(self.data)

    @staticmethod
    def sizeHint():
        return QSize(640, 525)

    def send_report(self):
        self.report_items((
            ("Row feature", self.row_feature),
            ("Column feature", self.col_feature),
            ("Value feature", self.val_feature)))
        if self.data and self.val_feature is not None:
            self.report_table("", self.table_view)
        if not self.data:
            self.report_items((("Group by", self.row_feature),))
            self.report_table(self.table_view)
class OWHyper(OWWidget):
    name = "HyperSpectra"

    class Inputs:
        data = Input("Data", Orange.data.Table, default=True)

    class Outputs:
        selected_data = Output("Selection", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    icon = "icons/hyper.svg"
    priority = 20
    replaces = ["orangecontrib.infrared.widgets.owhyper.OWHyper"]

    settings_version = 3
    settingsHandler = DomainContextHandler()

    imageplot = SettingProvider(ImagePlot)
    curveplot = SettingProvider(CurvePlotHyper)

    integration_method = Setting(0)
    integration_methods = Integrate.INTEGRALS
    value_type = Setting(0)
    attr_value = ContextSetting(None)

    lowlim = Setting(None)
    highlim = Setting(None)
    choose = Setting(None)

    class Warning(OWWidget.Warning):
        threshold_error = Msg("Low slider should be less than High")

    class Error(OWWidget.Warning):
        image_too_big = Msg("Image for chosen features is too big ({} x {}).")

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            # delete the saved attr_value to prevent crashes
            try:
                del settings_["context_settings"][0].values["attr_value"]
            except:
                pass

        # migrate selection
        if version <= 2:
            try:
                current_context = settings_["context_settings"][0]
                selection = getattr(current_context, "selection", None)
                if selection is not None:
                    selection = [(i, 1)
                                 for i in np.flatnonzero(np.array(selection))]
                    settings_.setdefault(
                        "imageplot", {})["selection_group_saved"] = selection
            except:
                pass

    def __init__(self):
        super().__init__()

        dbox = gui.widgetBox(self.controlArea, "Image values")

        rbox = gui.radioButtons(dbox,
                                self,
                                "value_type",
                                callback=self._change_integration)

        gui.appendRadioButton(rbox, "From spectra")

        self.box_values_spectra = gui.indentedBox(rbox)

        gui.comboBox(self.box_values_spectra,
                     self,
                     "integration_method",
                     valueType=int,
                     items=(a.name for a in self.integration_methods),
                     callback=self._change_integral_type)
        gui.rubber(self.controlArea)

        gui.appendRadioButton(rbox, "Use feature")

        self.box_values_feature = gui.indentedBox(rbox)

        self.feature_value_model = DomainModel(
            DomainModel.METAS | DomainModel.CLASSES,
            valid_types=DomainModel.PRIMITIVE)
        self.feature_value = gui.comboBox(self.box_values_feature,
                                          self,
                                          "attr_value",
                                          callback=self.update_feature_value,
                                          model=self.feature_value_model,
                                          sendSelectedValue=True,
                                          valueType=str)

        splitter = QSplitter(self)
        splitter.setOrientation(Qt.Vertical)
        self.imageplot = ImagePlot(self)
        self.imageplot.selection_changed.connect(self.image_selection_changed)

        self.curveplot = CurvePlotHyper(self, select=SELECTONE)
        self.curveplot.plot.vb.x_padding = 0.005  # pad view so that lines are not hidden
        splitter.addWidget(self.imageplot)
        splitter.addWidget(self.curveplot)
        self.mainArea.layout().addWidget(splitter)

        self.line1 = MovableVline(position=self.lowlim,
                                  label="",
                                  report=self.curveplot)
        self.line1.sigMoved.connect(lambda v: setattr(self, "lowlim", v))
        self.line2 = MovableVline(position=self.highlim,
                                  label="",
                                  report=self.curveplot)
        self.line2.sigMoved.connect(lambda v: setattr(self, "highlim", v))
        self.line3 = MovableVline(position=self.choose,
                                  label="",
                                  report=self.curveplot)
        self.line3.sigMoved.connect(lambda v: setattr(self, "choose", v))
        for line in [self.line1, self.line2, self.line3]:
            line.sigMoveFinished.connect(self.changed_integral_range)
            self.curveplot.add_marking(line)
            line.hide()

        self.data = None
        self.disable_integral_range = False

        self.resize(900, 700)
        self.graph_name = "imageplot.plotview"
        self._update_integration_type()

        # prepare interface according to the new context
        self.contextAboutToBeOpened.connect(
            lambda x: self.init_interface_data(x[0]))

    def init_interface_data(self, data):
        same_domain = (self.data and data and data.domain == self.data.domain)
        if not same_domain:
            self.init_attr_values(data)

    def image_selection_changed(self):
        if not self.data:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            self.curveplot.set_data(None)
            return

        indices = np.flatnonzero(self.imageplot.selection_group)

        annotated_data = create_groups_table(self.data,
                                             self.imageplot.selection_group)
        if annotated_data is not None:
            annotated_data.X = self.data.X  # workaround for Orange's copying on domain conversio
        self.Outputs.annotated_data.send(annotated_data)

        selected = self.data[indices]
        self.Outputs.selected_data.send(selected if selected else None)
        if selected:
            self.curveplot.set_data(selected)
        else:
            self.curveplot.set_data(self.data)

    def selection_changed(self):
        self.redraw_data()

    def init_attr_values(self, data):
        domain = data.domain if data is not None else None
        self.feature_value_model.set_domain(domain)
        self.attr_value = self.feature_value_model[
            0] if self.feature_value_model else None

    def redraw_data(self):
        self.imageplot.update_view()

    def update_feature_value(self):
        self.redraw_data()

    def _update_integration_type(self):
        self.line1.hide()
        self.line2.hide()
        self.line3.hide()
        if self.value_type == 0:
            self.box_values_spectra.setDisabled(False)
            self.box_values_feature.setDisabled(True)
            if self.integration_methods[
                    self.integration_method] != Integrate.PeakAt:
                self.line1.show()
                self.line2.show()
            else:
                self.line3.show()
        elif self.value_type == 1:
            self.box_values_spectra.setDisabled(True)
            self.box_values_feature.setDisabled(False)
        QTest.qWait(1)  # first update the interface

    def _change_integration(self):
        # change what to show on the image
        self._update_integration_type()
        self.redraw_data()

    def changed_integral_range(self):
        if self.disable_integral_range:
            return
        self.redraw_data()

    def _change_integral_type(self):
        self._change_integration()

    @Inputs.data
    def set_data(self, data):
        self.closeContext()
        self.openContext(data)
        self.data = data
        self.imageplot.set_data(data)
        self.curveplot.set_data(data)
        self._init_integral_boundaries()
        self.imageplot.update_view()

    def _init_integral_boundaries(self):
        # requires data in curveplot
        self.disable_integral_range = True
        if self.curveplot.data_x is not None and len(self.curveplot.data_x):
            minx = self.curveplot.data_x[0]
            maxx = self.curveplot.data_x[-1]

            if self.lowlim is None or not minx <= self.lowlim <= maxx:
                self.lowlim = minx
            self.line1.setValue(self.lowlim)

            if self.highlim is None or not minx <= self.highlim <= maxx:
                self.highlim = maxx
            self.line2.setValue(self.highlim)

            if self.choose is None:
                self.choose = (minx + maxx) / 2
            elif self.choose < minx:
                self.choose = minx
            elif self.choose > maxx:
                self.choose = maxx
            self.line3.setValue(self.choose)
        self.disable_integral_range = False
Beispiel #17
0
class OWMergeData(widget.OWWidget):
    name = "Merge Data"
    description = "Merge datasets based on the values of selected features."
    icon = "icons/MergeData.svg"
    priority = 1110
    keywords = ["join"]

    class Inputs:
        data = Input("Data",
                     Orange.data.Table,
                     default=True,
                     replaces=["Data A"])
        extra_data = Input("Extra Data",
                           Orange.data.Table,
                           replaces=["Data B"])

    class Outputs:
        data = Output(
            "Data",
            Orange.data.Table,
            replaces=["Merged Data A+B", "Merged Data B+A", "Merged Data"])

    LeftJoin, InnerJoin, OuterJoin = range(3)
    OptionNames = ("Append columns from Extra data",
                   "Find matching pairs of rows", "Concatenate tables")
    OptionDescriptions = (
        "The first table may contain, for instance, city names,\n"
        "and the second would be a list of cities and their coordinates.\n"
        "Columns with coordinates would then be appended to the output.",
        "Input tables contain different features describing the same data "
        "instances.\n"
        "Output contains matched instances. Rows without matches are removed.",
        "Input tables contain different features describing the same data "
        "instances.\n"
        "Output contains all instances. Data from merged instances is "
        "merged into single rows.")

    UserAdviceMessages = [
        widget.Message("Confused about merging options?\nSee the tooltips!",
                       "merging_types")
    ]

    settingsHandler = MergeDataContextHandler()
    attr_pairs = ContextSetting(None, schema_only=True)
    merging = Setting(LeftJoin)
    auto_apply = Setting(True)
    settings_version = 2

    want_main_area = False
    resizing_enabled = False

    class Warning(widget.OWWidget.Warning):
        renamed_vars = Msg("Some variables have been renamed "
                           "to avoid duplicates.\n{}")

    class Error(widget.OWWidget.Error):
        matching_numeric_with_nonnum = Msg(
            "Numeric and non-numeric columns ({} and {}) cannot be matched.")
        matching_index_with_sth = Msg("Row index cannot be matched with {}.")
        matching_id_with_sth = Msg("Instance cannot be matched with {}.")
        nonunique_left = Msg(
            "Some combinations of values on the left appear in multiple rows.\n"
            "For this type of merging, every possible combination of values "
            "on the left should appear at most once.")
        nonunique_right = Msg(
            "Some combinations of values on the right appear in multiple rows."
            "\n"
            "Every possible combination of values on the right should appear "
            "at most once.")

    def __init__(self):
        super().__init__()

        self.data = None
        self.extra_data = None

        content = [
            INDEX, INSTANCEID, DomainModel.ATTRIBUTES, DomainModel.CLASSES,
            DomainModel.METAS
        ]
        self.model = DomainModelWithTooltips(content)
        self.extra_model = DomainModelWithTooltips(content)

        box = gui.hBox(self.controlArea, box=None)
        no_info = self.data_info(None)
        self.info_box_data = gui.label(box, self, no_info, box="Data")
        self.info_box_extra_data = gui.label(box,
                                             self,
                                             no_info,
                                             box="Extra Data")

        grp = gui.radioButtons(self.controlArea,
                               self,
                               "merging",
                               box="Merging",
                               btnLabels=self.OptionNames,
                               tooltips=self.OptionDescriptions,
                               callback=self.change_merging)
        grp.layout().setSpacing(8)

        self.attr_boxes = ConditionBox(self, self.model, self.extra_model, "",
                                       "matches")
        self.attr_boxes.add_row()
        box = gui.vBox(self.controlArea, box="Row matching")
        box.layout().addWidget(self.attr_boxes)

        gui.auto_apply(self.controlArea, self, box=False)
        # connect after wrapping self.commit with gui.auto_commit!
        self.attr_boxes.vars_changed.connect(self.commit)
        self.attr_boxes.vars_changed.connect(self.store_combo_state)
        self.settingsAboutToBePacked.connect(self.store_combo_state)

    def change_merging(self):
        self.commit()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.data = data
        self.model.set_domain(data and data.domain)
        self.info_box_data.setText(self.data_info(data))

    @Inputs.extra_data
    @check_sql_input
    def set_extra_data(self, data):
        self.extra_data = data
        self.extra_model.set_domain(data and data.domain)
        self.info_box_extra_data.setText(self.data_info(data))

    def store_combo_state(self):
        self.attr_pairs = self.attr_boxes.current_state()

    def handleNewSignals(self):
        self.closeContext()
        self.attr_pairs = [self._find_best_match()]
        self.openContext(self.data and self.data.domain, self.extra_data
                         and self.extra_data.domain)
        self.attr_boxes.set_state(self.attr_pairs)
        self.unconditional_commit()

    def _find_best_match(self):
        def get_unique_str_metas_names(model_):
            return [m for m in model_ if isinstance(m, StringVariable)]

        attr, extra_attr, n_max_intersect = INDEX, INDEX, 0
        str_metas = get_unique_str_metas_names(self.model)
        extra_str_metas = get_unique_str_metas_names(self.extra_model)
        for m_a, m_b in product(str_metas, extra_str_metas):
            col = self.data[:, m_a].metas
            extra_col = self.extra_data[:, m_b].metas
            if col.size and extra_col.size \
                    and isinstance(col[0][0], str) \
                    and isinstance(extra_col[0][0], str):
                n_inter = len(np.intersect1d(col, extra_col))
                if n_inter > n_max_intersect:
                    n_max_intersect, attr, extra_attr = n_inter, m_a, m_b
        return attr, extra_attr

    @staticmethod
    def data_info(data):
        if data is None:
            return "No data."
        else:
            return \
                f"{data.name}\n" \
                f"{len(data)} instances\n" \
                f"{len(data.domain) + len(data.domain.metas)} variables"

    def commit(self):
        self.clear_messages()
        merged = self.merge() if self.data and self.extra_data else None
        self.Outputs.data.send(merged)

    def send_report(self):
        # pylint: disable=invalid-sequence-index
        self.report_items(
            (("Merging", self.OptionNames[self.merging]), ("Match", ", ".join(
                f"{self._get_col_name(left)} with {self._get_col_name(right)}"
                for left, right in self.attr_boxes.current_state()))))

    def merge(self):
        # pylint: disable=invalid-sequence-index
        pairs = self.attr_boxes.current_state()
        if not self._check_pair_types(pairs):
            return None
        left_vars, right_vars = zip(*pairs)
        left_mask = np.full(len(self.data), True)
        left = np.vstack(
            tuple(
                self._values(self.data, var, left_mask)
                for var in left_vars)).T
        right_mask = np.full(len(self.extra_data), True)
        right = np.vstack(
            tuple(
                self._values(self.extra_data, var, right_mask)
                for var in right_vars)).T
        if not self._check_uniqueness(left, left_mask, right, right_mask):
            return None
        method = self._merge_methods[self.merging]
        lefti, righti, rightu = method(self, left, left_mask, right,
                                       right_mask)
        reduced_extra_data = \
            self._compute_reduced_extra_data(right_vars, lefti, righti, rightu)
        return self._join_table_by_indices(reduced_extra_data, lefti, righti,
                                           rightu)

    def _check_pair_types(self, pairs):
        for left, right in pairs:
            if isinstance(left, ContinuousVariable) \
                    != isinstance(right, ContinuousVariable):
                self.Error.matching_numeric_with_nonnum(left, right)
                return False
            if INDEX in (left, right) and left != right:
                self.Error.matching_index_with_sth(
                    self._get_col_name(({left, right} - {INDEX}).pop()))
                return False
            if INSTANCEID in (left, right) and left != right:
                self.Error.matching_id_with_sth(
                    self._get_col_name(({left, right} - {INSTANCEID}).pop()))
                return False
        return True

    @staticmethod
    def _get_col_name(obj):
        return f"'{obj.name}'" if isinstance(obj, Variable) else obj.lower()

    def _check_uniqueness(self, left, left_mask, right, right_mask):
        ok = True
        masked_right = right[right_mask]
        if len(set(map(tuple, masked_right))) != len(masked_right):
            self.Error.nonunique_right()
            ok = False
        if self.merging != self.LeftJoin:
            masked_left = left[left_mask]
            if len(set(map(tuple, masked_left))) != len(masked_left):
                self.Error.nonunique_left()
                ok = False
        return ok

    def _compute_reduced_extra_data(self, right_match_vars, lefti, righti,
                                    rightu):
        """Prepare a table with extra columns that will appear in the merged
        table"""
        domain = self.data.domain
        extra_domain = self.extra_data.domain

        def var_needed(var):
            if rightu is not None and rightu.size:
                return True
            if var in right_match_vars and self.merging != self.OuterJoin:
                return False
            if var not in domain:
                return True
            both_defined = (lefti != -1) * (righti != -1)
            left_col = \
                self.data.get_column_view(var)[0][lefti[both_defined]]
            right_col = \
                self.extra_data.get_column_view(var)[0][righti[both_defined]]
            if var.is_primitive():
                left_col = left_col.astype(float)
                right_col = right_col.astype(float)
                mask_left = np.isfinite(left_col)
                mask_right = np.isfinite(right_col)
                return not (np.all(mask_left == mask_right) and np.all(
                    left_col[mask_left] == right_col[mask_right]))
            else:
                return not np.all(left_col == right_col)

        extra_vars = [
            var for var in chain(extra_domain.variables, extra_domain.metas)
            if var_needed(var)
        ]
        return self.extra_data[:, extra_vars]

    @staticmethod
    def _values(data, var, mask):
        """Return an iterotor over keys for rows of the table."""
        if var == INDEX:
            return np.arange(len(data))
        if var == INSTANCEID:
            return np.fromiter((inst.id for inst in data),
                               count=len(data),
                               dtype=np.int)
        col = data.get_column_view(var)[0]
        if var.is_primitive():
            col = col.astype(float, copy=False)
            nans = np.isnan(col)
            mask *= ~nans
            if var.is_discrete:
                col = col.astype(int)
                col[nans] = len(var.values)
                col = np.array(var.values + [np.nan])[col]
        else:
            col = col.copy()
            defined = col.astype(bool)
            mask *= defined
            col[~mask] = np.nan
        return col

    def _left_join_indices(self, left, left_mask, right, right_mask):
        """Compute a two-row array of indices:
        - the first row contains indices for the primary table,
        - the second row contains the matching rows in the extra table or -1"""
        data = self.data
        # Don't match nans. This is needed since numpy may change nan to string
        # nan, so nan's will match each other
        indices = np.arange(len(right))
        indices[~right_mask] = -1
        if right.shape[1] == 1:
            # The more common case can be handled faster
            right_map = dict(zip(right.flatten(), indices))
            righti = (right_map.get(val, -1) for val in left.flatten())
        else:
            right_map = dict(zip(map(tuple, right), indices))
            righti = (right_map.get(tuple(val), -1) for val in left)
        righti = np.fromiter(righti, dtype=np.int64, count=len(data))
        lefti = np.arange(len(data), dtype=np.int64)
        righti[lefti[~left_mask]] = -1
        return lefti, righti, None

    def _inner_join_indices(self, left, left_mask, right, right_mask):
        """Use _augment_indices to compute the array of indices,
        then remove those with no match in the second table"""
        lefti, righti, _ = \
            self._left_join_indices(left, left_mask, right, right_mask)
        mask = righti != [-1]
        return lefti[mask], righti[mask], None

    def _outer_join_indices(self, left, left_mask, right, right_mask):
        """Use _augment_indices to compute the array of indices,
        then add rows in the second table without a match in the first"""
        lefti, righti, _ = \
            self._left_join_indices(left, left_mask, right, right_mask)
        unused = np.full(len(right), True)
        unused[righti] = False
        if len(right) - 1 not in righti:
            # righti can include -1, which sets the last element as used
            unused[-1] = True
        return lefti, righti, np.nonzero(unused)[0]

    _merge_methods = [
        _left_join_indices, _inner_join_indices, _outer_join_indices
    ]

    def _join_table_by_indices(self, reduced_extra, lefti, righti, rightu):
        """Join (horizontally) self.data and reduced_extra, taking the pairs
        of rows given in indices"""
        if not lefti.size:
            return None
        lt_dom = self.data.domain
        xt_dom = reduced_extra.domain
        domain = self._domain_rename_duplicates(
            lt_dom.attributes + xt_dom.attributes,
            lt_dom.class_vars + xt_dom.class_vars, lt_dom.metas + xt_dom.metas)
        X = self._join_array_by_indices(self.data.X, reduced_extra.X, lefti,
                                        righti)
        Y = self._join_array_by_indices(np.c_[self.data.Y],
                                        np.c_[reduced_extra.Y], lefti, righti)
        string_cols = [
            i for i, var in enumerate(domain.metas) if var.is_string
        ]
        metas = self._join_array_by_indices(self.data.metas,
                                            reduced_extra.metas, lefti, righti,
                                            string_cols)
        if rightu is not None:
            # This domain is used for transforming the extra rows for outer join
            # It must use the original - not renamed - variables from right, so
            # values are copied,
            # but new domain for the left, so renamed values are *not* copied
            right_domain = Orange.data.Domain(
                domain.attributes[:len(lt_dom.attributes)] + xt_dom.attributes,
                domain.class_vars[:len(lt_dom.class_vars)] + xt_dom.class_vars,
                domain.metas[:len(lt_dom.metas)] + xt_dom.metas)
            extras = self.extra_data[rightu].transform(right_domain)
            X = np.vstack((X, extras.X))
            extras_Y = extras.Y
            if extras_Y.ndim == 1:
                extras_Y = extras_Y.reshape(-1, 1)
            Y = np.vstack((Y, extras_Y))
            metas = np.vstack((metas, extras.metas))
        table = Orange.data.Table.from_numpy(domain, X, Y, metas)
        table.name = getattr(self.data, 'name', '')
        table.attributes = getattr(self.data, 'attributes', {})
        if rightu is not None:
            table.ids = np.hstack((self.data.ids, self.extra_data.ids[rightu]))
        else:
            table.ids = self.data.ids[lefti]

        return table

    def _domain_rename_duplicates(self, attributes, class_vars, metas):
        """Check for duplicate variable names in domain. If any, rename
        the variables, by replacing them with new ones (names are
        appended a number). """
        attrs, cvars, mets = [], [], []
        n_attrs, n_cvars, n_metas = len(attributes), len(class_vars), len(
            metas)
        lists = [attrs] * n_attrs + [cvars] * n_cvars + [mets] * n_metas

        all_vars = attributes + class_vars + metas
        proposed_names = [m.name for m in all_vars]
        unique_names = get_unique_names_duplicates(proposed_names)
        duplicates = set()
        for p_name, u_name, var, c in zip(proposed_names, unique_names,
                                          all_vars, lists):
            if p_name != u_name:
                duplicates.add(p_name)
                var = var.copy(name=u_name)
            c.append(var)
        if duplicates:
            self.Warning.renamed_vars(", ".join(duplicates))
        return Orange.data.Domain(attrs, cvars, mets)

    @staticmethod
    def _join_array_by_indices(left, right, lefti, righti, string_cols=None):
        """Join (horizontally) two arrays, taking pairs of rows given in indices
        """
        def prepare(arr, inds, str_cols):
            try:
                newarr = arr[inds]
            except IndexError:
                newarr = np.full_like(arr, np.nan)
            else:
                empty = np.full(arr.shape[1], np.nan)
                if str_cols:
                    assert arr.dtype == object
                    empty = empty.astype(object)
                    empty[str_cols] = ''
                newarr[inds == -1] = empty
            return newarr

        left_width = left.shape[1]
        str_left = [i for i in string_cols or () if i < left_width]
        str_right = [
            i - left_width for i in string_cols or () if i >= left_width
        ]
        res = hstack((prepare(left, lefti,
                              str_left), prepare(right, righti, str_right)))
        return res

    @staticmethod
    def migrate_settings(settings, version=None):
        def mig_value(x):
            if x == "Position (index)":
                return INDEX
            if x == "Source position (index)":
                return INSTANCEID
            return x

        if not version:
            operations = ("augment", "merge", "combine")
            oper = operations[settings["merging"]]
            settings["attr_pairs"] = ([
                (mig_value(settings[f"attr_{oper}_data"]),
                 mig_value(settings[f"attr_{oper}_extra"]))
            ])
            for oper in operations:
                del settings[f"attr_{oper}_data"]
                del settings[f"attr_{oper}_extra"]

        if not version or version < 2 and "attr_pairs" in settings:
            attr_pairs = settings.pop("attr_pairs")
            attr_pairs = [
                tuple((var, 100) if isinstance(var, str) else var
                      for var in pair) for pair in attr_pairs
            ]
            context = ContextHandler().new_context()
            context.variables1 = \
                dict(var for var, _ in attr_pairs if var[1] > 100)
            context.variables2 = \
                dict(var for _, var in attr_pairs if var[1] > 100)
            context.values["attr_pairs"] = attr_pairs
            settings["context_settings"] = [context]
Beispiel #18
0
class OWAggregateColumns(widget.OWWidget):
    name = "Aggregate Columns"
    description = "Compute a sum, max, min ... of selected columns."
    icon = "icons/AggregateColumns.svg"
    priority = 100
    keywords = ["aggregate", "sum", "product", "max", "min", "mean",
                "median", "variance"]

    class Inputs:
        data = Input("Data", Table, default=True)

    class Outputs:
        data = Output("Data", Table)

    want_main_area = False

    settingsHandler = DomainContextHandler()
    variables: List[Variable] = ContextSetting([])
    operation = Setting("Sum")
    var_name = Setting("agg")
    auto_apply = Setting(True)

    Operations = {"Sum": np.nansum, "Product": np.nanprod,
                  "Min": np.nanmin, "Max": np.nanmax,
                  "Mean": np.nanmean, "Variance": np.nanvar,
                  "Median": np.nanmedian}
    TimePreserving = ("Min", "Max", "Mean", "Median")

    def __init__(self):
        super().__init__()
        self.data = None

        box = gui.vBox(self.controlArea, box=True)

        self.variable_model = DomainModel(
            order=DomainModel.MIXED, valid_types=(ContinuousVariable, ))
        var_list = gui.listView(
            box, self, "variables", model=self.variable_model,
            callback=self.commit.deferred
        )
        var_list.setSelectionMode(var_list.ExtendedSelection)

        combo = gui.comboBox(
            box, self, "operation",
            label="Operator: ", orientation=Qt.Horizontal,
            items=list(self.Operations), sendSelectedValue=True,
            callback=self.commit.deferred
        )
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)

        gui.lineEdit(
            box, self, "var_name",
            label="Variable name: ", orientation=Qt.Horizontal,
            callback=self.commit.deferred
        )

        gui.auto_apply(self.controlArea, self)

    @Inputs.data
    def set_data(self, data: Table = None):
        self.closeContext()
        self.variables.clear()
        self.data = data
        if self.data:
            self.variable_model.set_domain(data.domain)
            self.openContext(data)
        else:
            self.variable_model.set_domain(None)
        self.commit.now()

    @gui.deferred
    def commit(self):
        augmented = self._compute_data()
        self.Outputs.data.send(augmented)

    def _compute_data(self):
        if not self.data or not self.variables:
            return self.data

        new_col = self._compute_column()
        new_var = self._new_var()
        return self.data.add_column(new_var, new_col)

    def _compute_column(self):
        arr = np.empty((len(self.data), len(self.variables)))
        for i, var in enumerate(self.variables):
            arr[:, i] = self.data.get_column_view(var)[0].astype(float)
        func = self.Operations[self.operation]
        return func(arr, axis=1)

    def _new_var_name(self):
        return get_unique_names(self.data.domain, self.var_name)

    def _new_var(self):
        name = self._new_var_name()
        if self.operation in self.TimePreserving \
                and all(isinstance(var, TimeVariable) for var in self.variables):
            return TimeVariable(name)
        return ContinuousVariable(name)

    def send_report(self):
        # fp for self.variables, pylint: disable=unsubscriptable-object
        if not self.data or not self.variables:
            return
        var_list = ", ".join(f"'{var.name}'"
                             for var in self.variables[:31][:-1])
        if len(self.variables) > 30:
            var_list += f" and {len(self.variables) - 30} others"
        else:
            var_list += f" and '{self.variables[-1].name}'"
        self.report_items((
            ("Output:",
             f"'{self._new_var_name()}' as {self.operation.lower()} of {var_list}"
            ),
        ))
Beispiel #19
0
class OWSieveDiagram(OWWidget):
    name = "Sieve Diagram"
    description = "Visualize the observed and expected frequencies " \
                  "for a combination of values."
    icon = "icons/SieveDiagram.svg"
    priority = 200
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        features = Input("Features", AttributeList)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    graph_name = "canvas"

    want_control_area = False

    settings_version = 1
    settingsHandler = DomainContextHandler()
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    selection = ContextSetting(set())

    xy_changed_manually = Signal(Variable, Variable)

    def __init__(self):
        # pylint: disable=missing-docstring
        super().__init__()

        self.data = self.discrete_data = None
        self.attrs = []
        self.input_features = None
        self.areas = []
        self.selection = set()

        self.attr_box = gui.hBox(self.mainArea)
        self.domain_model = DomainModel(valid_types=DomainModel.PRIMITIVE)
        combo_args = dict(
            widget=self.attr_box, master=self, contentsLength=12,
            searchable=True, sendSelectedValue=True,
            callback=self.attr_changed, model=self.domain_model)
        fixed_size = (QSizePolicy.Fixed, QSizePolicy.Fixed)
        gui.comboBox(value="attr_x", **combo_args)
        gui.widgetLabel(self.attr_box, "\u2715", sizePolicy=fixed_size)
        gui.comboBox(value="attr_y", **combo_args)
        self.vizrank, self.vizrank_button = SieveRank.add_vizrank(
            self.attr_box, self, "Score Combinations", self.set_attr)
        self.vizrank_button.setSizePolicy(*fixed_size)

        self.canvas = QGraphicsScene(self)
        self.canvasView = ViewWithPress(
            self.canvas, self.mainArea, handler=self.reset_selection)
        self.mainArea.layout().addWidget(self.canvasView)
        self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

    def sizeHint(self):
        return QSize(450, 550)

    def resizeEvent(self, event):
        super().resizeEvent(event)
        self.update_graph()

    def showEvent(self, event):
        super().showEvent(event)
        self.update_graph()

    @classmethod
    def migrate_context(cls, context, version):
        if not version:
            settings.rename_setting(context, "attrX", "attr_x")
            settings.rename_setting(context, "attrY", "attr_y")
            settings.migrate_str_to_variable(context)

    @Inputs.data
    def set_data(self, data):
        """
        Discretize continuous attributes, and put all attributes and discrete
        metas into self.attrs.

        Select the first two attributes unless context overrides this.
        Method `resolve_shown_attributes` is called to use the attributes from
        the input, if it exists and matches the attributes in the data.

        Remove selection; again let the context override this.
        Initialize the vizrank dialog, but don't show it.

        Args:
            data (Table): input data
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        self.selection = set()
        if self.data is None:
            self.attrs[:] = []
            self.domain_model.set_domain(None)
            self.discrete_data = None
        else:
            self.domain_model.set_domain(data.domain)
        self.attrs = [x for x in self.domain_model if isinstance(x, Variable)]
        if self.attrs:
            self.attr_x = self.attrs[0]
            self.attr_y = self.attrs[len(self.attrs) > 1]
        else:
            self.attr_x = self.attr_y = None
            self.areas = []
            self.selection = set()
        self.openContext(self.data)
        if self.data:
            self.discrete_data = self.sparse_to_dense(data, True)
        self.resolve_shown_attributes()
        self.update_graph()
        self.update_selection()

        self.vizrank.initialize()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1 and
            len(self.data.domain.attributes) > 1 and not self.data.is_sparse())

    def set_attr(self, attr_x, attr_y):
        self.attr_x, self.attr_y = attr_x, attr_y
        self.update_attr()

    def attr_changed(self):
        self.update_attr()
        self.xy_changed_manually.emit(self.attr_x, self.attr_y)

    def update_attr(self):
        """Update the graph and selection."""
        self.selection = set()
        self.discrete_data = self.sparse_to_dense(self.data)
        self.update_graph()
        self.update_selection()

    def sparse_to_dense(self, data, init=False):
        """
        Extracts two selected columns from sparse matrix.
        GH-2260
        """
        def discretizer(data):
            if any(attr.is_continuous for attr in chain(data.domain.variables, data.domain.metas)):
                discretize = Discretize(
                    method=EqualFreq(n=4), remove_const=False,
                    discretize_classes=True, discretize_metas=True)
                return discretize(data).to_dense()
            return data

        if not data.is_sparse() and not init:
            return self.discrete_data
        if data.is_sparse():
            attrs = {self.attr_x,
                     self.attr_y}
            new_domain = data.domain.select_columns(attrs)
            data = Table.from_table(new_domain, data)
        return discretizer(data)

    @Inputs.features
    def set_input_features(self, attr_list):
        """
        Handler for the Features signal.

        The method stores the attributes and calls `resolve_shown_attributes`

        Args:
            attr_list (AttributeList): data from the signal
        """
        self.input_features = attr_list
        self.resolve_shown_attributes()
        self.update_selection()

    def resolve_shown_attributes(self):
        """
        Use the attributes from the input signal if the signal is present
        and at least two attributes appear in the domain. If there are
        multiple, use the first two. Combos are disabled if inputs are used.
        """
        self.warning()
        self.attr_box.setEnabled(True)
        self.vizrank.setEnabled(True)
        if not self.input_features:  # None or empty
            return
        features = [f for f in self.input_features if f in self.domain_model]
        if not features:
            self.warning(
                "Features from the input signal are not present in the data")
            return
        old_attrs = self.attr_x, self.attr_y
        self.attr_x, self.attr_y = [f for f in (features * 2)[:2]]
        self.attr_box.setEnabled(False)
        self.vizrank.setEnabled(False)
        if (self.attr_x, self.attr_y) != old_attrs:
            self.selection = set()
            self.update_graph()

    def reset_selection(self):
        self.selection = set()
        self.update_selection()

    def select_area(self, area, event):
        """
        Add or remove the clicked area from the selection

        Args:
            area (QRect): the area that is clicked
            event (QEvent): event description
        """
        if event.button() != Qt.LeftButton:
            return
        index = self.areas.index(area)
        if event.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection()

    def update_selection(self):
        """
        Update the graph (pen width) to show the current selection.
        Filter and output the data.
        """
        if self.areas is None or not self.selection:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(create_annotated_table(self.data, []))
            return

        filts = []
        for i, area in enumerate(self.areas):
            if i in self.selection:
                width = 4
                val_x, val_y = area.value_pair
                filts.append(
                    filter.Values([
                        filter.FilterDiscrete(self.attr_x.name, [val_x]),
                        filter.FilterDiscrete(self.attr_y.name, [val_y])
                    ]))
            else:
                width = 1
            pen = area.pen()
            pen.setWidth(width)
            area.setPen(pen)
        if len(filts) == 1:
            filts = filts[0]
        else:
            filts = filter.Values(filts, conjunction=False)
        selection = filts(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(create_annotated_table(self.data, sel_idx))

    def update_graph(self):
        # Function uses weird names like r, g, b, but it does it with utmost
        # caution, hence
        # pylint: disable=invalid-name
        """Update the graph."""

        def text(txt, *args, **kwargs):
            text = html_text = None
            if "max_width" in kwargs:
                text = txt
            else:
                html_text = to_html(txt)
            return CanvasText(self.canvas, text, html_text=html_text,
                              *args, **kwargs)

        def width(txt):
            return text(txt, 0, 0, show=False).boundingRect().width()

        def height(txt):
            return text(txt, 0, 0, show=False).boundingRect().height()

        def fmt(val):
            return str(int(val)) if val % 1 == 0 else "{:.2f}".format(val)

        def show_pearson(rect, pearson, pen_width):
            """
            Color the given rectangle according to its corresponding
            standardized Pearson residual.

            Args:
                rect (QRect): the rectangle being drawn
                pearson (float): signed standardized pearson residual
                pen_width (int): pen width (bolder pen is used for selection)
            """
            r = rect.rect()
            x, y, w, h = r.x(), r.y(), r.width(), r.height()
            if w == 0 or h == 0:
                return

            r = b = 255
            if pearson > 0:
                r = g = max(255 - 20 * pearson, 55)
            elif pearson < 0:
                b = g = max(255 + 20 * pearson, 55)
            else:
                r = g = b = 224
            rect.setBrush(QBrush(QColor(r, g, b)))
            pen_color = QColor(255 * (r == 255), 255 * (g == 255),
                               255 * (b == 255))
            pen = QPen(pen_color, pen_width)
            rect.setPen(pen)
            if pearson > 0:
                pearson = min(pearson, 10)
                dist = 20 - 1.6 * pearson
            else:
                pearson = max(pearson, -10)
                dist = 20 - 8 * pearson
            pen.setWidth(1)

            def _offseted_line(ax, ay):
                r = QGraphicsLineItem(x + ax, y + ay, x + (ax or w),
                                      y + (ay or h))
                self.canvas.addItem(r)
                r.setPen(pen)

            ax = dist
            while ax < w:
                _offseted_line(ax, 0)
                ax += dist

            ay = dist
            while ay < h:
                _offseted_line(0, ay)
                ay += dist

        def make_tooltip():
            """Create the tooltip. The function uses local variables from
            the enclosing scope."""
            # pylint: disable=undefined-loop-variable
            def _oper(attr, txt):
                if self.data.domain[attr.name] == ddomain[attr.name]:
                    return " = "
                return " " if txt[0] in "<≥" else " in "

            xt, yt = ["<b>{attr}{eq}{val_name}</b>: {obs}/{n} ({p:.0f} %)".format(
                attr=to_html(attr.name),
                eq=_oper(attr, val_name),
                val_name=to_html(val_name),
                obs=fmt(prob * n),
                n=int(n),
                p=100 * prob)
                      for attr, val_name, prob in [(attr_x, xval_name, chi.probs_x[x]),
                                                   (attr_y, yval_name, chi.probs_y[y])]]

            ct = """<b>combination of values: </b><br/>
                   &nbsp;&nbsp;&nbsp;expected {exp} ({p_exp:.0f} %)<br/>
                   &nbsp;&nbsp;&nbsp;observed {obs} ({p_obs:.0f} %)""".format(
                       exp=fmt(chi.expected[y, x]),
                       p_exp=100 * chi.expected[y, x] / n,
                       obs=fmt(chi.observed[y, x]),
                       p_obs=100 * chi.observed[y, x] / n)

            return f"{xt}<br/>{yt}<hr/>{ct}"


        for item in self.canvas.items():
            self.canvas.removeItem(item)
        if self.data is None or len(self.data) == 0 or \
                self.attr_x is None or self.attr_y is None:
            return

        ddomain = self.discrete_data.domain
        attr_x, attr_y = self.attr_x, self.attr_y
        disc_x, disc_y = ddomain[attr_x.name], ddomain[attr_y.name]
        view = self.canvasView

        chi = ChiSqStats(self.discrete_data, disc_x, disc_y)
        max_ylabel_w = max((width(val) for val in disc_y.values), default=0)
        max_ylabel_w = min(max_ylabel_w, 200)
        x_off = height(attr_y.name) + max_ylabel_w
        y_off = 15
        square_size = min(view.width() - x_off - 35, view.height() - y_off - 80)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, view.width(), view.height())
        if not disc_x.values or not disc_y.values:
            text_ = "Features {} and {} have no values".format(disc_x, disc_y) \
                if not disc_x.values and \
                   not disc_y.values and \
                          disc_x != disc_y \
                else \
                    "Feature {} has no values".format(
                        disc_x if not disc_x.values else disc_y)
            text(text_, view.width() / 2 + 70, view.height() / 2,
                 Qt.AlignRight | Qt.AlignVCenter)
            return
        n = chi.n
        curr_x = x_off
        max_xlabel_h = 0
        self.areas = []
        for x, (px, xval_name) in enumerate(zip(chi.probs_x, disc_x.values)):
            if px == 0:
                continue
            width = square_size * px

            curr_y = y_off
            for y in range(len(chi.probs_y) - 1, -1, -1):  # bottom-up order
                py = chi.probs_y[y]
                yval_name = disc_y.values[y]
                if py == 0:
                    continue
                height = square_size * py

                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(
                    self.canvas, curr_x + 2, curr_y + 2, width - 4, height - 4,
                    z=-10, onclick=self.select_area)
                rect.value_pair = x, y
                self.areas.append(rect)
                show_pearson(rect, chi.residuals[y, x], 3 * selected)
                rect.setToolTip(make_tooltip())

                if x == 0:
                    text(yval_name, x_off, curr_y + height / 2,
                         Qt.AlignRight | Qt.AlignVCenter)
                curr_y += height

            xl = text(xval_name, curr_x + width / 2, y_off + square_size,
                      Qt.AlignHCenter | Qt.AlignTop, max_width=width)
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)
            curr_x += width

        bottom = y_off + square_size + max_xlabel_h
        text(attr_y.name, 0, y_off + square_size / 2,
             Qt.AlignLeft | Qt.AlignVCenter, bold=True, vertical=True)
        text(attr_x.name, x_off + square_size / 2, bottom,
             Qt.AlignHCenter | Qt.AlignTop, bold=True)
        bottom += 30
        xl = text("χ²={:.2f}, p={:.3f}".format(chi.chisq, chi.p),
                  0, bottom)
        # Assume similar height for both lines
        text("N = " + fmt(chi.n), 0, bottom - xl.boundingRect().height())

    def get_widget_name_extension(self):
        if self.data is not None:
            return "{} vs {}".format(self.attr_x.name, self.attr_y.name)
        return None

    def send_report(self):
        self.report_plot()
Beispiel #20
0
class OWTreeGraph(OWTreeViewer2D):
    """Graphical visualization of tree models"""

    name = "Tree Viewer"
    icon = "icons/TreeViewer.svg"
    priority = 35
    keywords = []

    class Inputs:
        # Had different input names before merging from
        # Classification/Regression tree variants
        tree = Input("Tree",
                     TreeModel,
                     replaces=["Classification Tree", "Regression Tree"])

    class Outputs:
        selected_data = Output("Selected Data",
                               Table,
                               default=True,
                               id="selected-data")
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME,
                                Table,
                                id="annotated-data")

    settingsHandler = ClassValuesContextHandler()
    target_class_index = ContextSetting(0)
    regression_colors = Setting(0)

    replaces = [
        "Orange.widgets.classify.owclassificationtreegraph.OWClassificationTreeGraph",
        "Orange.widgets.classify.owregressiontreegraph.OWRegressionTreeGraph"
    ]

    COL_OPTIONS = ["Default", "Number of instances", "Mean value", "Variance"]
    COL_DEFAULT, COL_INSTANCE, COL_MEAN, COL_VARIANCE = range(4)

    def __init__(self):
        super().__init__()
        self.domain = None
        self.dataset = None
        self.clf_dataset = None
        self.tree_adapter = None

        self.color_label = QLabel("Target class: ")
        combo = self.color_combo = gui.OrangeComboBox()
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)
        combo.setSizeAdjustPolicy(
            QComboBox.AdjustToMinimumContentsLengthWithIcon)
        combo.setMinimumContentsLength(8)
        combo.activated[int].connect(self.color_changed)
        self.display_box.layout().addRow(self.color_label, combo)

    def set_node_info(self):
        """Set the content of the node"""
        for node in self.scene.nodes():
            node.set_rect(QRectF())
            self.update_node_info(node)
        w = max([n.rect().width() for n in self.scene.nodes()] + [0])
        if w > self.max_node_width:
            w = self.max_node_width
        for node in self.scene.nodes():
            rect = node.rect()
            node.set_rect(QRectF(rect.x(), rect.y(), w, rect.height()))
        self.scene.fix_pos(self.root_node, 10, 10)

    def _update_node_info_attr_name(self, node, text):
        attr = self.tree_adapter.attribute(node.node_inst)
        if attr is not None:
            text += "<hr/>{}".format(attr.name)
        return text

    def activate_loaded_settings(self):
        if not self.model:
            return
        super().activate_loaded_settings()
        if self.domain.class_var.is_discrete:
            self.color_combo.setCurrentIndex(self.target_class_index)
            self.toggle_node_color_cls()
        else:
            self.color_combo.setCurrentIndex(self.regression_colors)
            self.toggle_node_color_reg()
        self.set_node_info()

    def color_changed(self, i):
        if self.domain.class_var.is_discrete:
            self.target_class_index = i
            self.toggle_node_color_cls()
            self.set_node_info()
        else:
            self.regression_colors = i
            self.toggle_node_color_reg()

    def toggle_node_size(self):
        self.set_node_info()
        self.scene.update()
        self.scene_view.repaint()

    def toggle_color_cls(self):
        self.toggle_node_color_cls()
        self.set_node_info()
        self.scene.update()

    def toggle_color_reg(self):
        self.toggle_node_color_reg()
        self.set_node_info()
        self.scene.update()

    @Inputs.tree
    def ctree(self, model=None):
        """Input signal handler"""
        self.clear_scene()
        self.color_combo.clear()
        self.closeContext()
        self.model = model
        self.target_class_index = 0
        if model is None:
            self.infolabel.setText('No tree.')
            self.root_node = None
            self.dataset = None
            self.tree_adapter = None
        else:
            self.tree_adapter = self._get_tree_adapter(model)
            self.domain = model.domain
            self.dataset = model.instances
            if self.dataset is not None and self.dataset.domain != self.domain:
                self.clf_dataset = self.dataset.transform(model.domain)
            else:
                self.clf_dataset = self.dataset
            class_var = self.domain.class_var
            if class_var.is_discrete:
                self.scene.colors = [QColor(*col) for col in class_var.colors]
                self.color_label.setText("Target class: ")
                self.color_combo.addItem("None")
                self.color_combo.addItems(self.domain.class_vars[0].values)
                self.color_combo.setCurrentIndex(self.target_class_index)
            else:
                self.scene.colors = \
                    ContinuousPaletteGenerator(*model.domain.class_var.colors)
                self.color_label.setText("Color by: ")
                self.color_combo.addItems(self.COL_OPTIONS)
                self.color_combo.setCurrentIndex(self.regression_colors)
            self.openContext(self.domain.class_var)
            # self.root_node = self.walkcreate(model.root, None)
            self.root_node = self.walkcreate(self.tree_adapter.root)
            self.infolabel.setText('{} nodes, {} leaves'.format(
                self.tree_adapter.num_nodes,
                len(self.tree_adapter.leaves(self.tree_adapter.root))))
        self.setup_scene()
        self.Outputs.selected_data.send(None)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset, []))

    def walkcreate(self, node, parent=None):
        """Create a structure of tree nodes from the given model"""
        node_obj = TreeNode(self.tree_adapter, node, parent)
        self.scene.addItem(node_obj)
        if parent:
            edge = GraphicsEdge(node1=parent, node2=node_obj)
            self.scene.addItem(edge)
            parent.graph_add_edge(edge)
        for child_inst in self.tree_adapter.children(node):
            if child_inst is not None:
                self.walkcreate(child_inst, node_obj)
        return node_obj

    def node_tooltip(self, node):
        return "<br>".join(
            to_html(str(rule))
            for rule in self.tree_adapter.rules(node.node_inst))

    def update_selection(self):
        if self.model is None:
            return
        nodes = [
            item.node_inst for item in self.scene.selectedItems()
            if isinstance(item, TreeNode)
        ]
        data = self.tree_adapter.get_instances_in_nodes(nodes)
        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset,
                                   self.tree_adapter.get_indices(nodes)))

    def send_report(self):
        if not self.model:
            return
        items = [
            ("Tree size", self.infolabel.text()),
            (
                "Edge widths",
                ("Fixed", "Relative to root", "Relative to parent")[
                    # pylint: disable=invalid-sequence-index
                    self.line_width_method])
        ]
        if self.domain.class_var.is_discrete:
            items.append(("Target class", self.color_combo.currentText()))
        elif self.regression_colors != self.COL_DEFAULT:
            items.append(
                ("Color by", self.COL_OPTIONS[self.regression_colors]))
        self.report_items(items)
        self.report_plot(self.scene)

    def update_node_info(self, node):
        if self.domain.class_var.is_discrete:
            self.update_node_info_cls(node)
        else:
            self.update_node_info_reg(node)

    def update_node_info_cls(self, node):
        """Update the printed contents of the node for classification trees"""
        node_inst = node.node_inst
        distr = self.tree_adapter.get_distribution(node_inst)[0]
        total = self.tree_adapter.num_samples(node_inst)
        distr = distr / np.sum(distr)
        if self.target_class_index:
            tabs = distr[self.target_class_index - 1]
            text = ""
        else:
            modus = np.argmax(distr)
            tabs = distr[modus]
            text = f"{self.domain.class_vars[0].values[int(modus)]}<br/>"
        if tabs > 0.999:
            text += f"100%, {total}/{total}"
        else:
            text += f"{100 * tabs:2.1f}%, {int(total * tabs)}/{total}"

        text = self._update_node_info_attr_name(node, text)
        node.setHtml(
            f'<p style="line-height: 120%; margin-bottom: 0">{text}</p>')

    def update_node_info_reg(self, node):
        """Update the printed contents of the node for regression trees"""
        node_inst = node.node_inst
        mean, var = self.tree_adapter.get_distribution(node_inst)[0]
        insts = self.tree_adapter.num_samples(node_inst)
        text = f"{mean:.1f} ± {var:.1f}<br/>"
        text += f"{insts} instances"
        text = self._update_node_info_attr_name(node, text)
        node.setHtml(
            f'<p style="line-height: 120%; margin-bottom: 0">{text}</p>')

    def toggle_node_color_cls(self):
        """Update the node color for classification trees"""
        colors = self.scene.colors
        for node in self.scene.nodes():
            distr = node.tree_adapter.get_distribution(node.node_inst)[0]
            total = sum(distr)
            if self.target_class_index:
                p = distr[self.target_class_index - 1] / total
                color = colors[self.target_class_index - 1].lighter(200 -
                                                                    100 * p)
            else:
                modus = np.argmax(distr)
                p = distr[modus] / (total or 1)
                color = colors[int(modus)].lighter(300 - 200 * p)
            node.backgroundBrush = QBrush(color)
        self.scene.update()

    def toggle_node_color_reg(self):
        """Update the node color for regression trees"""
        def_color = QColor(192, 192, 255)
        if self.regression_colors == self.COL_DEFAULT:
            brush = QBrush(def_color.lighter(100))
            for node in self.scene.nodes():
                node.backgroundBrush = brush
        elif self.regression_colors == self.COL_INSTANCE:
            max_insts = len(
                self.tree_adapter.get_instances_in_nodes(
                    [self.tree_adapter.root]))
            for node in self.scene.nodes():
                node_insts = len(
                    self.tree_adapter.get_instances_in_nodes([node.node_inst]))
                node.backgroundBrush = QBrush(
                    def_color.lighter(120 - 20 * node_insts / max_insts))
        elif self.regression_colors == self.COL_MEAN:
            minv = np.nanmin(self.dataset.Y)
            maxv = np.nanmax(self.dataset.Y)
            fact = 1 / (maxv - minv) if minv != maxv else 1
            colors = self.scene.colors
            for node in self.scene.nodes():
                node_mean = self.tree_adapter.get_distribution(
                    node.node_inst)[0][0]
                node.backgroundBrush = QBrush(colors[fact *
                                                     (node_mean - minv)])
        else:
            nodes = list(self.scene.nodes())
            variances = [
                self.tree_adapter.get_distribution(node.node_inst)[0][1]
                for node in nodes
            ]
            max_var = max(variances)
            for node, var in zip(nodes, variances):
                node.backgroundBrush = QBrush(
                    def_color.lighter(120 - 20 * var / max_var))
        self.scene.update()

    def _get_tree_adapter(self, model):
        if isinstance(model, SklModel):
            return SklTreeAdapter(model)
        return TreeAdapter(model)
Beispiel #21
0
class OWRank(OWWidget):
    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102
    keywords = []

    buttons_area_orientation = Qt.Vertical

    class Inputs:
        data = Input("Data", Table)
        scorer = Input("Scorer", score.Scorer, multiple=True)

    class Outputs:
        reduced_data = Output("Reduced Data", Table, default=True)
        scores = Output("Scores", Table)
        features = Output("Features", AttributeList, dynamic=False)

    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    nSelected = ContextSetting(5)
    auto_apply = Setting(True)

    sorting = Setting((0, Qt.DescendingOrder))
    selected_methods = Setting(set())

    settings_version = 2
    settingsHandler = DomainContextHandler()
    selected_rows = ContextSetting([])
    selectionMethod = ContextSetting(SelectNBest)

    class Information(OWWidget.Information):
        no_target_var = Msg("Data does not have a single target variable. "
                            "You can still connect in unsupervised scorers "
                            "such as PCA.")
        missings_imputed = Msg('Missing values will be imputed as needed.')

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("Scorer {} inadequate: {}")
        no_attributes = Msg("Data does not have a single attribute.")

    def __init__(self):
        super().__init__()
        self.scorers = OrderedDict()
        self.out_domain_desc = None
        self.data = None
        self.problem_type_mode = ProblemType.CLASSIFICATION

        if not self.selected_methods:
            self.selected_methods = {
                method.name
                for method in SCORES if method.is_default
            }

        # GUI

        self.ranksModel = model = TableModel(parent=self)  # type: TableModel
        self.ranksView = view = TableView(self)  # type: TableView
        self.mainArea.layout().addWidget(view)
        view.setModel(model)
        view.setColumnWidth(0, 30)
        view.selectionModel().selectionChanged.connect(self.on_select)

        def _set_select_manual():
            self.setSelectionMethod(OWRank.SelectManual)

        view.pressed.connect(_set_select_manual)
        view.verticalHeader().sectionClicked.connect(_set_select_manual)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)

        self.measuresStack = stacked = QStackedWidget(self)
        self.controlArea.layout().addWidget(stacked)

        for scoring_methods in (CLS_SCORES, REG_SCORES, []):
            box = gui.vBox(None,
                           "Scoring Methods" if scoring_methods else None)
            stacked.addWidget(box)
            for method in scoring_methods:
                box.layout().addWidget(
                    QCheckBox(
                        method.name,
                        self,
                        objectName=method.
                        shortname,  # To be easily found in tests
                        checked=method.name in self.selected_methods,
                        stateChanged=partial(self.methodSelectionChanged,
                                             method_name=method.name)))
            gui.rubber(box)

        gui.rubber(self.controlArea)
        self.switchProblemType(ProblemType.CLASSIFICATION)

        selMethBox = gui.vBox(self.controlArea,
                              "Select Attributes",
                              addSpace=True)

        grid = QGridLayout()
        grid.setContentsMargins(6, 0, 6, 0)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectionMethod)

        def button(text, buttonid, toolTip=None):
            b = QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

        s = gui.spin(
            selMethBox,
            self,
            "nSelected",
            1,
            999,
            callback=lambda: self.setSelectionMethod(OWRank.SelectNBest))

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectionMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_send(selMethBox, self, "auto_apply", box=False)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        self.resize(690, 500)

    def switchProblemType(self, index):
        """
        Switch between discrete/continuous/no_class mode
        """
        self.measuresStack.setCurrentIndex(index)
        self.problem_type_mode = index

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.closeContext()
        self.selected_rows = []
        self.ranksModel.clear()
        self.ranksModel.resetSorting(True)

        self.get_method_scores.cache_clear()  # pylint: disable=no-member
        self.get_scorer_scores.cache_clear()  # pylint: disable=no-member

        self.Error.clear()
        self.Information.clear()
        self.Information.missings_imputed(
            shown=data is not None and data.has_missing())

        if data is not None and not data.domain.attributes:
            data = None
            self.Error.no_attributes()
        self.data = data
        self.switchProblemType(ProblemType.CLASSIFICATION)
        if self.data is not None:
            domain = self.data.domain
            self.info.set_input_summary(len(self.data),
                                        format_summary_details(self.data))

            if domain.has_discrete_class:
                problem_type = ProblemType.CLASSIFICATION
            elif domain.has_continuous_class:
                problem_type = ProblemType.REGRESSION
            elif not domain.class_var:
                self.Information.no_target_var()
                problem_type = ProblemType.UNSUPERVISED
            else:
                # This can happen?
                self.Error.invalid_type(type(domain.class_var).__name__)
                problem_type = None

            if problem_type is not None:
                self.switchProblemType(problem_type)

            self.ranksModel.setVerticalHeaderLabels(domain.attributes)
            self.ranksView.setVHeaderFixedWidthFromLabel(
                max((a.name for a in domain.attributes), key=len))

            self.selectionMethod = OWRank.SelectNBest
        else:
            self.info.set_input_summary(self.info.NoInput)

        self.openContext(data)
        self.selectButtons.button(self.selectionMethod).setChecked(True)

    def handleNewSignals(self):
        self.setStatusMessage('Running')
        self.updateScores()
        self.setStatusMessage('')
        self.on_select()

    @Inputs.scorer
    def set_learner(self, scorer, id):  # pylint: disable=redefined-builtin
        if scorer is None:
            self.scorers.pop(id, None)
        else:
            # Avoid caching a (possibly stale) previous instance of the same
            # Scorer passed via the same signal
            if id in self.scorers:
                # pylint: disable=no-member
                self.get_scorer_scores.cache_clear()

            self.scorers[id] = ScoreMeta(
                scorer.name, scorer.name, scorer,
                ProblemType.from_variable(scorer.class_type), False)

    @memoize_method()
    def get_method_scores(self, method):
        # These errors often happen, but they result in nans, which
        # are handled correctly by the widget
        estimator = method.scorer()
        data = self.data
        try:
            scores = np.asarray(estimator(data))
        except ValueError:
            try:
                scores = np.array(
                    [estimator(data, attr) for attr in data.domain.attributes])
            except ValueError:
                log.error("%s doesn't work on this data", method.name)
                scores = np.full(len(data.domain.attributes), np.nan)
            else:
                log.warning(
                    "%s had to be computed separately for each "
                    "variable", method.name)
        return scores

    @memoize_method()
    def get_scorer_scores(self, scorer):
        try:
            scores = scorer.scorer.score_data(self.data).T
        except (ValueError, TypeError):
            log.error("%s doesn't work on this data", scorer.name)
            scores = np.full((len(self.data.domain.attributes), 1), np.nan)

        labels = ((scorer.shortname, ) if scores.shape[1] == 1 else tuple(
            scorer.shortname + '_' + str(i)
            for i in range(1, 1 + scores.shape[1])))
        return scores, labels

    def updateScores(self):
        if self.data is None:
            self.ranksModel.clear()
            self.Outputs.scores.send(None)
            return

        methods = [
            method for method in SCORES if
            (method.name in self.selected_methods
             and method.problem_type == self.problem_type_mode and
             (not issparse(self.data.X) or method.scorer.supports_sparse_data))
        ]

        scorers = []
        self.Error.inadequate_learner.clear()
        for scorer in self.scorers.values():
            if scorer.problem_type in (self.problem_type_mode,
                                       ProblemType.UNSUPERVISED):
                scorers.append(scorer)
            else:
                self.Error.inadequate_learner(scorer.name,
                                              scorer.learner_adequacy_err_msg)

        method_scores = tuple(
            self.get_method_scores(method) for method in methods)

        scorer_scores, scorer_labels = (), ()
        if scorers:
            scorer_scores, scorer_labels = zip(*(self.get_scorer_scores(scorer)
                                                 for scorer in scorers))
            scorer_labels = tuple(chain.from_iterable(scorer_labels))

        labels = tuple(method.shortname for method in methods) + scorer_labels
        model_array = np.column_stack(([
            len(a.values) if a.is_discrete else np.nan
            for a in self.data.domain.attributes
        ], ) + (method_scores if method_scores else ()) +
                                      (scorer_scores if scorer_scores else ()))
        for column, values in enumerate(model_array.T):
            self.ranksModel.setExtremesFrom(column, values)

        self.ranksModel.wrap(model_array.tolist())
        self.ranksModel.setHorizontalHeaderLabels(('#', ) + labels)
        self.ranksView.setColumnWidth(0, 40)

        # Re-apply sort
        try:
            sort_column, sort_order = self.sorting
            if sort_column < len(labels):
                # adds 1 for '#' (discrete count) column
                self.ranksModel.sort(sort_column + 1, sort_order)
                self.ranksView.horizontalHeader().setSortIndicator(
                    sort_column + 1, sort_order)
        except ValueError:
            pass

        self.autoSelection()
        self.Outputs.scores.send(self.create_scores_table(labels))

    def on_select(self):
        # Save indices of attributes in the original, unsorted domain
        self.selected_rows = list(
            self.ranksModel.mapToSourceRows([
                i.row()
                for i in self.ranksView.selectionModel().selectedRows(0)
            ]))
        self.commit()

    def setSelectionMethod(self, method):
        self.selectionMethod = method
        self.selectButtons.button(method).setChecked(True)
        self.autoSelection()

    def autoSelection(self):
        selModel = self.ranksView.selectionModel()
        model = self.ranksModel
        rowCount = model.rowCount()
        columnCount = model.columnCount()

        if self.selectionMethod == OWRank.SelectNone:
            selection = QItemSelection()
        elif self.selectionMethod == OWRank.SelectAll:
            selection = QItemSelection(
                model.index(0, 0), model.index(rowCount - 1, columnCount - 1))
        elif self.selectionMethod == OWRank.SelectNBest:
            nSelected = min(self.nSelected, rowCount)
            selection = QItemSelection(
                model.index(0, 0), model.index(nSelected - 1, columnCount - 1))
        else:
            selection = QItemSelection()
            if self.selected_rows is not None:
                for row in model.mapFromSourceRows(self.selected_rows):
                    selection.append(
                        QItemSelectionRange(model.index(row, 0),
                                            model.index(row, columnCount - 1)))

        selModel.select(selection, QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        if index >= 1 and self.selectionMethod == OWRank.SelectNBest:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states
        sort_order = self.ranksModel.sortOrder()
        sort_column = self.ranksModel.sortColumn(
        ) - 1  # -1 for '#' (discrete count) column
        self.sorting = (sort_column, sort_order)

    def methodSelectionChanged(self, state, method_name):
        if state == Qt.Checked:
            self.selected_methods.add(method_name)
        elif method_name in self.selected_methods:
            self.selected_methods.remove(method_name)

        self.updateScores()

    def send_report(self):
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
        if self.out_domain_desc is not None:
            self.report_items("Output", self.out_domain_desc)

    def commit(self):
        selected_attrs = []
        if self.data is not None:
            selected_attrs = [
                self.data.domain.attributes[i] for i in self.selected_rows
            ]
        if not selected_attrs:
            self.Outputs.reduced_data.send(None)
            self.Outputs.features.send(None)
            self.out_domain_desc = None
            self.info.set_output_summary(self.info.NoOutput)
        else:
            reduced_domain = Domain(selected_attrs, self.data.domain.class_var,
                                    self.data.domain.metas)
            data = self.data.transform(reduced_domain)
            self.Outputs.reduced_data.send(data)
            self.Outputs.features.send(AttributeList(selected_attrs))
            self.out_domain_desc = report.describe_domain(data.domain)
            self.info.set_output_summary(len(data),
                                         format_summary_details(data))

    def create_scores_table(self, labels):
        model_list = self.ranksModel.tolist()
        if not model_list or len(
                model_list[0]) == 1:  # Empty or just n_values column
            return None

        domain = Domain([ContinuousVariable(label) for label in labels],
                        metas=[StringVariable("Feature")])

        # Prevent np.inf scores
        finfo = np.finfo(np.float64)
        scores = np.clip(np.array(model_list)[:, 1:], finfo.min, finfo.max)

        feature_names = np.array([a.name for a in self.data.domain.attributes])
        # Reshape to 2d array as Table does not like 1d arrays
        feature_names = feature_names[:, None]

        new_table = Table(domain, scores, metas=feature_names)
        new_table.name = "Feature Scores"
        return new_table

    @classmethod
    def migrate_settings(cls, settings, version):
        # If older settings, restore sort header to default
        # Saved selected_rows will likely be incorrect
        if version is None or version < 2:
            column, order = 0, Qt.DescendingOrder
            headerState = settings.pop("headerState", None)

            # Lacking knowledge of last problemType, use discrete ranks view's ordering
            if isinstance(headerState, (tuple, list)):
                headerState = headerState[0]

            if isinstance(headerState, bytes):
                hview = QHeaderView(Qt.Horizontal)
                hview.restoreState(headerState)
                column, order = hview.sortIndicatorSection(
                ) - 1, hview.sortIndicatorOrder()
            settings["sorting"] = (column, order)

    @classmethod
    def migrate_context(cls, context, version):
        if version is None or version < 2:
            # Old selection was saved as sorted indices. New selection is original indices.
            # Since we can't devise the latter without first computing the ranks,
            # just reset the selection to avoid confusion.
            context.values['selected_rows'] = []
Beispiel #22
0
class ImagePlot(QWidget, OWComponent, SelectionGroupMixin,
                ImageColorSettingMixin, ImageZoomMixin, ConcurrentMixin):

    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    gamma = Setting(0)

    selection_changed = Signal()
    image_updated = Signal()

    def __init__(self, parent):
        QWidget.__init__(self)
        OWComponent.__init__(self, parent)
        SelectionGroupMixin.__init__(self)
        ImageColorSettingMixin.__init__(self)
        ImageZoomMixin.__init__(self)
        ConcurrentMixin.__init__(self)
        self.parent = parent

        self.selection_type = SELECTMANY
        self.saving_enabled = True
        self.selection_enabled = True
        self.viewtype = INDIVIDUAL  # required bt InteractiveViewBox
        self.highlighted = None
        self.data_points = None
        self.data_values = None
        self.data_imagepixels = None
        self.data_valid_positions = None

        self.plotview = pg.GraphicsLayoutWidget()
        self.plot = pg.PlotItem(background="w",
                                viewBox=InteractiveViewBox(self))
        self.plotview.addItem(self.plot)

        self.legend = ImageColorLegend()
        self.plotview.addItem(self.legend)

        self.plot.scene().installEventFilter(
            HelpEventDelegate(self.help_event, self))

        layout = QVBoxLayout()
        self.setLayout(layout)
        self.layout().setContentsMargins(0, 0, 0, 0)
        self.layout().addWidget(self.plotview)

        self.img = ImageItemNan()
        self.img.setOpts(axisOrder='row-major')
        self.plot.addItem(self.img)
        self.plot.vb.setAspectLocked()
        self.plot.scene().sigMouseMoved.connect(self.plot.vb.mouseMovedEvent)

        layout = QGridLayout()
        self.plotview.setLayout(layout)
        self.button = QPushButton("Menu", self.plotview)
        self.button.setAutoDefault(False)

        layout.setRowStretch(1, 1)
        layout.setColumnStretch(1, 1)
        layout.addWidget(self.button, 0, 0)
        view_menu = MenuFocus(self)
        self.button.setMenu(view_menu)

        # prepare interface according to the new context
        self.parent.contextAboutToBeOpened.connect(
            lambda x: self.init_interface_data(x[0]))

        actions = []

        self.add_zoom_actions(view_menu)

        select_square = QAction(
            "Select (square)",
            self,
            triggered=self.plot.vb.set_mode_select_square,
        )
        select_square.setShortcuts([Qt.Key_S])
        select_square.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(select_square)

        select_polygon = QAction(
            "Select (polygon)",
            self,
            triggered=self.plot.vb.set_mode_select_polygon,
        )
        select_polygon.setShortcuts([Qt.Key_P])
        select_polygon.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(select_polygon)

        if self.saving_enabled:
            save_graph = QAction(
                "Save graph",
                self,
                triggered=self.save_graph,
            )
            save_graph.setShortcuts(
                [QKeySequence(Qt.ControlModifier | Qt.Key_I)])
            actions.append(save_graph)

        view_menu.addActions(actions)
        self.addActions(actions)

        common_options = dict(labelWidth=50,
                              orientation=Qt.Horizontal,
                              sendSelectedValue=True)

        choose_xy = QWidgetAction(self)
        box = gui.vBox(self)
        box.setFocusPolicy(Qt.TabFocus)
        self.xy_model = DomainModel(DomainModel.METAS | DomainModel.CLASSES,
                                    valid_types=DomainModel.PRIMITIVE)
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self.update_attr,
                                      model=self.xy_model,
                                      **common_options)
        self.cb_attr_y = gui.comboBox(box,
                                      self,
                                      "attr_y",
                                      label="Axis y:",
                                      callback=self.update_attr,
                                      model=self.xy_model,
                                      **common_options)
        box.setFocusProxy(self.cb_attr_x)

        box.layout().addWidget(self.color_settings_box())

        choose_xy.setDefaultWidget(box)
        view_menu.addAction(choose_xy)

        self.lsx = None  # info about the X axis
        self.lsy = None  # info about the Y axis

        self.data = None
        self.data_ids = {}

    def init_interface_data(self, data):
        same_domain = (self.data and data and data.domain == self.data.domain)
        if not same_domain:
            self.init_attr_values(data)

    def help_event(self, ev):
        pos = self.plot.vb.mapSceneToView(ev.scenePos())
        sel = self._points_at_pos(pos)
        prepared = []
        if sel is not None:
            data, vals, points = self.data[sel], self.data_values[
                sel], self.data_points[sel]
            for d, v, p in zip(data, vals, points):
                basic = "({}, {}): {}".format(p[0], p[1], v)
                variables = [
                    v for v in self.data.domain.metas +
                    self.data.domain.class_vars
                    if v not in [self.attr_x, self.attr_y]
                ]
                features = [
                    '{} = {}'.format(attr.name, d[attr]) for attr in variables
                ]
                prepared.append("\n".join([basic] + features))
        text = "\n\n".join(prepared)
        if text:
            text = ('<span style="white-space:pre">{}</span>'.format(
                escape(text)))
            QToolTip.showText(ev.screenPos(), text, widget=self.plotview)
            return True
        else:
            return False

    def update_attr(self):
        self.update_view()

    def init_attr_values(self, data):
        domain = data.domain if data is not None else None
        self.xy_model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None
        self.attr_y = self.xy_model[1] if len(self.xy_model) >= 2 \
            else self.attr_x

    def save_graph(self):
        saveplot.save_plot(self.plotview, self.parent.graph_writers)

    def set_data(self, data):
        if data:
            self.data = data
            self.data_ids = {e: i for i, e in enumerate(data.ids)}
            self.restore_selection_settings()
        else:
            self.data = None
            self.data_ids = {}

    def refresh_img_selection(self):
        selected_px = np.zeros((self.lsy[2], self.lsx[2]), dtype=np.uint8)
        selected_px[self.data_imagepixels[self.data_valid_positions, 0],
                    self.data_imagepixels[self.data_valid_positions, 1]] = \
            self.selection_group[self.data_valid_positions]
        self.img.setSelection(selected_px)

    def make_selection(self, selected):
        """Add selected indices to the selection."""
        add_to_group, add_group, remove = selection_modifiers()
        if self.data and self.lsx and self.lsy:
            if add_to_group:  # both keys - need to test it before add_group
                selnum = np.max(self.selection_group)
            elif add_group:
                selnum = np.max(self.selection_group) + 1
            elif remove:
                selnum = 0
            else:
                self.selection_group *= 0
                selnum = 1
            if selected is not None:
                self.selection_group[selected] = selnum
            self.refresh_img_selection()
        self.prepare_settings_for_saving()
        self.selection_changed.emit()

    def select_square(self, p1, p2):
        """ Select elements within a square drawn by the user.
        A selection needs to contain whole pixels """
        x1, y1 = p1.x(), p1.y()
        x2, y2 = p2.x(), p2.y()
        polygon = [
            QPointF(x1, y1),
            QPointF(x2, y1),
            QPointF(x2, y2),
            QPointF(x1, y2),
            QPointF(x1, y1)
        ]
        self.select_polygon(polygon)

    def select_polygon(self, polygon):
        """ Select by a polygon which has to contain whole pixels. """
        if self.data and self.lsx and self.lsy:
            polygon = [(p.x(), p.y()) for p in polygon]
            # a polygon should contain all pixel
            shiftx = _shift(self.lsx)
            shifty = _shift(self.lsy)
            points_edges = [
                self.data_points + [[shiftx, shifty]],
                self.data_points + [[-shiftx, shifty]],
                self.data_points + [[shiftx, -shifty]],
                self.data_points + [[-shiftx, -shifty]]
            ]
            inp = in_polygon(points_edges[0], polygon)
            for p in points_edges[1:]:
                inp *= in_polygon(p, polygon)
            self.make_selection(inp)

    def _points_at_pos(self, pos):
        if self.data and self.lsx and self.lsy:
            x, y = pos.x(), pos.y()
            distance = np.abs(self.data_points - [[x, y]])
            sel = (distance[:, 0] < _shift(self.lsx)) * (distance[:, 1] <
                                                         _shift(self.lsy))
            return sel

    def select_by_click(self, pos):
        sel = self._points_at_pos(pos)
        self.make_selection(sel)

    def update_view(self):
        self.cancel()
        self.parent.Error.image_too_big.clear()
        self.parent.Information.not_shown.clear()
        self.img.clear()
        self.img.setSelection(None)
        self.legend.set_colors(None)
        self.lsx = None
        self.lsy = None
        self.data_points = None
        self.data_values = None
        self.data_imagepixels = None
        self.data_valid_positions = None

        if self.data and self.attr_x and self.attr_y:
            self.start(self.compute_image, self.data, self.attr_x, self.attr_y,
                       self.parent.integrate_fn())
        else:
            self.image_updated.emit()

    @staticmethod
    def compute_image(data: Orange.data.Table, attr_x, attr_y, integrate_fn,
                      state: TaskState):
        def progress_interrupt(i: float):
            if state.is_interruption_requested():
                raise InterruptException

        class Result():
            pass

        res = Result()

        xat = data.domain[attr_x]
        yat = data.domain[attr_y]

        ndom = Domain([xat, yat])
        datam = data.transform(ndom)
        progress_interrupt(0)
        res.coorx = datam.X[:, 0]
        res.coory = datam.X[:, 1]
        res.data_points = datam.X
        res.lsx = lsx = values_to_linspace(res.coorx)
        res.lsy = lsy = values_to_linspace(res.coory)
        progress_interrupt(0)

        if lsx[-1] * lsy[-1] > IMAGE_TOO_BIG:
            raise ImageTooBigException((lsx[-1], lsy[-1]))

        # the code bellow does this, but part-wise:
        # d = integrate_fn(data).X[:, 0]
        parts = []
        for slice in split_to_size(len(data), 10000):
            part = integrate_fn(data[slice]).X[:, 0]
            parts.append(part)
            progress_interrupt(0)
        d = np.concatenate(parts)

        res.d = d
        progress_interrupt(0)

        return res

    def on_done(self, res):

        self.lsx, self.lsy = res.lsx, res.lsy
        lsx, lsy = self.lsx, self.lsy

        d = res.d

        self.data_points = res.data_points

        xindex, xnan = index_values_nan(res.coorx, self.lsx)
        yindex, ynan = index_values_nan(res.coory, self.lsy)
        self.data_valid_positions = valid = np.logical_not(
            np.logical_or(xnan, ynan))
        invalid_positions = len(d) - np.sum(valid)
        if invalid_positions:
            self.parent.Information.not_shown(invalid_positions)

        imdata = np.ones((lsy[2], lsx[2])) * float("nan")
        imdata[yindex[valid], xindex[valid]] = d[valid]
        self.data_values = d
        self.data_imagepixels = np.vstack((yindex, xindex)).T

        self.img.setImage(imdata, autoLevels=False)
        self.update_levels()
        self.update_color_schema()

        # shift centres of the pixels so that the axes are useful
        shiftx = _shift(lsx)
        shifty = _shift(lsy)
        left = lsx[0] - shiftx
        bottom = lsy[0] - shifty
        width = (lsx[1] - lsx[0]) + 2 * shiftx
        height = (lsy[1] - lsy[0]) + 2 * shifty
        self.img.setRect(QRectF(left, bottom, width, height))

        self.refresh_img_selection()
        self.image_updated.emit()

    def on_partial_result(self, result):
        pass

    def on_exception(self, ex: Exception):
        if isinstance(ex, InterruptException):
            return

        if isinstance(ex, ImageTooBigException):
            self.parent.Error.image_too_big(ex.args[0][0], ex.args[0][1])
            self.image_updated.emit()
        else:
            raise ex
class OWFeatureStatistics(widget.OWWidget):
    name = 'Feature Statistics'
    description = 'Show basic statistics for data features.'
    icon = 'icons/FeatureStatistics.svg'

    class Inputs:
        data = Input('Data', Table, default=True)

    class Outputs:
        reduced_data = Output('Reduced Data', Table, default=True)
        statistics = Output('Statistics', Table)

    want_main_area = True
    buttons_area_orientation = Qt.Vertical

    settingsHandler = DomainContextHandler()

    auto_commit = ContextSetting(True)
    color_var = ContextSetting(None)  # type: Optional[Variable]
    # filter_string = ContextSetting('')

    sorting = ContextSetting((0, Qt.DescendingOrder))
    selected_rows = ContextSetting([])

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Table]

        # Information panel
        info_box = gui.vBox(self.controlArea, 'Info')
        info_box.setMinimumWidth(200)
        self.info_summary = gui.widgetLabel(info_box, wordWrap=True)
        self.info_attr = gui.widgetLabel(info_box, wordWrap=True)
        self.info_class = gui.widgetLabel(info_box, wordWrap=True)
        self.info_meta = gui.widgetLabel(info_box, wordWrap=True)
        self.set_info()

        # TODO: Implement filtering on the model
        # filter_box = gui.vBox(self.controlArea, 'Filter')
        # self.filter_text = gui.lineEdit(
        #     filter_box, self, value='filter_string',
        #     placeholderText='Filter variables by name',
        #     callback=self._filter_table_variables, callbackOnType=True,
        # )
        # shortcut = QShortcut(QKeySequence('Ctrl+f'), self, self.filter_text.setFocus)
        # shortcut.setWhatsThis('Filter variables by name')

        self.color_var_model = DomainModel(
            valid_types=(ContinuousVariable, DiscreteVariable),
            placeholder='None',
        )
        box = gui.vBox(self.controlArea, 'Histogram')
        self.cb_color_var = gui.comboBox(
            box,
            master=self,
            value='color_var',
            model=self.color_var_model,
            label='Color:',
            orientation=Qt.Horizontal,
        )
        self.cb_color_var.activated.connect(self.__color_var_changed)

        gui.rubber(self.controlArea)
        gui.auto_commit(
            self.buttonsArea,
            self,
            'auto_commit',
            'Send Selected Rows',
            'Send Automatically',
        )

        # Main area
        self.model = FeatureStatisticsTableModel(parent=self)
        self.table_view = FeatureStatisticsTableView(self.model, parent=self)
        self.table_view.selectionModel().selectionChanged.connect(
            self.on_select)
        self.table_view.horizontalHeader().sectionClicked.connect(
            self.on_header_click)

        self.mainArea.layout().addWidget(self.table_view)

    def sizeHint(self):
        return QSize(1050, 500)

    def _filter_table_variables(self):
        regex = QRegExp(self.filter_string)
        # If the user explicitly types different cases, we assume they know
        # what they are searching for and account for letter case in filter
        different_case = (any(c.islower() for c in self.filter_string)
                          and any(c.isupper() for c in self.filter_string))
        if not different_case:
            regex.setCaseSensitivity(Qt.CaseInsensitive)

    @Inputs.data
    def set_data(self, data):
        # Clear outputs and reset widget state
        self.closeContext()
        self.selected_rows = []
        self.model.resetSorting()
        self.Outputs.reduced_data.send(None)
        self.Outputs.statistics.send(None)

        # Setup widget state for new data and restore settings
        self.data = data

        if data is not None:
            self.color_var_model.set_domain(data.domain)
            self.color_var = None
            if self.data.domain.class_vars:
                self.color_var = self.data.domain.class_vars[0]
        else:
            self.color_var_model.set_domain(None)
            self.color_var = None
        self.model.set_data(data)

        self.openContext(self.data)
        self.__restore_selection()
        self.__restore_sorting()
        # self._filter_table_variables()
        self.__color_var_changed()

        self.set_info()
        self.commit()

    def __restore_selection(self):
        """Restore the selection on the table view from saved settings."""
        selection_model = self.table_view.selectionModel()
        selection = QItemSelection()
        if len(self.selected_rows):
            for row in self.model.mapFromSourceRows(self.selected_rows):
                selection.append(
                    QItemSelectionRange(
                        self.model.index(row, 0),
                        self.model.index(row,
                                         self.model.columnCount() - 1)))
        selection_model.select(selection, QItemSelectionModel.ClearAndSelect)

    def __restore_sorting(self):
        """Restore the sort column and order from saved settings."""
        sort_column, sort_order = self.sorting
        if self.data is not None and sort_column < self.model.columnCount():
            self.model.sort(sort_column, sort_order)
            self.table_view.horizontalHeader().setSortIndicator(
                sort_column, sort_order)

    @pyqtSlot(int)
    def on_header_click(self, *_):
        # Store the header states
        sort_order = self.model.sortOrder()
        sort_column = self.model.sortColumn()
        self.sorting = sort_column, sort_order

    @pyqtSlot(int)
    def __color_var_changed(self, *_):
        if self.model is not None:
            self.model.set_target_var(self.color_var)

    def _format_variables_string(self, variables):
        agg = []
        for var_type_name, var_type in [('categorical', DiscreteVariable),
                                        ('numeric', ContinuousVariable),
                                        ('time', TimeVariable),
                                        ('string', StringVariable)]:
            # Disable pylint here because a `TimeVariable` is also a
            # `ContinuousVariable`, and should be labelled as such. That is why
            # it is necessary to check the type this way instead of using
            # `isinstance`, which would fail in the above case
            var_type_list = [v for v in variables if type(v) is var_type]  # pylint: disable=unidiomatic-typecheck
            if var_type_list:
                shown = var_type in self.model.HIDDEN_VAR_TYPES
                agg.append(('%d %s%s' % (len(var_type_list), var_type_name,
                                         ['', ' (not shown)'][shown]),
                            len(var_type_list)))

        if not agg:
            return 'No variables'

        attrs, counts = list(zip(*agg))
        if len(attrs) > 1:
            var_string = ', '.join(attrs[:-1]) + ' and ' + attrs[-1]
        else:
            var_string = attrs[0]
        return plural('%s variable{s}' % var_string, sum(counts))

    def set_info(self):
        if self.data is not None:
            self.info_summary.setText(
                '<b>%s</b> contains %s with %s' %
                (self.data.name,
                 plural('{number} instance{s}', self.model.n_instances),
                 plural('{number} feature{s}', self.model.n_attributes)))

            self.info_attr.setText(
                '<b>Attributes:</b><br>%s' %
                self._format_variables_string(self.data.domain.attributes))
            self.info_class.setText(
                '<b>Class variables:</b><br>%s' %
                self._format_variables_string(self.data.domain.class_vars))
            self.info_meta.setText(
                '<b>Metas:</b><br>%s' %
                self._format_variables_string(self.data.domain.metas))
        else:
            self.info_summary.setText('No data on input.')
            self.info_attr.setText('')
            self.info_class.setText('')
            self.info_meta.setText('')

    def on_select(self):
        self.selected_rows = self.model.mapToSourceRows(
            [i.row() for i in self.table_view.selectionModel().selectedRows()])
        self.commit()

    def commit(self):
        if not len(self.selected_rows):
            self.Outputs.reduced_data.send(None)
            self.Outputs.statistics.send(None)
            return

        # Send a table with only selected columns to output
        variables = self.model.variables[self.selected_rows]
        self.Outputs.reduced_data.send(self.data[:, variables])

        # Send the statistics of the selected variables to ouput
        labels, data = self.model.get_statistics_matrix(variables,
                                                        return_labels=True)
        var_names = np.atleast_2d([var.name for var in variables]).T
        domain = Domain(
            attributes=[ContinuousVariable(name) for name in labels],
            metas=[StringVariable('Feature')])
        statistics = Table(domain, data, metas=var_names)
        statistics.name = '%s (Feature Statistics)' % self.data.name
        self.Outputs.statistics.send(statistics)

    def send_report(self):
        pass
Beispiel #24
0
class OWHyper(OWWidget):
    name = "HyperSpectra"

    class Inputs:
        data = Input("Data", Orange.data.Table, default=True)

    class Outputs:
        selected_data = Output("Selection", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    icon = "icons/hyper.svg"
    priority = 20
    replaces = ["orangecontrib.infrared.widgets.owhyper.OWHyper"]

    settings_version = 4
    settingsHandler = DomainContextHandler()

    imageplot = SettingProvider(ImagePlot)
    curveplot = SettingProvider(CurvePlotHyper)

    integration_method = Setting(0)
    integration_methods = Integrate.INTEGRALS
    value_type = Setting(0)
    attr_value = ContextSetting(None)

    lowlim = Setting(None)
    highlim = Setting(None)
    choose = Setting(None)

    graph_name = "imageplot.plotview"  # defined so that the save button is shown

    class Warning(OWWidget.Warning):
        threshold_error = Msg("Low slider should be less than High")

    class Error(OWWidget.Error):
        image_too_big = Msg("Image for chosen features is too big ({} x {}).")

    class Information(OWWidget.Information):
        not_shown = Msg("Undefined positions: {} data point(s) are not shown.")

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            # delete the saved attr_value to prevent crashes
            try:
                del settings_["context_settings"][0].values["attr_value"]
            except:  # pylint: disable=bare-except
                pass

        # migrate selection
        if version <= 2:
            try:
                current_context = settings_["context_settings"][0]
                selection = getattr(current_context, "selection", None)
                if selection is not None:
                    selection = [(i, 1)
                                 for i in np.flatnonzero(np.array(selection))]
                    settings_.setdefault(
                        "imageplot", {})["selection_group_saved"] = selection
            except:  # pylint: disable=bare-except
                pass

    @classmethod
    def migrate_context(cls, context, version):
        if version <= 3 and "curveplot" in context.values:
            CurvePlot.migrate_context_sub_feature_color(
                context.values["curveplot"], version)

    def __init__(self):
        super().__init__()

        dbox = gui.widgetBox(self.controlArea, "Image values")

        rbox = gui.radioButtons(dbox,
                                self,
                                "value_type",
                                callback=self._change_integration)

        gui.appendRadioButton(rbox, "From spectra")

        self.box_values_spectra = gui.indentedBox(rbox)

        gui.comboBox(self.box_values_spectra,
                     self,
                     "integration_method",
                     items=(a.name for a in self.integration_methods),
                     callback=self._change_integral_type)
        gui.rubber(self.controlArea)

        gui.appendRadioButton(rbox, "Use feature")

        self.box_values_feature = gui.indentedBox(rbox)

        self.feature_value_model = DomainModel(
            DomainModel.METAS | DomainModel.CLASSES,
            valid_types=DomainModel.PRIMITIVE)
        self.feature_value = gui.comboBox(self.box_values_feature,
                                          self,
                                          "attr_value",
                                          callback=self.update_feature_value,
                                          model=self.feature_value_model)

        splitter = QSplitter(self)
        splitter.setOrientation(Qt.Vertical)
        self.imageplot = ImagePlot(self)
        self.imageplot.selection_changed.connect(self.output_image_selection)

        self.curveplot = CurvePlotHyper(self, select=SELECTONE)
        self.curveplot.selection_changed.connect(self.redraw_integral_info)
        self.curveplot.plot.vb.x_padding = 0.005  # pad view so that lines are not hidden
        splitter.addWidget(self.imageplot)
        splitter.addWidget(self.curveplot)
        self.mainArea.layout().addWidget(splitter)

        self.line1 = MovableVline(position=self.lowlim,
                                  label="",
                                  report=self.curveplot)
        self.line1.sigMoved.connect(lambda v: setattr(self, "lowlim", v))
        self.line2 = MovableVline(position=self.highlim,
                                  label="",
                                  report=self.curveplot)
        self.line2.sigMoved.connect(lambda v: setattr(self, "highlim", v))
        self.line3 = MovableVline(position=self.choose,
                                  label="",
                                  report=self.curveplot)
        self.line3.sigMoved.connect(lambda v: setattr(self, "choose", v))
        for line in [self.line1, self.line2, self.line3]:
            line.sigMoveFinished.connect(self.changed_integral_range)
            self.curveplot.add_marking(line)
            line.hide()

        self.markings_integral = []

        self.data = None
        self.disable_integral_range = False

        self.resize(900, 700)
        self._update_integration_type()

        # prepare interface according to the new context
        self.contextAboutToBeOpened.connect(
            lambda x: self.init_interface_data(x[0]))

    def init_interface_data(self, data):
        same_domain = (self.data and data and data.domain == self.data.domain)
        if not same_domain:
            self.init_attr_values(data)

    def output_image_selection(self):
        if not self.data:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            self.curveplot.set_data(None)
            return

        indices = np.flatnonzero(self.imageplot.selection_group)

        annotated_data = groups_or_annotated_table(
            self.data, self.imageplot.selection_group)
        self.Outputs.annotated_data.send(annotated_data)

        selected = self.data[indices]
        self.Outputs.selected_data.send(selected if selected else None)
        if selected:
            self.curveplot.set_data(selected)
        else:
            self.curveplot.set_data(self.data)

    def init_attr_values(self, data):
        domain = data.domain if data is not None else None
        self.feature_value_model.set_domain(domain)
        self.attr_value = self.feature_value_model[
            0] if self.feature_value_model else None

    def redraw_integral_info(self):
        di = {}
        integrate = self.integrate_fn()
        if isinstance(integrate, Integrate) and np.any(
                self.curveplot.selection_group):
            # curveplot can have a subset of curves on the input> match IDs
            ind = np.flatnonzero(self.curveplot.selection_group)[0]
            dind = self.imageplot.data_ids[self.curveplot.data[ind].id]
            dshow = self.data[dind:dind + 1]
            datai = integrate(dshow)
            draw_info = datai.domain.attributes[0].compute_value.draw_info
            di = draw_info(dshow)
        self.refresh_markings(di)

    def refresh_markings(self, di):
        refresh_integral_markings([{
            "draw": di
        }], self.markings_integral, self.curveplot)

    def integrate_fn(self):
        if self.value_type == 0:  # integrals
            imethod = self.integration_methods[self.integration_method]

            if imethod != Integrate.PeakAt:
                return Integrate(methods=imethod,
                                 limits=[[self.lowlim, self.highlim]])
            else:
                return Integrate(methods=imethod,
                                 limits=[[self.choose, self.choose]])
        else:
            return lambda data, attr=self.attr_value: \
                data.transform(Domain([data.domain[attr]]))

    def redraw_data(self):
        self.redraw_integral_info()
        self.imageplot.update_view()

    def update_feature_value(self):
        self.redraw_data()

    def _update_integration_type(self):
        self.line1.hide()
        self.line2.hide()
        self.line3.hide()
        if self.value_type == 0:
            self.box_values_spectra.setDisabled(False)
            self.box_values_feature.setDisabled(True)
            if self.integration_methods[
                    self.integration_method] != Integrate.PeakAt:
                self.line1.show()
                self.line2.show()
            else:
                self.line3.show()
        elif self.value_type == 1:
            self.box_values_spectra.setDisabled(True)
            self.box_values_feature.setDisabled(False)
        QTest.qWait(1)  # first update the interface

    def _change_integration(self):
        # change what to show on the image
        self._update_integration_type()
        self.redraw_data()

    def changed_integral_range(self):
        if self.disable_integral_range:
            return
        self.redraw_data()

    def _change_integral_type(self):
        self._change_integration()

    @Inputs.data
    def set_data(self, data):
        self.closeContext()

        def valid_context(data):
            if data is None:
                return False
            annotation_features = [
                v for v in data.domain.metas + data.domain.class_vars
                if isinstance(v, (DiscreteVariable, ContinuousVariable))
            ]
            return len(annotation_features) >= 1

        if valid_context(data):
            self.openContext(data)
        else:
            # to generate valid interface even if context was not loaded
            self.contextAboutToBeOpened.emit([data])
        self.data = data
        self.imageplot.set_data(data)
        self.curveplot.set_data(data)
        self._init_integral_boundaries()
        self.imageplot.update_view()
        self.output_image_selection()

    def _init_integral_boundaries(self):
        # requires data in curveplot
        self.disable_integral_range = True
        if self.curveplot.data_x is not None and len(self.curveplot.data_x):
            minx = self.curveplot.data_x[0]
            maxx = self.curveplot.data_x[-1]
        else:
            minx = 0.
            maxx = 1.

        if self.lowlim is None or not minx <= self.lowlim <= maxx:
            self.lowlim = minx
        self.line1.setValue(self.lowlim)

        if self.highlim is None or not minx <= self.highlim <= maxx:
            self.highlim = maxx
        self.line2.setValue(self.highlim)

        if self.choose is None:
            self.choose = (minx + maxx) / 2
        elif self.choose < minx:
            self.choose = minx
        elif self.choose > maxx:
            self.choose = maxx
        self.line3.setValue(self.choose)
        self.disable_integral_range = False

    def save_graph(self):
        self.imageplot.save_graph()

    def onDeleteWidget(self):
        self.curveplot.shutdown()
        self.imageplot.shutdown()
        super().onDeleteWidget()
Beispiel #25
0
class OWNomogram(OWWidget):
    name = "Nomogram"
    description = " Nomograms for Visualization of Naive Bayesian" \
                  " and Logistic Regression Classifiers."
    icon = "icons/Nomogram.svg"
    priority = 2000
    keywords = []

    class Inputs:
        classifier = Input("Classifier", Model)
        data = Input("Data", Table)

    class Outputs:
        features = Output("Features", AttributeList)

    MAX_N_ATTRS = 1000
    POINT_SCALE = 0
    ALIGN_LEFT = 0
    ALIGN_ZERO = 1
    ACCEPTABLE = (NaiveBayesModel, LogisticRegressionClassifier)
    settingsHandler = ClassValuesContextHandler()
    target_class_index = ContextSetting(0)
    normalize_probabilities = Setting(False)
    scale = Setting(1)
    display_index = Setting(1)
    n_attributes = Setting(10)
    sort_index = Setting(SortBy.ABSOLUTE)
    cont_feature_dim_index = Setting(0)

    graph_name = "scene"

    class Error(OWWidget.Error):
        invalid_classifier = Msg("Nomogram accepts only Naive Bayes and "
                                 "Logistic Regression classifiers.")

    def __init__(self):
        super().__init__()
        self.instances = None
        self.domain = None
        self.data = None
        self.classifier = None
        self.align = OWNomogram.ALIGN_ZERO
        self.log_odds_ratios = []
        self.log_reg_coeffs = []
        self.log_reg_coeffs_orig = []
        self.log_reg_cont_data_extremes = []
        self.p = None
        self.b0 = None
        self.points = []
        self.feature_items = {}
        self.feature_marker_values = []
        self.scale_marker_values = lambda x: x
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.old_target_class_index = self.target_class_index
        self.repaint = False

        # GUI
        box = gui.vBox(self.controlArea, "Target class")
        self.class_combo = gui.comboBox(box,
                                        self,
                                        "target_class_index",
                                        callback=self._class_combo_changed,
                                        contentsLength=12)
        self.norm_check = gui.checkBox(
            box,
            self,
            "normalize_probabilities",
            "Normalize probabilities",
            hidden=True,
            callback=self.update_scene,
            tooltip="For multiclass data 1 vs. all probabilities do not"
            " sum to 1 and therefore could be normalized.")

        self.scale_radio = gui.radioButtons(self.controlArea,
                                            self,
                                            "scale",
                                            ["Point scale", "Log odds ratios"],
                                            box="Scale",
                                            callback=self.update_scene)

        box = gui.vBox(self.controlArea, "Display features")
        grid = QGridLayout()
        radio_group = gui.radioButtonsInBox(box,
                                            self,
                                            "display_index", [],
                                            orientation=grid,
                                            callback=self.update_scene)
        radio_all = gui.appendRadioButton(radio_group,
                                          "All",
                                          addToLayout=False)
        radio_best = gui.appendRadioButton(radio_group,
                                           "Best ranked:",
                                           addToLayout=False)
        spin_box = gui.hBox(None, margin=0)
        self.n_spin = gui.spin(spin_box,
                               self,
                               "n_attributes",
                               1,
                               self.MAX_N_ATTRS,
                               label=" ",
                               controlWidth=60,
                               callback=self._n_spin_changed)
        grid.addWidget(radio_all, 1, 1)
        grid.addWidget(radio_best, 2, 1)
        grid.addWidget(spin_box, 2, 2)

        self.sort_combo = gui.comboBox(box,
                                       self,
                                       "sort_index",
                                       label="Rank by:",
                                       items=SortBy.items(),
                                       orientation=Qt.Horizontal,
                                       callback=self.update_scene)

        self.cont_feature_dim_combo = gui.comboBox(
            box,
            self,
            "cont_feature_dim_index",
            label="Numeric features: ",
            items=["1D projection", "2D curve"],
            orientation=Qt.Horizontal,
            callback=self.update_scene)

        gui.rubber(self.controlArea)

        class _GraphicsView(QGraphicsView):
            def __init__(self, scene, parent, **kwargs):
                for k, v in dict(
                        verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                        horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                        viewportUpdateMode=QGraphicsView.
                        BoundingRectViewportUpdate,
                        renderHints=(QPainter.Antialiasing
                                     | QPainter.TextAntialiasing
                                     | QPainter.SmoothPixmapTransform),
                        alignment=(Qt.AlignTop | Qt.AlignLeft),
                        sizePolicy=QSizePolicy(
                            QSizePolicy.MinimumExpanding,
                            QSizePolicy.MinimumExpanding)).items():
                    kwargs.setdefault(k, v)

                super().__init__(scene, parent, **kwargs)

        class GraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(
                    scene,
                    parent,
                    verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                    styleSheet='QGraphicsView {background: white}')
                self.viewport().setMinimumWidth(
                    300)  # XXX: This prevents some tests failing
                self._is_resizing = False

            w = self

            def resizeEvent(self, resizeEvent):
                # Recompute main scene on window width change
                if resizeEvent.size().width() != resizeEvent.oldSize().width():
                    self._is_resizing = True
                    self.w.update_scene()
                    self._is_resizing = False
                return super().resizeEvent(resizeEvent)

            def is_resizing(self):
                return self._is_resizing

            def sizeHint(self):
                return QSize(400, 200)

        class FixedSizeGraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(scene,
                                 parent,
                                 sizePolicy=QSizePolicy(
                                     QSizePolicy.MinimumExpanding,
                                     QSizePolicy.Minimum))

            def sizeHint(self):
                return QSize(400, 85)

        scene = self.scene = QGraphicsScene(self)

        top_view = self.top_view = FixedSizeGraphicsView(scene, self)
        mid_view = self.view = GraphicsView(scene, self)
        bottom_view = self.bottom_view = FixedSizeGraphicsView(scene, self)

        for view in (top_view, mid_view, bottom_view):
            self.mainArea.layout().addWidget(view)

    def _class_combo_changed(self):
        with np.errstate(invalid='ignore'):
            coeffs = [
                np.nan_to_num(p[self.target_class_index] /
                              p[self.old_target_class_index])
                for p in self.points
            ]
        points = [p[self.old_target_class_index] for p in self.points]
        self.feature_marker_values = [
            self.get_points_from_coeffs(v, c, p)
            for (v, c, p) in zip(self.feature_marker_values, coeffs, points)
        ]
        self.feature_marker_values = np.asarray(self.feature_marker_values)
        self.update_scene()
        self.old_target_class_index = self.target_class_index

    def _n_spin_changed(self):
        self.display_index = 1
        self.update_scene()

    def update_controls(self):
        self.class_combo.clear()
        self.norm_check.setHidden(True)
        self.cont_feature_dim_combo.setEnabled(True)
        if self.domain is not None:
            self.class_combo.addItems(self.domain.class_vars[0].values)
            if len(self.domain.attributes) > self.MAX_N_ATTRS:
                self.display_index = 1
            if len(self.domain.class_vars[0].values) > 2:
                self.norm_check.setHidden(False)
            if not self.domain.has_continuous_attributes():
                self.cont_feature_dim_combo.setEnabled(False)
                self.cont_feature_dim_index = 0
        model = self.sort_combo.model()
        item = model.item(SortBy.POSITIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        item = model.item(SortBy.NEGATIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        self.align = OWNomogram.ALIGN_ZERO
        if self.classifier and isinstance(self.classifier,
                                          LogisticRegressionClassifier):
            self.align = OWNomogram.ALIGN_LEFT

    @Inputs.data
    def set_data(self, data):
        self.instances = data
        self.feature_marker_values = []
        self.set_feature_marker_values()
        self.update_scene()

    @Inputs.classifier
    def set_classifier(self, classifier):
        self.closeContext()
        self.classifier = classifier
        self.Error.clear()
        if self.classifier and not isinstance(self.classifier,
                                              self.ACCEPTABLE):
            self.Error.invalid_classifier()
            self.classifier = None
        self.domain = self.classifier.domain if self.classifier else None
        self.data = None
        self.calculate_log_odds_ratios()
        self.calculate_log_reg_coefficients()
        self.update_controls()
        self.target_class_index = 0
        self.openContext(
            self.domain.class_var if self.domain is not None else None)
        self.points = self.log_odds_ratios or self.log_reg_coeffs
        self.feature_marker_values = []
        self.old_target_class_index = self.target_class_index
        self.update_scene()

    def calculate_log_odds_ratios(self):
        self.log_odds_ratios = []
        self.p = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, NaiveBayesModel):
            return

        log_cont_prob = self.classifier.log_cont_prob
        class_prob = self.classifier.class_prob
        for i in range(len(self.domain.attributes)):
            ca = np.exp(log_cont_prob[i]) * class_prob[:, None]
            _or = (ca / (1 - ca)) / (class_prob / (1 - class_prob))[:, None]
            self.log_odds_ratios.append(np.log(_or))
        self.p = class_prob

    def calculate_log_reg_coefficients(self):
        self.log_reg_coeffs = []
        self.log_reg_cont_data_extremes = []
        self.b0 = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, LogisticRegressionClassifier):
            return

        self.domain = self.reconstruct_domain(self.classifier.original_domain,
                                              self.domain)
        self.data = self.classifier.original_data.transform(self.domain)
        attrs, ranges, start = self.domain.attributes, [], 0
        for attr in attrs:
            stop = start + len(attr.values) if attr.is_discrete else start + 1
            ranges.append(slice(start, stop))
            start = stop

        self.b0 = self.classifier.intercept
        coeffs = self.classifier.coefficients
        if len(self.domain.class_var.values) == 2:
            self.b0 = np.hstack((self.b0 * (-1), self.b0))
            coeffs = np.vstack((coeffs * (-1), coeffs))
        self.log_reg_coeffs = [coeffs[:, ranges[i]] for i in range(len(attrs))]
        self.log_reg_coeffs_orig = self.log_reg_coeffs.copy()

        min_values = nanmin(self.data.X, axis=0)
        max_values = nanmax(self.data.X, axis=0)

        for i, min_t, max_t in zip(range(len(self.log_reg_coeffs)), min_values,
                                   max_values):
            if self.log_reg_coeffs[i].shape[1] == 1:
                coef = self.log_reg_coeffs[i]
                self.log_reg_coeffs[i] = np.hstack(
                    (coef * min_t, coef * max_t))
                self.log_reg_cont_data_extremes.append(
                    [sorted([min_t, max_t], reverse=(c < 0)) for c in coef])
            else:
                self.log_reg_cont_data_extremes.append([None])

    def update_scene(self):
        self.clear_scene()
        if self.domain is None or not len(self.points[0]):
            self.Outputs.features.send(None)
            return

        n_attrs = self.n_attributes if self.display_index else int(1e10)
        attr_inds, attributes = zip(*self.get_ordered_attributes()[:n_attrs])
        self.Outputs.features.send(AttributeList(attributes))

        name_items = [QGraphicsTextItem(attr.name) for attr in attributes]
        point_text = QGraphicsTextItem("Points")
        probs_text = QGraphicsTextItem("Probabilities (%)")
        all_items = name_items + [point_text, probs_text]
        name_offset = -max(t.boundingRect().width() for t in all_items) - 10
        w = self.view.viewport().rect().width()
        max_width = w + name_offset - 30

        points = [self.points[i][self.target_class_index] for i in attr_inds]
        if self.align == OWNomogram.ALIGN_LEFT:
            points = [p - p.min() for p in points]
        max_ = np.nan_to_num(max(max(abs(p)) for p in points))
        d = 100 / max_ if max_ else 1
        minimums = [p[self.target_class_index].min() for p in self.points]
        if self.scale == OWNomogram.POINT_SCALE:
            points = [p * d for p in points]

            if self.align == OWNomogram.ALIGN_LEFT:
                self.scale_marker_values = lambda x: (x - minimums) * d
            else:
                self.scale_marker_values = lambda x: x * d
        else:
            if self.align == OWNomogram.ALIGN_LEFT:
                self.scale_marker_values = lambda x: x - minimums
            else:
                self.scale_marker_values = lambda x: x

        point_item, nomogram_head = self.create_main_nomogram(
            attributes, attr_inds, name_items, points, max_width, point_text,
            name_offset)
        probs_item, nomogram_foot = self.create_footer_nomogram(
            probs_text, d, minimums, max_width, name_offset)
        for item in self.feature_items.values():
            item.dot.point_dot = point_item.dot
            item.dot.probs_dot = probs_item.dot
            item.dot.vertical_line = self.hidden_vertical_line

        self.nomogram = nomogram = NomogramItem()
        nomogram.add_items([nomogram_head, self.nomogram_main, nomogram_foot])
        self.scene.addItem(nomogram)

        self.set_feature_marker_values()

        rect = QRectF(self.scene.itemsBoundingRect().x(),
                      self.scene.itemsBoundingRect().y(),
                      self.scene.itemsBoundingRect().width(),
                      self.nomogram.preferredSize().height()).adjusted(
                          10, 0, 20, 0)
        self.scene.setSceneRect(rect)

        # Clip top and bottom (60 and 150) parts from the main view
        self.view.setSceneRect(rect.x(),
                               rect.y() + 80,
                               rect.width() - 10,
                               rect.height() - 160)
        self.view.viewport().setMaximumHeight(rect.height() - 160)
        # Clip main part from top/bottom views
        # below point values are imprecise (less/more than required) but this
        # is not a problem due to clipped scene content still being drawn
        self.top_view.setSceneRect(rect.x(),
                                   rect.y() + 3,
                                   rect.width() - 10, 20)
        self.bottom_view.setSceneRect(rect.x(),
                                      rect.height() - 110,
                                      rect.width() - 10, 30)

    def create_main_nomogram(self, attributes, attr_inds, name_items, points,
                             max_width, point_text, name_offset):
        cls_index = self.target_class_index
        min_p = min(p.min() for p in points)
        max_p = max(p.max() for p in points)
        values = self.get_ruler_values(min_p, max_p, max_width)
        min_p, max_p = min(values), max(values)
        diff_ = np.nan_to_num(max_p - min_p)
        scale_x = max_width / diff_ if diff_ else max_width

        nomogram_header = NomogramItem()
        point_item = RulerItem(point_text, values, scale_x, name_offset,
                               -scale_x * min_p)
        point_item.setPreferredSize(point_item.preferredWidth(), 35)
        nomogram_header.add_items([point_item])

        self.nomogram_main = NomogramItem()
        cont_feature_item_class = ContinuousFeature2DItem if \
            self.cont_feature_dim_index else ContinuousFeatureItem

        feature_items = [
            DiscreteFeatureItem(
                name_item, attr.values, point, scale_x, name_offset, -scale_x *
                min_p) if attr.is_discrete else cont_feature_item_class(
                    name_item, self.log_reg_cont_data_extremes[i][cls_index],
                    self.get_ruler_values(
                        point.min(), point.max(), scale_x *
                        point.ptp(), False), scale_x, name_offset, -scale_x *
                    min_p) for i, attr, name_item, point in zip(
                        attr_inds, attributes, name_items, points)
        ]

        self.nomogram_main.add_items(feature_items)
        self.feature_items = OrderedDict(sorted(zip(attr_inds, feature_items)))

        x = -scale_x * min_p
        y = self.nomogram_main.layout().preferredHeight() + 10
        self.vertical_line = QGraphicsLineItem(x, -6, x, y)
        self.vertical_line.setPen(QPen(Qt.DotLine))
        self.vertical_line.setParentItem(point_item)
        self.hidden_vertical_line = QGraphicsLineItem(x, -6, x, y)
        pen = QPen(Qt.DashLine)
        pen.setBrush(QColor(Qt.red))
        self.hidden_vertical_line.setPen(pen)
        self.hidden_vertical_line.setParentItem(point_item)

        return point_item, nomogram_header

    def get_ordered_attributes(self):
        """Return (in_domain_index, attr) pairs, ordered by method in SortBy combo"""
        if self.domain is None or not self.domain.attributes:
            return []

        attrs = self.domain.attributes
        sort_by = self.sort_index
        class_value = self.target_class_index

        if sort_by == SortBy.NO_SORTING:
            return list(enumerate(attrs))

        elif sort_by == SortBy.NAME:

            def key(x):
                _, attr = x
                return attr.name.lower()

        elif sort_by == SortBy.ABSOLUTE:

            def key(x):
                i, attr = x
                if attr.is_discrete:
                    ptp = self.points[i][class_value].ptp()
                else:
                    coef = np.abs(
                        self.log_reg_coeffs_orig[i][class_value]).mean()
                    ptp = coef * np.ptp(
                        self.log_reg_cont_data_extremes[i][class_value])
                return -ptp

        elif sort_by == SortBy.POSITIVE:

            def key(x):
                i, attr = x
                max_value = (
                    self.points[i][class_value].max() if attr.is_discrete else
                    np.mean(self.log_reg_cont_data_extremes[i][class_value]))
                return -max_value

        elif sort_by == SortBy.NEGATIVE:

            def key(x):
                i, attr = x
                min_value = (
                    self.points[i][class_value].min() if attr.is_discrete else
                    np.mean(self.log_reg_cont_data_extremes[i][class_value]))
                return min_value

        return sorted(enumerate(attrs), key=key)

    def create_footer_nomogram(self, probs_text, d, minimums, max_width,
                               name_offset):
        # pylint: disable=invalid-unary-operand-type
        eps, d_ = 0.05, 1
        k = -np.log(self.p / (1 - self.p)) if self.p is not None else -self.b0
        min_sum = k[self.target_class_index] - np.log((1 - eps) / eps)
        max_sum = k[self.target_class_index] - np.log(eps / (1 - eps))
        if self.align == OWNomogram.ALIGN_LEFT:
            max_sum = max_sum - sum(minimums)
            min_sum = min_sum - sum(minimums)
            for i in range(len(k)):  # pylint: disable=consider-using-enumerate
                k[i] = k[i] - sum(
                    [min(q) for q in [p[i] for p in self.points]])
        if self.scale == OWNomogram.POINT_SCALE:
            min_sum *= d
            max_sum *= d
            d_ = d

        values = self.get_ruler_values(min_sum, max_sum, max_width)
        min_sum, max_sum = min(values), max(values)
        diff_ = np.nan_to_num(max_sum - min_sum)
        scale_x = max_width / diff_ if diff_ else max_width
        cls_var, cls_index = self.domain.class_var, self.target_class_index
        nomogram_footer = NomogramItem()

        def get_normalized_probabilities(val):
            if not self.normalize_probabilities:
                return 1 / (1 + np.exp(k[cls_index] - val / d_))
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return 1 / (1 + np.exp(k[cls_index] - val / d_)) / p_sum

        def get_points(prob):
            if not self.normalize_probabilities:
                return (k[cls_index] - np.log(1 / prob - 1)) * d_
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return (k[cls_index] - np.log(1 / (prob * p_sum) - 1)) * d_

        probs_item = ProbabilitiesRulerItem(
            probs_text,
            values,
            scale_x,
            name_offset,
            -scale_x * min_sum,
            get_points=get_points,
            title="{}='{}'".format(cls_var.name, cls_var.values[cls_index]),
            get_probabilities=get_normalized_probabilities)
        nomogram_footer.add_items([probs_item])
        return probs_item, nomogram_footer

    def __get_totals_for_class_values(self, minimums):
        cls_index = self.target_class_index
        marker_values = self.scale_marker_values(self.feature_marker_values)
        totals = np.full(len(self.domain.class_var.values), np.nan)
        totals[cls_index] = marker_values.sum()
        for i in range(len(self.domain.class_var.values)):
            if i == cls_index:
                continue
            coeffs = [np.nan_to_num(p[i] / p[cls_index]) for p in self.points]
            points = [p[cls_index] for p in self.points]
            total = sum([
                self.get_points_from_coeffs(v, c, p)
                for (v, c,
                     p) in zip(self.feature_marker_values, coeffs, points)
            ])
            if self.align == OWNomogram.ALIGN_LEFT:
                points = [p - m for m, p in zip(minimums, points)]
                total -= sum([min(p) for p in [p[i] for p in self.points]])
            d = 100 / max(max(abs(p)) for p in points)
            if self.scale == OWNomogram.POINT_SCALE:
                total *= d
            totals[i] = total
        assert not np.any(np.isnan(totals))
        return totals

    def set_feature_marker_values(self):
        if not (len(self.points) and len(self.feature_items)):
            return
        if not len(self.feature_marker_values):
            self._init_feature_marker_values()
        marker_values = self.scale_marker_values(self.feature_marker_values)

        invisible_sum = 0
        for i, marker in enumerate(marker_values):
            try:
                item = self.feature_items[i]
            except KeyError:
                invisible_sum += marker
            else:
                item.dot.move_to_val(marker)

        item.dot.probs_dot.move_to_sum(invisible_sum)

    def _init_feature_marker_values(self):
        self.feature_marker_values = []
        cls_index = self.target_class_index
        instances = Table(self.domain, self.instances) \
            if self.instances else None
        values = []
        for i, attr in enumerate(self.domain.attributes):
            value, feature_val = 0, None
            if len(self.log_reg_coeffs):
                if attr.is_discrete:
                    ind, n = unique(self.data.X[:, i], return_counts=True)
                    feature_val = np.nan_to_num(ind[np.argmax(n)])
                else:
                    feature_val = nanmean(self.data.X[:, i])

            # If data is provided on a separate signal, use the first data
            # instance to position the points instead of the mean
            inst_in_dom = instances and attr in instances.domain
            if inst_in_dom and not np.isnan(instances[0][attr]):
                feature_val = instances[0][attr]

            if feature_val is not None:
                value = (self.points[i][cls_index][int(feature_val)]
                         if attr.is_discrete else
                         self.log_reg_coeffs_orig[i][cls_index][0] *
                         feature_val)
            values.append(value)
        self.feature_marker_values = np.asarray(values)

    def clear_scene(self):
        self.feature_items = {}
        self.scale_marker_values = lambda x: x
        self.nomogram = None
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.scene.clear()

    def send_report(self):
        self.report_plot()

    @staticmethod
    def reconstruct_domain(original, preprocessed):
        # abuse dict to make "in" comparisons faster
        attrs = OrderedDict()
        for attr in preprocessed.attributes:
            cv = attr._compute_value.variable._compute_value
            var = cv.variable if cv else original[attr.name]
            if var in attrs:  # the reason for OrderedDict
                continue
            attrs[var] = None  # we only need keys
        attrs = list(attrs.keys())
        return Domain(attrs, original.class_var, original.metas)

    @staticmethod
    def get_ruler_values(start, stop, max_width, round_to_nearest=True):
        if max_width == 0:
            return [0]
        diff = np.nan_to_num((stop - start) / max_width)
        if diff <= 0:
            return [0]
        decimals = int(np.floor(np.log10(diff)))
        if diff > 4 * pow(10, decimals):
            step = 5 * pow(10, decimals + 2)
        elif diff > 2 * pow(10, decimals):
            step = 2 * pow(10, decimals + 2)
        elif diff > 1 * pow(10, decimals):
            step = 1 * pow(10, decimals + 2)
        else:
            step = 5 * pow(10, decimals + 1)
        round_by = int(-np.floor(np.log10(step)))
        r = start % step
        if not round_to_nearest:
            _range = np.arange(start + step, stop + r, step) - r
            start, stop = np.floor(start * 100) / 100, np.ceil(
                stop * 100) / 100
            return np.round(np.hstack((start, _range, stop)), 2)
        return np.round(np.arange(start, stop + r + step, step) - r, round_by)

    @staticmethod
    def get_points_from_coeffs(current_value, coefficients, possible_values):
        if np.isnan(possible_values).any():
            return 0
        # pylint: disable=undefined-loop-variable
        indices = np.argsort(possible_values)
        sorted_values = possible_values[indices]
        sorted_coefficients = coefficients[indices]
        for i, val in enumerate(sorted_values):
            if current_value < val:
                break
        diff = sorted_values[i] - sorted_values[i - 1]
        k = 0 if diff < 1e-6 else (sorted_values[i] - current_value) / \
                                  (sorted_values[i] - sorted_values[i - 1])
        return sorted_coefficients[i - 1] * sorted_values[i - 1] * k + \
               sorted_coefficients[i] * sorted_values[i] * (1 - k)

    def reset_settings(self):
        self._reset_settings()
        self.update_scene()
Beispiel #26
0
class OWDistanceMatrix(widget.OWWidget):
    name = "Distance Matrix"
    description = "View distance matrix."
    icon = "icons/DistanceMatrix.svg"
    priority = 200

    class Inputs:
        distances = Input("Distances", DistMatrix)

    class Outputs:
        distances = Output("Distances", DistMatrix, dynamic=False)
        table = Output("Table", Table)

    settingsHandler = DistanceMatrixContextHandler()
    auto_commit = Setting(True)
    annotation_idx = ContextSetting(1)
    selection = ContextSetting([])

    want_control_area = False

    def __init__(self):
        super().__init__()
        self.distances = None
        self.items = None

        self.tablemodel = DistanceMatrixModel()
        view = self.tableview = QTableView()
        view.setEditTriggers(QTableView.NoEditTriggers)
        view.setItemDelegate(TableBorderItem())
        view.setModel(self.tablemodel)
        view.setShowGrid(False)
        for header in (view.horizontalHeader(), view.verticalHeader()):
            header.setSectionResizeMode(QHeaderView.ResizeToContents)
            header.setHighlightSections(True)
            header.setSectionsClickable(False)
        view.verticalHeader().setDefaultAlignment(
            Qt.AlignRight | Qt.AlignVCenter)
        selmodel = SymmetricSelectionModel(view.model(), view)
        view.setSelectionModel(selmodel)
        view.setSelectionBehavior(QTableView.SelectItems)
        self.mainArea.layout().addWidget(view)

        settings_box = gui.hBox(self.mainArea)

        self.annot_combo = gui.comboBox(
            settings_box, self, "annotation_idx", label="Labels: ",
            orientation=Qt.Horizontal,
            callback=self._invalidate_annotations, contentsLength=12)
        self.annot_combo.setModel(VariableListModel())
        self.annot_combo.model()[:] = ["None", "Enumeration"]
        gui.rubber(settings_box)
        settings_box.layout().addWidget(self.report_button)
        gui.separator(settings_box, 40)
        acb = gui.auto_commit(settings_box, self, "auto_commit",
                              "Send Selected", "Send Automatically", box=None)
        acb.setFixedWidth(200)
        # Signal must be connected after self.commit is redirected
        selmodel.selectionChanged.connect(self.commit)

    def sizeHint(self):
        return QSize(800, 500)

    @Inputs.distances
    def set_distances(self, distances):
        self.closeContext()
        self.distances = distances
        self.tablemodel.set_data(self.distances)
        self.selection = []
        self.tableview.selectionModel().set_selected_items([])

        self.items = items = distances is not None and distances.row_items
        annotations = ["None", "Enumerate"]
        self.annotation_idx = 1
        if items and not distances.axis:
            annotations.append("Attribute names")
            self.annotation_idx = 2
        elif isinstance(items, list) and \
                all(isinstance(item, Variable) for item in items):
            annotations.append("Name")
            self.annotation_idx = 2
        elif isinstance(items, Table):
            annotations.extend(
                itertools.chain(items.domain, items.domain.metas))
            if items.domain.class_var:
                self.annotation_idx = 2 + len(items.domain.attributes)
        self.annot_combo.model()[:] = annotations

        if items:
            self.openContext(distances, annotations)
            self._update_labels()
            self.tableview.resizeColumnsToContents()
        self.commit()

    def _invalidate_annotations(self):
        if self.distances is not None:
            self._update_labels()

    def _update_labels(self):
        var = column = None
        if self.annotation_idx == 0:
            labels = None
        elif self.annotation_idx == 1:
            labels = [str(i + 1) for i in range(self.distances.shape[0])]
        elif self.annot_combo.model()[self.annotation_idx] == "Attribute names":
            attr = self.distances.row_items.domain.attributes
            labels = [str(attr[i]) for i in range(self.distances.shape[0])]
        elif self.annotation_idx == 2 and \
                isinstance(self.items, widget.AttributeList):
            labels = [v.name for v in self.items]
        elif isinstance(self.items, Table):
            var = self.annot_combo.model()[self.annotation_idx]
            column, _ = self.items.get_column_view(var)
            labels = [var.str_val(value) for value in column]
        saved_selection = self.tableview.selectionModel().selected_items()
        self.tablemodel.set_labels(labels, var, column)
        if labels:
            self.tableview.horizontalHeader().show()
            self.tableview.verticalHeader().show()
        else:
            self.tableview.horizontalHeader().hide()
            self.tableview.verticalHeader().hide()
        self.tableview.resizeColumnsToContents()
        self.tableview.selectionModel().set_selected_items(saved_selection)

    def commit(self):
        sub_table = sub_distances = None
        if self.distances is not None:
            inds = self.tableview.selectionModel().selected_items()
            if inds:
                sub_distances = self.distances.submatrix(inds)
                if self.distances.axis and isinstance(self.items, Table):
                    sub_table = self.items[inds]
        self.Outputs.distances.send(sub_distances)
        self.Outputs.table.send(sub_table)

    def send_report(self):
        if self.distances is None:
            return
        model = self.tablemodel
        dim = self.distances.shape[0]
        col_cell = model.color_for_cell

        def _rgb(brush):
            return "rgb({}, {}, {})".format(*brush.color().getRgb())
        if model.labels:
            col_label = model.color_for_label
            label_colors = [_rgb(col_label(i)) for i in range(dim)]
            self.report_raw('<table style="border-collapse:collapse">')
            self.report_raw("<tr><td></td>")
            self.report_raw("".join(
                    '<td style="background-color: {}">{}</td>'.format(*cv)
                    for cv in zip(label_colors, model.labels)))
            self.report_raw("</tr>")
            for i in range(dim):
                self.report_raw("<tr>")
                self.report_raw(
                    '<td style="background-color: {}">{}</td>'.
                    format(label_colors[i], model.labels[i]))
                self.report_raw(
                    "".join(
                        '<td style="background-color: {};'
                        'border-top:1px solid {}; border-left:1px solid {};">'
                        '{:.3f}</td>'.format(
                            _rgb(col_cell(i, j)),
                            label_colors[i], label_colors[j],
                            self.distances[i, j])
                        for j in range(dim)))
                self.report_raw("</tr>")
            self.report_raw("</table>")
        else:
            self.report_raw('<table>')
            for i in range(dim):
                self.report_raw(
                    "<tr>" +
                    "".join('<td style="background-color: {}">{:.3f}</td>'.
                            format(_rgb(col_cell(i, j)), self.distances[i, j])
                            for j in range(dim)) +
                    "</tr>")
            self.report_raw("</table>")
Beispiel #27
0
class OWFile(widget.OWWidget, RecentPathsWComboMixin):
    name = "File"
    id = "orange.widgets.data.file"
    description = "Read data from an input file or network " \
                  "and send a data table to the output."
    icon = "icons/File.svg"
    priority = 10
    category = "Data"
    keywords = ["file", "load", "read", "open"]

    class Outputs:
        data = Output("Data",
                      Table,
                      doc="Attribute-valued dataset read from the input file.")

    want_main_area = False

    SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())]
    SIZE_LIMIT = 1e7
    LOCAL_FILE, URL = range(2)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    # Overload RecentPathsWidgetMixin.recent_paths to set defaults
    recent_paths = Setting([
        RecentPath("", "sample-datasets", "iris.tab"),
        RecentPath("", "sample-datasets", "titanic.tab"),
        RecentPath("", "sample-datasets", "housing.tab"),
        RecentPath("", "sample-datasets", "heart_disease.tab"),
    ])
    recent_urls = Setting([])
    source = Setting(LOCAL_FILE)
    xls_sheet = ContextSetting("")
    sheet_names = Setting({})
    url = Setting("")

    variables = ContextSetting([])

    domain_editor = SettingProvider(DomainEditor)

    class Warning(widget.OWWidget.Warning):
        file_too_big = widget.Msg(
            "The file is too large to load automatically."
            " Press Reload to load.")
        load_warning = widget.Msg("Read warning:\n{}")

    class Error(widget.OWWidget.Error):
        file_not_found = widget.Msg("File not found.")
        missing_reader = widget.Msg("Missing reader.")
        sheet_error = widget.Msg("Error listing available sheets.")
        unknown = widget.Msg("Read error:\n{}")

    def __init__(self):
        super().__init__()
        RecentPathsWComboMixin.__init__(self)
        self.domain = None
        self.data = None
        self.loaded_file = ""
        self.reader = None

        layout = QGridLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)
        vbox = gui.radioButtons(None,
                                self,
                                "source",
                                box=True,
                                addSpace=True,
                                callback=self.load_data,
                                addToLayout=False)

        rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.activated[int].connect(self.select_file)
        box.layout().addWidget(self.file_combo)
        layout.addWidget(box, 0, 1)

        file_button = gui.button(None,
                                 self,
                                 '...',
                                 callback=self.browse_file,
                                 autoDefault=False)
        file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon))
        file_button.setSizePolicy(Policy.Maximum, Policy.Fixed)
        layout.addWidget(file_button, 0, 2)

        reload_button = gui.button(None,
                                   self,
                                   "Reload",
                                   callback=self.load_data,
                                   autoDefault=False)
        reload_button.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed)
        layout.addWidget(reload_button, 0, 3)

        self.sheet_box = gui.hBox(None, addToLayout=False, margin=0)
        self.sheet_combo = gui.comboBox(
            None,
            self,
            "xls_sheet",
            callback=self.select_sheet,
            sendSelectedValue=True,
        )
        self.sheet_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_label = QLabel()
        self.sheet_label.setText('Sheet')
        self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft)
        self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter)
        layout.addWidget(self.sheet_box, 2, 1)
        self.sheet_box.hide()

        rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        self.url_combo = url_combo = QComboBox()
        url_model = NamedURLModel(self.sheet_names)
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        l, t, r, b = url_edit.getTextMargins()
        url_edit.setTextMargins(l + 5, t, r, b)
        layout.addWidget(url_combo, 3, 1, 3, 3)
        url_combo.activated.connect(self._url_set)

        box = gui.vBox(self.controlArea, "Info")
        self.info = gui.widgetLabel(box, 'No data loaded.')
        self.warnings = gui.widgetLabel(box, '')

        box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)")
        self.domain_editor = DomainEditor(self)
        self.editor_model = self.domain_editor.model()
        box.layout().addWidget(self.domain_editor)

        box = gui.hBox(self.controlArea)
        gui.button(box,
                   self,
                   "Browse documentation datasets",
                   callback=lambda: self.browse_file(True),
                   autoDefault=False)
        gui.rubber(box)

        self.apply_button = gui.button(box,
                                       self,
                                       "Apply",
                                       callback=self.apply_domain_edit)
        self.apply_button.setEnabled(False)
        self.apply_button.setFixedWidth(170)
        self.editor_model.dataChanged.connect(
            lambda: self.apply_button.setEnabled(True))

        self.set_file_list()
        # Must not call open_file from within __init__. open_file
        # explicitly re-enters the event loop (by a progress bar)

        self.setAcceptDrops(True)

        if self.source == self.LOCAL_FILE:
            last_path = self.last_path()
            if last_path and os.path.exists(last_path) and \
                    os.path.getsize(last_path) > self.SIZE_LIMIT:
                self.Warning.file_too_big()
                return

        QTimer.singleShot(0, self.load_data)

    def sizeHint(self):
        return QSize(600, 550)

    def select_file(self, n):
        assert n < len(self.recent_paths)
        super().select_file(n)
        if self.recent_paths:
            self.source = self.LOCAL_FILE
            self.load_data()
            self.set_file_list()

    def select_sheet(self):
        self.recent_paths[0].sheet = self.sheet_combo.currentText()
        self.load_data()

    def _url_set(self):
        url = self.url_combo.currentText()
        pos = self.recent_urls.index(url)
        url = url.strip()

        if not urlparse(url).scheme:
            url = 'http://' + url
            self.url_combo.setItemText(pos, url)
            self.recent_urls[pos] = url

        self.source = self.URL
        self.load_data()

    def browse_file(self, in_demos=False):
        if in_demos:
            start_file = get_sample_datasets_dir()
            if not os.path.exists(start_file):
                QMessageBox.information(
                    None, "File",
                    "Cannot find the directory with documentation datasets")
                return
        else:
            start_file = self.last_path() or os.path.expanduser("~/")

        readers = [
            f for f in FileFormat.formats
            if getattr(f, 'read', None) and getattr(f, "EXTENSIONS", None)
        ]
        filename, reader, _ = open_filename_dialog(start_file, None, readers)
        if not filename:
            return
        self.add_path(filename)
        if reader is not None:
            self.recent_paths[0].file_format = reader.qualified_name()

        self.source = self.LOCAL_FILE
        self.load_data()

    # Open a file, create data from it and send it over the data channel
    def load_data(self):
        # We need to catch any exception type since anything can happen in
        # file readers
        self.closeContext()
        self.domain_editor.set_domain(None)
        self.apply_button.setEnabled(False)
        self.clear_messages()
        self.set_file_list()

        error = self._try_load()
        if error:
            error()
            self.data = None
            self.sheet_box.hide()
            self.Outputs.data.send(None)
            self.info.setText("No data.")

    def _try_load(self):
        # pylint: disable=broad-except
        if self.last_path() and not os.path.exists(self.last_path()):
            return self.Error.file_not_found

        try:
            self.reader = self._get_reader()
            assert self.reader is not None
        except Exception:
            return self.Error.missing_reader

        try:
            self._update_sheet_combo()
        except Exception:
            return self.Error.sheet_error

        with catch_warnings(record=True) as warnings:
            try:
                data = self.reader.read()
            except Exception as ex:
                log.exception(ex)
                return lambda x=ex: self.Error.unknown(str(x))
            if warnings:
                self.Warning.load_warning(warnings[-1].message.args[0])

        self.info.setText(self._describe(data))

        self.loaded_file = self.last_path()
        add_origin(data, self.loaded_file)
        self.data = data
        self.openContext(data.domain)
        self.apply_domain_edit()  # sends data

    def _get_reader(self):
        """

        Returns
        -------
        FileFormat
        """
        if self.source == self.LOCAL_FILE:
            path = self.last_path()
            if self.recent_paths and self.recent_paths[0].file_format:
                qname = self.recent_paths[0].file_format
                reader_class = class_from_qualified_name(qname)
                reader = reader_class(path)
            else:
                reader = FileFormat.get_reader(path)
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)
            return reader
        elif self.source == self.URL:
            url = self.url_combo.currentText().strip()
            if url:
                return UrlReader(url)

    def _update_sheet_combo(self):
        if len(self.reader.sheets) < 2:
            self.sheet_box.hide()
            self.reader.select_sheet(None)
            return

        self.sheet_combo.clear()
        self.sheet_combo.addItems(self.reader.sheets)
        self._select_active_sheet()
        self.sheet_box.show()

    def _select_active_sheet(self):
        if self.reader.sheet:
            try:
                idx = self.reader.sheets.index(self.reader.sheet)
                self.sheet_combo.setCurrentIndex(idx)
            except ValueError:
                # Requested sheet does not exist in this file
                self.reader.select_sheet(None)
        else:
            self.sheet_combo.setCurrentIndex(0)

    def _describe(self, table):
        domain = table.domain
        text = ""

        attrs = getattr(table, "attributes", {})
        descs = [
            attrs[desc] for desc in ("Name", "Description") if desc in attrs
        ]
        if len(descs) == 2:
            descs[0] = "<b>{}</b>".format(descs[0])
        if descs:
            text += "<p>{}</p>".format("<br/>".join(descs))

        text += "<p>{} instance(s), {} feature(s), {} meta attribute(s)".\
            format(len(table), len(domain.attributes), len(domain.metas))
        if domain.has_continuous_class:
            text += "<br/>Regression; numerical class."
        elif domain.has_discrete_class:
            text += "<br/>Classification; categorical class with {} values.".\
                format(len(domain.class_var.values))
        elif table.domain.class_vars:
            text += "<br/>Multi-target; {} target variables.".format(
                len(table.domain.class_vars))
        else:
            text += "<br/>Data has no target variable."
        text += "</p>"

        if 'Timestamp' in table.domain:
            # Google Forms uses this header to timestamp responses
            text += '<p>First entry: {}<br/>Last entry: {}</p>'.format(
                table[0, 'Timestamp'], table[-1, 'Timestamp'])
        return text

    def storeSpecificSettings(self):
        self.current_context.modified_variables = self.variables[:]

    def retrieveSpecificSettings(self):
        if hasattr(self.current_context, "modified_variables"):
            self.variables[:] = self.current_context.modified_variables

    def apply_domain_edit(self):
        if self.data is None:
            table = None
        else:
            domain, cols = self.domain_editor.get_domain(
                self.data.domain, self.data)
            if not (domain.variables or domain.metas):
                table = None
            else:
                X, y, m = cols
                table = Table.from_numpy(domain, X, y, m, self.data.W)
                table.name = self.data.name
                table.ids = np.array(self.data.ids)
                table.attributes = getattr(self.data, 'attributes', {})

        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)

    def get_widget_name_extension(self):
        _, name = os.path.split(self.loaded_file)
        return os.path.splitext(name)[0]

    def send_report(self):
        def get_ext_name(filename):
            try:
                return FileFormat.names[os.path.splitext(filename)[1]]
            except KeyError:
                return "unknown"

        if self.data is None:
            self.report_paragraph("File", "No file.")
            return

        if self.source == self.LOCAL_FILE:
            home = os.path.expanduser("~")
            if self.loaded_file.startswith(home):
                # os.path.join does not like ~
                name = "~" + os.path.sep + \
                       self.loaded_file[len(home):].lstrip("/").lstrip("\\")
            else:
                name = self.loaded_file
            if self.sheet_combo.isVisible():
                name += " ({})".format(self.sheet_combo.currentText())
            self.report_items("File", [("File name", name),
                                       ("Format", get_ext_name(name))])
        else:
            self.report_items("Data", [("Resource", self.url),
                                       ("Format", get_ext_name(self.url))])

        self.report_data("Data", self.data)

    def dragEnterEvent(self, event):
        """Accept drops of valid file urls"""
        urls = event.mimeData().urls()
        if urls:
            try:
                FileFormat.get_reader(
                    OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile())
                event.acceptProposedAction()
            except IOError:
                pass

    def dropEvent(self, event):
        """Handle file drops"""
        urls = event.mimeData().urls()
        if urls:
            self.add_path(
                OSX_NSURL_toLocalFile(urls[0])
                or urls[0].toLocalFile())  # add first file
            self.source = self.LOCAL_FILE
            self.load_data()
Beispiel #28
0
class OWAggregate(widget.OWWidget):
    name = 'Aggregate'
    description = "Aggregate data in bins by second, minute, hour, day, " \
                  "week, month, or year."
    icon = 'icons/Aggregate.svg'
    priority = 560

    class Inputs:
        time_series = Input("Time series", Table)

    class Outputs:
        time_series = Output("Time series", Timeseries)

    settingsHandler = DomainContextHandler()
    variables = ContextSetting([])
    agg_funcs = ContextSetting([])

    want_main_area = False

    agg_interval = settings.Setting('day')
    autocommit = settings.Setting(False)

    AGG_TIME = OrderedDict((
        ('second', lambda date: date.replace(microsecond=0)),
        ('minute', lambda date: date.replace(second=0, microsecond=0)),
        ('hour', lambda date: date.replace(minute=0, second=0, microsecond=0)),
        ('day',
         lambda date: date.replace(hour=0, minute=0, second=0, microsecond=0)),
        ('week',
         lambda date: date.strptime(date.strftime('%Y-W%W-0'), '%Y-W%W-%w')
         ),  # Doesn't work for years before 1000
        ('month', lambda date: date.replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)),
        ('year', lambda date: date.replace(
            month=1, day=1, hour=0, minute=0, second=0, microsecond=0)),
    ))

    class Error(widget.OWWidget.Error):
        no_time_variable = widget.Msg(
            'Aggregation currently requires a time series with a time variable.'
        )

    def __init__(self):
        self.data = None

        gui.comboBox(self.controlArea,
                     self,
                     'agg_interval',
                     label='Aggregate by:',
                     items=tuple(self.AGG_TIME.keys()),
                     sendSelectedValue=True,
                     orientation=Qt.Horizontal,
                     callback=lambda: self.commit)
        self.model = model = PyTableModel(parent=self, editable=[False, True])
        model.setHorizontalHeaderLabels(['Attribute', 'Aggregation function'])

        class TableView(gui.TableView):
            def __init__(self, parent):
                super().__init__(
                    parent,
                    editTriggers=(self.SelectedClicked | self.CurrentChanged
                                  | self.DoubleClicked | self.EditKeyPressed),
                )
                self.horizontalHeader().setStretchLastSection(False)
                self.setItemDelegateForColumn(1, self.ComboDelegate(self))

            class _ItemDelegate(QStyledItemDelegate):
                def updateEditorGeometry(self, widget, option, _index):
                    widget.setGeometry(option.rect)

            class ComboDelegate(_ItemDelegate):
                def __init__(self, parent):
                    super().__init__(parent)
                    self._parent = parent
                    self._combo_continuous_model = ListModel(AGG_FUNCTIONS,
                                                             parent=self)
                    self._combo_discrete_model = ListModel([Mode], parent=self)
                    self._combo_string_model = ListModel([Concatenate],
                                                         parent=self)

                def createEditor(self, parent, _QStyleOptionViewItem, index):
                    combo = QComboBox(parent)
                    attr = index.model()[index.row()][0]
                    combo.setModel(
                        self._combo_continuous_model if attr.
                        is_continuous else self._combo_discrete_model if attr.
                        is_discrete else self._combo_string_model)
                    return combo

                def setEditorData(self, combo, index):
                    var = index.model().data(index, Qt.EditRole)
                    combo.setCurrentIndex(combo.model().indexOf(var))

                def setModelData(self, combo, model, index):
                    func = combo.model()[combo.currentIndex()]
                    model.setData(index, func, Qt.EditRole)

        view = TableView(self)
        view.setModel(model)
        self.settingsAboutToBePacked.connect(self.pack_settings)
        self.controlArea.layout().addWidget(view)
        gui.auto_commit(self.controlArea, self, 'autocommit', '&Apply')

    @Inputs.time_series
    def set_data(self, data):
        self.pack_settings()
        self.closeContext()
        self.Error.clear()
        data = None if data is None else Timeseries.from_data_table(data)
        if data is not None and not isinstance(data.time_variable,
                                               TimeVariable):
            self.Error.no_time_variable()
            data = None
        self.data = data
        if data is None:
            self.model.clear()
            self.commit()
            return
        self.set_default(self.data)
        self.openContext(self.data)
        self.unpack_settings()
        self.commit()

    def set_default(self, data):
        self.variables = [
            attr for attr in chain(data.domain.variables, data.domain.metas)
            if attr != data.time_variable
        ]
        self.agg_funcs = [
            AGG_FUNCTIONS[0] if attr.is_continuous else Mode
            if attr.is_discrete else Concatenate if attr.is_string else None
            for attr in self.variables
        ]

    def pack_settings(self):
        self.variables = [i[0] for i in self.model.tolist()]
        self.agg_funcs = [i[1] for i in self.model.tolist()]

    def unpack_settings(self):
        self.model[:] = [[var, func]
                         for var, func in zip(self.variables, self.agg_funcs)]

    def commit(self):
        data = self.data
        if not data:
            self.Outputs.time_series.send(None)
            return

        # Group-by expects data sorted
        sorted_indices = np.argsort(data.time_values)
        if not np.all(sorted_indices == np.arange(len(data))):
            data = Timeseries.from_data_table(
                Table.from_table_rows(data, sorted_indices))

        attrs, cvars, metas = [], [], []
        for attr, _ in self.model:
            if attr in data.domain.attributes:
                attrs.append(attr)
            elif attr in data.domain.class_vars:
                cvars.append(attr)
            else:
                metas.append(attr)

        aggregate_time = self.AGG_TIME[self.agg_interval]

        def time_key(i):
            return timestamp(
                aggregate_time(
                    fromtimestamp(data.time_values[i],
                                  tz=data.time_variable.timezone)))

        times = []
        X, Y, M = [], [], []
        for key_time, indices in groupby(np.arange(len(data)), key=time_key):
            times.append(key_time)
            subset = data[list(indices)]

            xs, ys, ms = [], [], []
            for attr, func in self.model:
                values = Table.from_table(
                    Domain([], [], [attr], source=data.domain), subset).metas
                out = (xs if attr in data.domain.attributes else
                       ys if attr in data.domain.class_vars else ms)
                out.append(func(values))

            X.append(xs)
            Y.append(ys)
            M.append(ms)

        ts = Timeseries(Domain([data.time_variable] + attrs, cvars, metas),
                        np.column_stack((times, np.row_stack(X))), np.array(Y),
                        np.array(np.row_stack(M), dtype=object))
        self.Outputs.time_series.send(ts)
class OWFeatureConstructor(OWWidget):
    name = "Feature Constructor"
    description = "Construct new features (data columns) from a set of " \
                  "existing features in the input dataset."
    icon = "icons/FeatureConstructor.svg"
    keywords = ['function', 'lambda']

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    want_main_area = False

    settingsHandler = FeatureConstructorHandler()
    descriptors = ContextSetting([])
    currentIndex = ContextSetting(-1)
    expressions_with_values = ContextSetting(False)
    settings_version = 2

    EDITORS = [(ContinuousDescriptor, ContinuousFeatureEditor),
               (DateTimeDescriptor, DateTimeFeatureEditor),
               (DiscreteDescriptor, DiscreteFeatureEditor),
               (StringDescriptor, StringFeatureEditor)]

    class Error(OWWidget.Error):
        more_values_needed = Msg("Categorical feature {} needs more values.")
        invalid_expressions = Msg("Invalid expressions: {}.")

    class Warning(OWWidget.Warning):
        renamed_var = Msg("Recently added variable has been renamed, "
                          "to avoid duplicates.\n")

    def __init__(self):
        super().__init__()
        self.data = None
        self.editors = {}

        box = gui.vBox(self.controlArea, "Variable Definitions")

        toplayout = QHBoxLayout()
        toplayout.setContentsMargins(0, 0, 0, 0)
        box.layout().addLayout(toplayout)

        self.editorstack = QStackedWidget(sizePolicy=QSizePolicy(
            QSizePolicy.MinimumExpanding, QSizePolicy.MinimumExpanding))

        for descclass, editorclass in self.EDITORS:
            editor = editorclass()
            editor.featureChanged.connect(self._on_modified)
            self.editors[descclass] = editor
            self.editorstack.addWidget(editor)

        self.editorstack.setEnabled(False)

        buttonlayout = QVBoxLayout(spacing=10)
        buttonlayout.setContentsMargins(0, 0, 0, 0)

        self.addbutton = QPushButton("New",
                                     toolTip="Create a new variable",
                                     minimumWidth=120,
                                     shortcut=QKeySequence.New)

        def unique_name(fmt, reserved):
            candidates = (fmt.format(i) for i in count(1))
            return next(c for c in candidates if c not in reserved)

        def generate_newname(fmt):
            return unique_name(fmt, self.reserved_names())

        menu = QMenu(self.addbutton)
        cont = menu.addAction("Numeric")
        cont.triggered.connect(lambda: self.addFeature(
            ContinuousDescriptor(generate_newname("X{}"), "", 3)))
        disc = menu.addAction("Categorical")
        disc.triggered.connect(lambda: self.addFeature(
            DiscreteDescriptor(generate_newname("D{}"), "", (), False)))
        string = menu.addAction("Text")
        string.triggered.connect(lambda: self.addFeature(
            StringDescriptor(generate_newname("S{}"), "")))
        datetime = menu.addAction("Date/Time")
        datetime.triggered.connect(lambda: self.addFeature(
            DateTimeDescriptor(generate_newname("T{}"), "")))

        menu.addSeparator()
        self.duplicateaction = menu.addAction("Duplicate Selected Variable")
        self.duplicateaction.triggered.connect(self.duplicateFeature)
        self.duplicateaction.setEnabled(False)
        self.addbutton.setMenu(menu)

        self.removebutton = QPushButton("Remove",
                                        toolTip="Remove selected variable",
                                        minimumWidth=120,
                                        shortcut=QKeySequence.Delete)
        self.removebutton.clicked.connect(self.removeSelectedFeature)

        buttonlayout.addWidget(self.addbutton)
        buttonlayout.addWidget(self.removebutton)
        buttonlayout.addStretch(10)

        toplayout.addLayout(buttonlayout, 0)
        toplayout.addWidget(self.editorstack, 10)

        # Layout for the list view
        layout = QVBoxLayout(spacing=1, margin=0)
        self.featuremodel = DescriptorModel(parent=self)

        self.featureview = QListView(minimumWidth=200,
                                     minimumHeight=50,
                                     sizePolicy=QSizePolicy(
                                         QSizePolicy.Minimum,
                                         QSizePolicy.MinimumExpanding))

        self.featureview.setItemDelegate(FeatureItemDelegate(self))
        self.featureview.setModel(self.featuremodel)
        self.featureview.selectionModel().selectionChanged.connect(
            self._on_selectedVariableChanged)

        layout.addWidget(self.featureview)

        box.layout().addLayout(layout, 1)

        self.fix_button = gui.button(self.buttonsArea,
                                     self,
                                     "Upgrade Expressions",
                                     callback=self.fix_expressions)
        self.fix_button.setHidden(True)
        gui.button(self.buttonsArea,
                   self,
                   "Send",
                   callback=self.apply,
                   default=True)

    def setCurrentIndex(self, index):
        index = min(index, len(self.featuremodel) - 1)
        self.currentIndex = index
        if index >= 0:
            itemmodels.select_row(self.featureview, index)
            desc = self.featuremodel[min(index, len(self.featuremodel) - 1)]
            editor = self.editors[type(desc)]
            self.editorstack.setCurrentWidget(editor)
            editor.setEditorData(desc, self.data.domain if self.data else None)
        self.editorstack.setEnabled(index >= 0)
        self.duplicateaction.setEnabled(index >= 0)
        self.removebutton.setEnabled(index >= 0)

    def _on_selectedVariableChanged(self, selected, *_):
        index = selected_row(self.featureview)
        if index is not None:
            self.setCurrentIndex(index)
        else:
            self.setCurrentIndex(-1)

    def _on_modified(self):
        if self.currentIndex >= 0:
            self.Warning.clear()
            editor = self.editorstack.currentWidget()
            proposed = editor.editorData().name
            uniq = get_unique_names(self.reserved_names(self.currentIndex),
                                    proposed)

            feature = editor.editorData()
            if editor.editorData().name != uniq:
                self.Warning.renamed_var()
                feature = feature.__class__(uniq, *feature[1:])

            self.featuremodel[self.currentIndex] = feature
            self.descriptors = list(self.featuremodel)

    def setDescriptors(self, descriptors):
        """
        Set a list of variable descriptors to edit.
        """
        self.descriptors = descriptors
        self.featuremodel[:] = list(self.descriptors)

    def reserved_names(self, idx_=None):
        varnames = []
        if self.data is not None:
            varnames = [
                var.name
                for var in self.data.domain.variables + self.data.domain.metas
            ]
        varnames += [
            desc.name for idx, desc in enumerate(self.featuremodel)
            if idx != idx_
        ]
        return set(varnames)

    @Inputs.data
    @check_sql_input
    def setData(self, data=None):
        """Set the input dataset."""
        self.closeContext()

        self.data = data
        self.expressions_with_values = False

        if self.data is not None:
            descriptors = list(self.descriptors)
            currindex = self.currentIndex
            self.descriptors = []
            self.currentIndex = -1
            self.openContext(data)
            self.fix_button.setHidden(not self.expressions_with_values)

            if descriptors != self.descriptors or \
                    self.currentIndex != currindex:
                # disconnect from the selection model while reseting the model
                selmodel = self.featureview.selectionModel()
                selmodel.selectionChanged.disconnect(
                    self._on_selectedVariableChanged)

                self.featuremodel[:] = list(self.descriptors)
                self.setCurrentIndex(self.currentIndex)

                selmodel.selectionChanged.connect(
                    self._on_selectedVariableChanged)

        self.editorstack.setEnabled(self.currentIndex >= 0)

    def handleNewSignals(self):
        if self.data is not None:
            self.apply()
        else:
            self.Outputs.data.send(None)
            self.fix_button.setHidden(True)

    def addFeature(self, descriptor):
        self.featuremodel.append(descriptor)
        self.setCurrentIndex(len(self.featuremodel) - 1)
        editor = self.editorstack.currentWidget()
        editor.nameedit.setFocus()
        editor.nameedit.selectAll()

    def removeFeature(self, index):
        del self.featuremodel[index]
        index = selected_row(self.featureview)
        if index is not None:
            self.setCurrentIndex(index)
        elif index is None and self.featuremodel.rowCount():
            # Deleting the last item clears selection
            self.setCurrentIndex(self.featuremodel.rowCount() - 1)

    def removeSelectedFeature(self):
        if self.currentIndex >= 0:
            self.removeFeature(self.currentIndex)

    def duplicateFeature(self):
        desc = self.featuremodel[self.currentIndex]
        self.addFeature(copy.deepcopy(desc))

    @staticmethod
    def check_attrs_values(attr, data):
        for i in range(len(data)):
            for var in attr:
                if not math.isnan(data[i, var]) \
                        and int(data[i, var]) >= len(var.values):
                    return var.name
        return None

    def _validate_descriptors(self, desc):
        def validate(source):
            try:
                return validate_exp(ast.parse(source, mode="eval"))
            # ast.parse can return arbitrary errors, not only SyntaxError
            # pylint: disable=broad-except
            except Exception:
                return False

        final = []
        invalid = []
        for d in desc:
            if validate(d.expression):
                final.append(d)
            else:
                final.append(d._replace(expression=""))
                invalid.append(d)

        if invalid:
            self.Error.invalid_expressions(", ".join(s.name for s in invalid))

        return final

    def apply(self):
        def report_error(err):
            log = logging.getLogger(__name__)
            log.error("", exc_info=True)
            self.error("".join(format_exception_only(type(err), err)).rstrip())

        self.Error.clear()

        if self.data is None:
            return

        desc = list(self.featuremodel)
        desc = self._validate_descriptors(desc)
        try:
            new_variables = construct_variables(desc, self.data,
                                                self.expressions_with_values)
        # user's expression can contain arbitrary errors
        except Exception as err:  # pylint: disable=broad-except
            report_error(err)
            return

        attrs = [var for var in new_variables if var.is_primitive()]
        metas = [var for var in new_variables if not var.is_primitive()]
        new_domain = Orange.data.Domain(
            self.data.domain.attributes + tuple(attrs),
            self.data.domain.class_vars,
            metas=self.data.domain.metas + tuple(metas))

        try:
            for variable in new_variables:
                variable.compute_value.mask_exceptions = False
            data = self.data.transform(new_domain)
        # user's expression can contain arbitrary errors
        # pylint: disable=broad-except
        except Exception as err:
            report_error(err)
            return
        finally:
            for variable in new_variables:
                variable.compute_value.mask_exceptions = True

        disc_attrs_not_ok = self.check_attrs_values(
            [var for var in attrs if var.is_discrete], data)
        if disc_attrs_not_ok:
            self.Error.more_values_needed(disc_attrs_not_ok)
            return

        self.Outputs.data.send(data)

    def send_report(self):
        items = OrderedDict()
        for feature in self.featuremodel:
            if isinstance(feature, DiscreteDescriptor):
                desc = "categorical"
                if feature.values:
                    desc += " with values " \
                            + ", ".join(f"'{val}'" for val in feature.values)
                if feature.ordered:
                    desc += "; ordered"
            elif isinstance(feature, ContinuousDescriptor):
                desc = "numeric"
            elif isinstance(feature, DateTimeDescriptor):
                desc = "date/time"
            else:
                desc = "text"
            items[feature.name] = f"{feature.expression} ({desc})"
        self.report_items(report.plural("Constructed feature{s}", len(items)),
                          items)

    def fix_expressions(self):
        dlg = QMessageBox(
            QMessageBox.Question, "Fix Expressions",
            "This widget's behaviour has changed. Values of categorical "
            "variables are now inserted as their textual representations "
            "(strings); previously they appeared as integer numbers, with an "
            "attribute '.value' that contained the text.\n\n"
            "The widget currently runs in compatibility mode. After "
            "expressions are updated, manually check for their correctness.")
        dlg.addButton("Update", QMessageBox.ApplyRole)
        dlg.addButton("Cancel", QMessageBox.RejectRole)
        if dlg.exec() == QMessageBox.RejectRole:
            return

        def fixer(mo):
            var = domain[mo.group(2)]
            if mo.group(3) == ".value":  # uses string; remove `.value`
                return "".join(mo.group(1, 2, 4))
            # Uses ints: get them by indexing
            return mo.group(1) + "{" + \
                   ", ".join(f"'{val}': {i}"
                             for i, val in enumerate(var.values)) + \
                   f"}}[{var.name}]" + mo.group(4)

        domain = self.data.domain
        disc_vars = "|".join(f"{var.name}"
                             for var in chain(domain.variables, domain.metas)
                             if var.is_discrete)
        expr = re.compile(r"(^|\W)(" + disc_vars + r")(\.value)?(\W|$)")
        self.descriptors[:] = [
            descriptor._replace(
                expression=expr.sub(fixer, descriptor.expression))
            for descriptor in self.descriptors
        ]

        self.expressions_with_values = False
        self.fix_button.hide()
        index = self.currentIndex
        self.featuremodel[:] = list(self.descriptors)
        self.setCurrentIndex(index)
        self.apply()

    @classmethod
    def migrate_context(cls, context, version):
        if version is None or version < 2:
            used_vars = set(
                chain(*(
                    freevars(ast.parse(descriptor.expression, mode="eval"), [])
                    for descriptor in context.values["descriptors"]
                    if descriptor.expression)))
            disc_vars = {
                name
                for (name, vtype) in chain(context.attributes.items(),
                                           context.metas.items()) if vtype == 1
            }
            if used_vars & disc_vars:
                context.values["expressions_with_values"] = True
Beispiel #30
0
class OWSelectRows(widget.OWWidget):
    name = "Select Rows"
    id = "Orange.widgets.data.file"
    description = "Select rows from the data based on values of variables."
    icon = "icons/SelectRows.svg"
    priority = 100
    category = "Data"
    keywords = ["filter"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        matching_data = Output("Matching Data", Table, default=True)
        unmatched_data = Output("Unmatched Data", Table)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    want_main_area = False

    settingsHandler = SelectRowsContextHandler()
    conditions = ContextSetting([])
    update_on_change = Setting(True)
    purge_attributes = Setting(False, schema_only=True)
    purge_classes = Setting(False, schema_only=True)
    auto_commit = Setting(True)

    settings_version = 2

    Operators = {
        ContinuousVariable: [
            (FilterContinuous.Equal, "equals"),
            (FilterContinuous.NotEqual, "is not"),
            (FilterContinuous.Less, "is below"),
            (FilterContinuous.LessEqual, "is at most"),
            (FilterContinuous.Greater, "is greater than"),
            (FilterContinuous.GreaterEqual, "is at least"),
            (FilterContinuous.Between, "is between"),
            (FilterContinuous.Outside, "is outside"),
            (FilterContinuous.IsDefined, "is defined"),
        ],
        DiscreteVariable: [
            (FilterDiscreteType.Equal, "is"),
            (FilterDiscreteType.NotEqual, "is not"),
            (FilterDiscreteType.In, "is one of"),
            (FilterDiscreteType.IsDefined, "is defined")
        ],
        StringVariable: [
            (FilterString.Equal, "equals"),
            (FilterString.NotEqual, "is not"),
            (FilterString.Less, "is before"),
            (FilterString.LessEqual, "is equal or before"),
            (FilterString.Greater, "is after"),
            (FilterString.GreaterEqual, "is equal or after"),
            (FilterString.Between, "is between"),
            (FilterString.Outside, "is outside"),
            (FilterString.Contains, "contains"),
            (FilterString.StartsWith, "begins with"),
            (FilterString.EndsWith, "ends with"),
            (FilterString.IsDefined, "is defined"),
        ]
    }

    Operators[TimeVariable] = Operators[ContinuousVariable]

    AllTypes = {}
    for _all_name, _all_type, _all_ops in (
            ("All variables", 0,
             [(None, "are defined")]),
            ("All numeric variables", 2,
             [(v, _plural(t)) for v, t in Operators[ContinuousVariable]]),
            ("All string variables", 3,
             [(v, _plural(t)) for v, t in Operators[StringVariable]])):
        Operators[_all_name] = _all_ops
        AllTypes[_all_name] = _all_type

    operator_names = {vtype: [name for _, name in filters]
                      for vtype, filters in Operators.items()}

    class Error(widget.OWWidget.Error):
        parsing_error = Msg("{}")

    def __init__(self):
        super().__init__()

        self.old_purge_classes = True

        self.conditions = []
        self.last_output_conditions = None
        self.data = None
        self.data_desc = self.match_desc = self.nonmatch_desc = None

        box = gui.vBox(self.controlArea, 'Conditions', stretch=100)
        self.cond_list = QTableWidget(
            box, showGrid=False, selectionMode=QTableWidget.NoSelection)
        box.layout().addWidget(self.cond_list)
        self.cond_list.setColumnCount(4)
        self.cond_list.setRowCount(0)
        self.cond_list.verticalHeader().hide()
        self.cond_list.horizontalHeader().hide()
        for i in range(3):
            self.cond_list.horizontalHeader().setSectionResizeMode(i, QHeaderView.Stretch)
        self.cond_list.horizontalHeader().resizeSection(3, 30)
        self.cond_list.viewport().setBackgroundRole(QPalette.Window)

        box2 = gui.hBox(box)
        gui.rubber(box2)
        self.add_button = gui.button(
            box2, self, "Add Condition", callback=self.add_row)
        self.add_all_button = gui.button(
            box2, self, "Add All Variables", callback=self.add_all)
        self.remove_all_button = gui.button(
            box2, self, "Remove All", callback=self.remove_all)
        gui.rubber(box2)

        boxes = gui.widgetBox(self.controlArea, orientation=QHBoxLayout())
        layout = boxes.layout()

        box_setting = gui.vBox(boxes, addToLayout=False, box=True)
        self.cb_pa = gui.checkBox(
            box_setting, self, "purge_attributes", "Remove unused features",
            callback=self.conditions_changed)
        gui.separator(box_setting, height=1)
        self.cb_pc = gui.checkBox(
            box_setting, self, "purge_classes", "Remove unused classes",
            callback=self.conditions_changed)
        layout.addWidget(box_setting, 1)

        self.report_button.setFixedWidth(120)
        gui.rubber(self.buttonsArea.layout())
        layout.addWidget(self.buttonsArea)

        acbox = gui.auto_send(None, self, "auto_commit")
        layout.addWidget(acbox, 1)
        layout.setAlignment(acbox, Qt.AlignBottom)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        self.set_data(None)
        self.resize(600, 400)

    def add_row(self, attr=None, condition_type=None, condition_value=None):
        model = self.cond_list.model()
        row = model.rowCount()
        model.insertRow(row)

        attr_combo = gui.OrangeComboBox(
            minimumContentsLength=12,
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon)
        attr_combo.row = row
        for var in self._visible_variables(self.data.domain):
            if isinstance(var, Variable):
                attr_combo.addItem(*gui.attributeItem(var))
            else:
                attr_combo.addItem(var)
        if isinstance(attr, str):
            attr_combo.setCurrentText(attr)
        else:
            attr_combo.setCurrentIndex(
                attr or
                len(self.AllTypes) - (attr_combo.count() == len(self.AllTypes)))
        self.cond_list.setCellWidget(row, 0, attr_combo)

        index = QPersistentModelIndex(model.index(row, 3))
        temp_button = QPushButton('×', self, flat=True,
                                  styleSheet='* {font-size: 16pt; color: silver}'
                                             '*:hover {color: black}')
        temp_button.clicked.connect(lambda: self.remove_one(index.row()))
        self.cond_list.setCellWidget(row, 3, temp_button)

        self.remove_all_button.setDisabled(False)
        self.set_new_operators(attr_combo, attr is not None,
                               condition_type, condition_value)
        attr_combo.currentIndexChanged.connect(
            lambda _: self.set_new_operators(attr_combo, False))

        self.cond_list.resizeRowToContents(row)

    @classmethod
    def _visible_variables(cls, domain):
        """Generate variables in order they should be presented in in combos."""
        return chain(
            cls.AllTypes,
            filter_visible(chain(domain.class_vars,
                                 domain.metas,
                                 domain.attributes)))

    def add_all(self):
        if self.cond_list.rowCount():
            Mb = QMessageBox
            if Mb.question(
                    self, "Remove existing filters",
                    "This will replace the existing filters with "
                    "filters for all variables.", Mb.Ok | Mb.Cancel) != Mb.Ok:
                return
            self.remove_all()
        domain = self.data.domain
        for i in range(len(domain.variables) + len(domain.metas)):
            self.add_row(i)

    def remove_one(self, rownum):
        self.remove_one_row(rownum)
        self.conditions_changed()

    def remove_all(self):
        self.remove_all_rows()
        self.conditions_changed()

    def remove_one_row(self, rownum):
        self.cond_list.removeRow(rownum)
        if self.cond_list.model().rowCount() == 0:
            self.remove_all_button.setDisabled(True)

    def remove_all_rows(self):
        self.cond_list.clear()
        self.cond_list.setRowCount(0)
        self.remove_all_button.setDisabled(True)

    def set_new_operators(self, attr_combo, adding_all,
                          selected_index=None, selected_values=None):
        oper_combo = QComboBox()
        oper_combo.row = attr_combo.row
        oper_combo.attr_combo = attr_combo
        attr_name = attr_combo.currentText()
        if attr_name in self.AllTypes:
            oper_combo.addItems(self.operator_names[attr_name])
        else:
            var = self.data.domain[attr_name]
            oper_combo.addItems(self.operator_names[type(var)])
        oper_combo.setCurrentIndex(selected_index or 0)
        self.cond_list.setCellWidget(oper_combo.row, 1, oper_combo)
        self.set_new_values(oper_combo, adding_all, selected_values)
        oper_combo.currentIndexChanged.connect(
            lambda _: self.set_new_values(oper_combo, False))

    @staticmethod
    def _get_lineedit_contents(box):
        return [child.text() for child in getattr(box, "controls", [box])
                if isinstance(child, QLineEdit)]

    @staticmethod
    def _get_value_contents(box):
        cont = []
        names = []
        for child in getattr(box, "controls", [box]):
            if isinstance(child, QLineEdit):
                cont.append(child.text())
            elif isinstance(child, QComboBox):
                cont.append(child.currentIndex())
            elif isinstance(child, QToolButton):
                if child.popup is not None:
                    model = child.popup.list_view.model()
                    for row in range(model.rowCount()):
                        item = model.item(row)
                        if item.checkState():
                            cont.append(row + 1)
                            names.append(item.text())
                    child.desc_text = ', '.join(names)
                    child.set_text()
            elif isinstance(child, QLabel) or child is None:
                pass
            else:
                raise TypeError('Type %s not supported.' % type(child))
        return tuple(cont)

    class QDoubleValidatorEmpty(QDoubleValidator):
        def validate(self, input_, pos):
            if not input_:
                return QDoubleValidator.Acceptable, input_, pos
            if self.locale().groupSeparator() in input_:
                return QDoubleValidator.Invalid, input_, pos
            return super().validate(input_, pos)

    def set_new_values(self, oper_combo, adding_all, selected_values=None):
        # def remove_children():
        #     for child in box.children()[1:]:
        #         box.layout().removeWidget(child)
        #         child.setParent(None)

        def add_textual(contents):
            le = gui.lineEdit(box, self, None,
                              sizePolicy=QSizePolicy(QSizePolicy.Expanding,
                                                     QSizePolicy.Expanding))
            if contents:
                le.setText(contents)
            le.setAlignment(Qt.AlignRight)
            le.editingFinished.connect(self.conditions_changed)
            return le

        def add_numeric(contents):
            le = add_textual(contents)
            le.setValidator(OWSelectRows.QDoubleValidatorEmpty())
            return le

        def add_datetime(contents):
            le = add_textual(contents)
            le.setValidator(QRegExpValidator(QRegExp(TimeVariable.REGEX)))
            return le

        box = self.cond_list.cellWidget(oper_combo.row, 2)
        lc = ["", ""]
        oper = oper_combo.currentIndex()
        attr_name = oper_combo.attr_combo.currentText()
        if attr_name in self.AllTypes:
            vtype = self.AllTypes[attr_name]
            var = None
        else:
            var = self.data.domain[attr_name]
            vtype = vartype(var)
            if selected_values is not None:
                lc = list(selected_values) + ["", ""]
                lc = [str(x) for x in lc[:2]]
        if box and vtype == box.var_type:
            lc = self._get_lineedit_contents(box) + lc

        if oper_combo.currentText().endswith(" defined"):
            label = QLabel()
            label.var_type = vtype
            self.cond_list.setCellWidget(oper_combo.row, 2, label)
        elif var is not None and var.is_discrete:
            if oper_combo.currentText().endswith(" one of"):
                if selected_values:
                    lc = [x for x in list(selected_values)]
                button = DropDownToolButton(self, var, lc)
                button.var_type = vtype
                self.cond_list.setCellWidget(oper_combo.row, 2, button)
            else:
                combo = QComboBox()
                combo.addItems(("", ) + var.values)
                if lc[0]:
                    combo.setCurrentIndex(int(lc[0]))
                else:
                    combo.setCurrentIndex(0)
                combo.var_type = vartype(var)
                self.cond_list.setCellWidget(oper_combo.row, 2, combo)
                combo.currentIndexChanged.connect(self.conditions_changed)
        else:
            box = gui.hBox(self, addToLayout=False)
            box.var_type = vtype
            self.cond_list.setCellWidget(oper_combo.row, 2, box)
            if vtype in (2, 4):  # continuous, time:
                validator = add_datetime if isinstance(var, TimeVariable) else add_numeric
                box.controls = [validator(lc[0])]
                if oper > 5:
                    gui.widgetLabel(box, " and ")
                    box.controls.append(validator(lc[1]))
            elif vtype == 3:  # string:
                box.controls = [add_textual(lc[0])]
                if oper in [6, 7]:
                    gui.widgetLabel(box, " and ")
                    box.controls.append(add_textual(lc[1]))
            else:
                box.controls = []
        if not adding_all:
            self.conditions_changed()

    @Inputs.data
    def set_data(self, data):
        self.closeContext()
        self.data = data
        self.cb_pa.setEnabled(not isinstance(data, SqlTable))
        self.cb_pc.setEnabled(not isinstance(data, SqlTable))
        self.remove_all_rows()
        self.add_button.setDisabled(data is None)
        self.add_all_button.setDisabled(
            data is None or
            len(data.domain.variables) + len(data.domain.metas) > 100)
        if not data:
            self.info.set_input_summary(self.info.NoInput)
            self.data_desc = None
            self.commit()
            return
        self.data_desc = report.describe_data_brief(data)
        self.conditions = []
        try:
            self.openContext(data)
        except Exception:
            pass

        variables = list(self._visible_variables(self.data.domain))
        varnames = [v.name if isinstance(v, Variable) else v for v in variables]
        if self.conditions:
            for attr, cond_type, cond_value in self.conditions:
                if attr in varnames:
                    self.add_row(varnames.index(attr), cond_type, cond_value)
                elif attr in self.AllTypes:
                    self.add_row(attr, cond_type, cond_value)
        else:
            self.add_row()

        self.info.set_input_summary(len(data),
                                    format_summary_details(data))
        self.unconditional_commit()

    def conditions_changed(self):
        try:
            self.conditions = []
            self.conditions = [
                (self.cond_list.cellWidget(row, 0).currentText(),
                 self.cond_list.cellWidget(row, 1).currentIndex(),
                 self._get_value_contents(self.cond_list.cellWidget(row, 2)))
                for row in range(self.cond_list.rowCount())]
            if self.update_on_change and (
                    self.last_output_conditions is None or
                    self.last_output_conditions != self.conditions):
                self.commit()
        except AttributeError:
            # Attribute error appears if the signal is triggered when the
            # controls are being constructed
            pass

    def _values_to_floats(self, attr, values):
        if not len(values):
            return values
        if not all(values):
            return None
        if isinstance(attr, TimeVariable):
            parse = lambda x: (attr.parse(x), True)
        else:
            parse = QLocale().toDouble

        try:
            floats, ok = zip(*[parse(v) for v in values])
            if not all(ok):
                raise ValueError('Some values could not be parsed as floats'
                                 'in the current locale: {}'.format(values))
        except TypeError:
            floats = values  # values already floats
        assert all(isinstance(v, float) for v in floats)
        return floats

    def commit(self):
        matching_output = self.data
        non_matching_output = None
        annotated_output = None

        self.Error.clear()
        if self.data:
            domain = self.data.domain
            conditions = []
            for attr_name, oper_idx, values in self.conditions:
                if attr_name in self.AllTypes:
                    attr_index = attr = None
                    attr_type = self.AllTypes[attr_name]
                    operators = self.Operators[attr_name]
                else:
                    attr_index = domain.index(attr_name)
                    attr = domain[attr_index]
                    attr_type = vartype(attr)
                    operators = self.Operators[type(attr)]
                opertype, _ = operators[oper_idx]
                if attr_type == 0:
                    filter = data_filter.IsDefined()
                elif attr_type in (2, 4):  # continuous, time
                    try:
                        floats = self._values_to_floats(attr, values)
                    except ValueError as e:
                        self.Error.parsing_error(e.args[0])
                        return
                    if floats is None:
                        continue
                    filter = data_filter.FilterContinuous(
                        attr_index, opertype, *floats)
                elif attr_type == 3:  # string
                    filter = data_filter.FilterString(
                        attr_index, opertype, *[str(v) for v in values])
                else:
                    if opertype == FilterDiscreteType.IsDefined:
                        f_values = None
                    else:
                        if not values or not values[0]:
                            continue
                        values = [attr.values[i-1] for i in values]
                        if opertype == FilterDiscreteType.Equal:
                            f_values = {values[0]}
                        elif opertype == FilterDiscreteType.NotEqual:
                            f_values = set(attr.values)
                            f_values.remove(values[0])
                        elif opertype == FilterDiscreteType.In:
                            f_values = set(values)
                        else:
                            raise ValueError("invalid operand")
                    filter = data_filter.FilterDiscrete(attr_index, f_values)
                conditions.append(filter)

            if conditions:
                self.filters = data_filter.Values(conditions)
                matching_output = self.filters(self.data)
                self.filters.negate = True
                non_matching_output = self.filters(self.data)

                row_sel = np.in1d(self.data.ids, matching_output.ids)
                annotated_output = create_annotated_table(self.data, row_sel)

            # if hasattr(self.data, "name"):
            #     matching_output.name = self.data.name
            #     non_matching_output.name = self.data.name

            purge_attrs = self.purge_attributes
            purge_classes = self.purge_classes
            if (purge_attrs or purge_classes) and \
                    not isinstance(self.data, SqlTable):
                attr_flags = sum([Remove.RemoveConstant * purge_attrs,
                                  Remove.RemoveUnusedValues * purge_attrs])
                class_flags = sum([Remove.RemoveConstant * purge_classes,
                                   Remove.RemoveUnusedValues * purge_classes])
                # same settings used for attributes and meta features
                remover = Remove(attr_flags, class_flags, attr_flags)

                matching_output = remover(matching_output)
                non_matching_output = remover(non_matching_output)
                annotated_output = remover(annotated_output)

        if matching_output is not None and not len(matching_output):
            matching_output = None
        if non_matching_output is not None and not len(non_matching_output):
            non_matching_output = None
        if annotated_output is not None and not len(annotated_output):
            annotated_output = None

        self.Outputs.matching_data.send(matching_output)
        self.Outputs.unmatched_data.send(non_matching_output)
        self.Outputs.annotated_data.send(annotated_output)

        self.match_desc = report.describe_data_brief(matching_output)
        self.nonmatch_desc = report.describe_data_brief(non_matching_output)

        summary = len(matching_output) if matching_output else self.info.NoOutput
        details = format_summary_details(matching_output) if matching_output else ""
        self.info.set_output_summary(summary, details)

    def send_report(self):
        if not self.data:
            self.report_paragraph("No data.")
            return

        pdesc = None
        describe_domain = False
        for d in (self.data_desc, self.match_desc, self.nonmatch_desc):
            if not d or not d["Data instances"]:
                continue
            ndesc = d.copy()
            del ndesc["Data instances"]
            if pdesc is not None and pdesc != ndesc:
                describe_domain = True
            pdesc = ndesc

        conditions = []
        domain = self.data.domain
        for attr_name, oper, values in self.conditions:
            if attr_name in self.AllTypes:
                attr = attr_name
                names = self.operator_names[attr_name]
                var_type = self.AllTypes[attr_name]
            else:
                attr = domain[attr_name]
                var_type = vartype(attr)
                names = self.operator_names[type(attr)]
            name = names[oper]
            if oper == len(names) - 1:
                conditions.append("{} {}".format(attr, name))
            elif var_type == 1:  # discrete
                if name == "is one of":
                    valnames = [attr.values[v - 1] for v in values]
                    if not valnames:
                        continue
                    if len(valnames) == 1:
                        valstr = valnames[0]
                    else:
                        valstr = f"{', '.join(valnames[:-1])} or {valnames[-1]}"
                    conditions.append(f"{attr} is {valstr}")
                elif values and values[0]:
                    value = values[0] - 1
                    conditions.append(f"{attr} {name} {attr.values[value]}")
            elif var_type == 3:  # string variable
                conditions.append(
                    f"{attr} {name} {' and '.join(map(repr, values))}")
            elif all(x for x in values):  # numeric variable
                conditions.append(f"{attr} {name} {' and '.join(values)}")
        items = OrderedDict()
        if describe_domain:
            items.update(self.data_desc)
        else:
            items["Instances"] = self.data_desc["Data instances"]
        items["Condition"] = " AND ".join(conditions) or "no conditions"
        self.report_items("Data", items)
        if describe_domain:
            self.report_items("Matching data", self.match_desc)
            self.report_items("Non-matching data", self.nonmatch_desc)
        else:
            match_inst = \
                bool(self.match_desc) and \
                self.match_desc["Data instances"]
            nonmatch_inst = \
                bool(self.nonmatch_desc) and \
                self.nonmatch_desc["Data instances"]
            self.report_items(
                "Output",
                (("Matching data",
                  "{} instances".format(match_inst) if match_inst else "None"),
                 ("Non-matching data",
                  nonmatch_inst > 0 and "{} instances".format(nonmatch_inst))))
class OWExplainPredictions(OWWidget, ConcurrentWidgetMixin):
    name = "Explain Predictions"
    description = "Predictions explanation widget."
    keywords = ["explain", "explain prediction", "explain model"]
    icon = "icons/ExplainPredictions.svg"
    priority = 120

    class Inputs:
        model = Input("Model", Model)
        background_data = Input("Background Data", Table)
        data = Input("Data", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        scores = Output("Scores", Table)

    class Error(OWWidget.Error):
        domain_transform_err = Msg("{}")
        unknown_err = Msg("{}")
        not_enough_data = Msg("At least two instances are needed.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Data has been sampled.")

    buttons_area_orientation = Qt.Vertical

    settingsHandler = PerfectDomainContextHandler()
    target_index = ContextSetting(0)
    order_index = ContextSetting(0)
    annot_index = ContextSetting(0)
    show_tooltip = Setting(True)
    highlight_feature = Setting(True)
    selection_ranges = Setting([], schema_only=True)
    auto_send = Setting(True)
    visual_settings = Setting({}, schema_only=True)

    graph_name = "graph.plotItem"

    ANNOTATIONS = ["None", "Enumeration"]

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self.__results: Optional[RunnerResults] = None
        self.model: Optional[Model] = None
        self.background_data: Optional[Table] = None
        self.data: Optional[Table] = None
        # cached instance indices after instance ordering
        self.__data_idxs: Optional[np.ndarray] = None
        self.__pending_selection: List[Tuple[float, float]] = \
            self.selection_ranges

        self.graph: ForcePlot = None
        self._target_combo: QComboBox = None
        self._order_combo: QComboBox = None
        self._annot_combo: QComboBox = None

        self.setup_gui()

        initial_settings = self.graph.parameter_setter.initial_settings
        VisualSettingsDialog(self, initial_settings)

    def setup_gui(self):
        self._add_plot()
        self._add_controls()
        self._add_buttons()

    def _add_plot(self):
        box = gui.vBox(self.mainArea)
        self.graph = ForcePlot(self)
        self.graph.set_show_tooltip(self.show_tooltip)
        self.graph.set_highlight_feature(self.highlight_feature)
        self.graph.selectionChanged.connect(self.__on_selection_changed)
        box.layout().addWidget(self.graph)

    def __on_selection_changed(self, selection: List[Tuple[float, float]]):
        self.selection_ranges = selection
        self.commit()

    def _add_controls(self):
        box = gui.vBox(self.controlArea, "Target class")
        self._target_combo = gui.comboBox(box, self, "target_index",
                                          callback=self.__on_target_changed,
                                          contentsLength=12)

        box = gui.vBox(self.controlArea, "Instance order")
        self._order_combo = gui.comboBox(box, self, "order_index",
                                         callback=self.__on_order_changed,
                                         searchable=True, contentsLength=12)
        model = VariableListModel()
        model[:] = INSTANCE_ORDERINGS
        self._order_combo.setModel(model)

        box = gui.vBox(self.controlArea, "Annotation")
        self._annot_combo = gui.comboBox(box, self, "annot_index",
                                         callback=self.__on_annot_changed,
                                         searchable=True, contentsLength=12)
        model = VariableListModel()
        model[:] = self.ANNOTATIONS
        self._annot_combo.setModel(model)

        box = gui.vBox(self.controlArea, "", margin=True,
                       contentsMargins=(8, 4, 8, 4))
        gui.checkBox(box, self, "show_tooltip", "Show tooltips",
                     callback=self.__on_show_tooltip_changed)
        gui.checkBox(box, self, "highlight_feature",
                     "Highlight feature on hover",
                     callback=self.__on_highlight_feature_changed)

        gui.rubber(self.controlArea)

    def __on_target_changed(self):
        self.selection_ranges = []
        self.setup_plot()
        self.commit()

    def __on_order_changed(self):
        self.selection_ranges = []
        self.setup_plot()
        self.commit()

    def __on_annot_changed(self):
        if not self.__results or not self.data:
            return
        self._set_plot_annotations()

    def __on_show_tooltip_changed(self):
        self.graph.set_show_tooltip(self.show_tooltip)

    def __on_highlight_feature_changed(self):
        self.graph.set_highlight_feature(self.highlight_feature)

    def _add_buttons(self):
        plot_gui = OWPlotGUI(self)
        plot_gui.box_zoom_select(self.buttonsArea)
        gui.auto_send(self.buttonsArea, self, "auto_send")

    @Inputs.data
    @check_sql_input
    def set_data(self, data: Optional[Table]):
        self.closeContext()
        self.data = data
        self._check_data()
        self._setup_controls()
        self.openContext(self.data.domain if self.data else None)

    @Inputs.background_data
    @check_sql_input
    def set_background_data(self, data: Optional[Table]):
        self.background_data = data

    @Inputs.model
    def set_model(self, model: Optional[Model]):
        self.model = model

    def _check_data(self):
        self.Error.not_enough_data.clear()
        if self.data and len(self.data) < 2:
            self.data = None
            self.Error.not_enough_data()

    def _setup_controls(self):
        self._target_combo.clear()
        self._target_combo.setEnabled(True)

        self.order_index = 0
        self.annot_index = 0
        self._order_combo.clear()
        self._annot_combo.clear()
        orderings = INSTANCE_ORDERINGS
        annotations = self.ANNOTATIONS

        if self.data:
            domain = self.data.domain
            if domain.has_discrete_class:
                self._target_combo.addItems(domain.class_var.values)
                self.target_index = 0
            elif domain.has_continuous_class:
                self.target_index = -1
                self._target_combo.setEnabled(False)

            orderings = chain(
                INSTANCE_ORDERINGS,
                [VariableListModel.Separator] if domain.metas else [],
                domain.metas,
                [VariableListModel.Separator] if domain.class_vars else [],
                domain.class_vars,
                [VariableListModel.Separator] if domain.attributes else [],
                domain.attributes,
            )

            annotations = chain(
                self.ANNOTATIONS,
                [VariableListModel.Separator] if domain.metas else [],
                domain.metas,
                [VariableListModel.Separator] if domain.class_vars else [],
                domain.class_vars,
                [VariableListModel.Separator] if domain.attributes else [],
                domain.attributes,
            )

        self._order_combo.model()[:] = orderings
        self._annot_combo.model()[:] = annotations

    def handleNewSignals(self):
        self.clear()
        self.start(run, self.data, self.background_data, self.model)
        self.commit()

    def clear(self):
        self.__results = None
        self.cancel()
        self.Error.domain_transform_err.clear()
        self.Error.unknown_err.clear()
        self.Information.data_sampled.clear()
        self.selection_ranges = []
        self.graph.clear_all()
        self.graph.set_axis(None)
        self.__data_idxs = None

    def setup_plot(self):
        self.graph.clear_all()
        self.__data_idxs = None
        if not self.__results or not self.data:
            return

        order = self._order_combo.model()[self.order_index]
        values_idxs = get_instance_ordering(
            self.__results.values[self.target_index],
            self.__results.predictions[self.__results.mask, self.target_index],
            self.data[self.__results.mask],
            order
        )

        data_idxs = np.arange(len(self.data))
        self.__data_idxs = data_idxs[self.__results.mask][values_idxs]

        x_data, pos_y_data, neg_y_data, pos_labels, neg_labels = \
            prepare_force_plot_data_multi_inst(
                self.__results.values[self.target_index][values_idxs],
                self.__results.base_value[self.target_index],
                self.model.domain
            )

        if self.order_index == 0:
            order = "hierarhical clustering"
        elif self.order_index == 1:
            order = "output value"
        elif self.order_index == 2:
            order = "original ordering"
        x_label = f"Instances ordered by {order}"

        target = self.model.domain.class_var
        if self.model.domain.has_discrete_class:
            target = f"{target} = {target.values[self.target_index]}"
        y_label = f"Output value ({target})"

        self.graph.set_data(x_data, pos_y_data, neg_y_data,
                            pos_labels, neg_labels, x_label, y_label,
                            self.__results.transformed_data[self.__data_idxs])
        self._set_plot_annotations()

    def _set_plot_annotations(self):
        annotator = self._annot_combo.model()[self.annot_index]
        if isinstance(annotator, Variable):
            ticks = [[(i, str(row[annotator].value)) for i, row in
                      enumerate(self.data[self.__data_idxs])]]
            self.graph.set_axis(ticks)
        elif annotator == "None":
            self.graph.set_axis([])
        elif annotator == "Enumeration":
            ticks = [[(i, str(idx + 1)) for i, idx in
                      enumerate(self.__data_idxs)]]
            self.graph.set_axis(ticks)
        else:
            raise NotImplementedError(annotator)

    def on_partial_result(self, _):
        pass

    def on_done(self, results: Optional[RunnerResults]):
        self.__results = results
        if results is not None and not all(results.mask):
            self.Information.data_sampled()
        self.setup_plot()
        self.apply_selection()
        self.output_scores()

    def on_exception(self, ex: Exception):
        if isinstance(ex, DomainTransformationError):
            self.Error.domain_transform_err(ex)
        else:
            self.Error.unknown_err(ex)

    def onDeleteWidget(self):
        self.shutdown()
        super().onDeleteWidget()

    def apply_selection(self):
        selection_ranges = self.selection_ranges or self.__pending_selection
        if selection_ranges:
            self.graph.apply_selection(selection_ranges)
            self.__on_selection_changed(selection_ranges)
            self.__pending_selection = []

    def commit(self):
        selected = None
        selected_indices = []

        if self.__results:
            selection = list(set(
                chain.from_iterable(
                    range(int(np.ceil(start)), int(np.floor(stop) + 1))
                    for start, stop in self.selection_ranges)
            ))
            selected_indices = sorted(self.__data_idxs[selection])

        if self.data and selected_indices:
            selected = self.data[selected_indices]
        annotated = create_annotated_table(self.data, selected_indices)
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)

    def output_scores(self):
        scores = None
        if self.__results is not None:
            mask = self.__results.mask
            data = self.__results.transformed_data[mask]
            domain = data.domain
            attrs = [ContinuousVariable(f"S({a.name})")
                     for a in domain.attributes]
            domain = Domain(attrs, domain.class_vars, domain.metas)
            scores = self.__results.values[self.target_index]
            scores = Table(domain, scores, data.Y, data.metas)
            scores.name = "Feature Scores"
        self.Outputs.scores.send(scores)

    def send_report(self):
        if not self.data or not self.background_data or not self.model:
            return
        items = {"Target class": "None"}
        if self.model.domain.has_discrete_class:
            class_var = self.model.domain.class_var
            items["Target class"] = class_var.values[self.target_index]
        self.report_items(items)
        self.report_plot()

    def set_visual_settings(self, key: Tuple[str, str, str], value: Any):
        self.visual_settings[key] = value
        self.graph.parameter_setter.set_parameter(key, value)
class ImagePlot(QWidget, OWComponent, SelectionGroupMixin):

    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    gamma = Setting(0)
    threshold_low = Setting(0.0)
    threshold_high = Setting(1.0)
    palette_index = Setting(0)
    selection_changed = Signal()

    def __init__(self, parent):
        QWidget.__init__(self)
        OWComponent.__init__(self, parent)
        SelectionGroupMixin.__init__(self)

        self.parent = parent

        self.selection_type = SELECTMANY
        self.saving_enabled = hasattr(self.parent, "save_graph")
        self.selection_enabled = True
        self.viewtype = INDIVIDUAL  # required bt InteractiveViewBox
        self.highlighted = None
        self.data_points = None
        self.data_values = None
        self.data_imagepixels = None

        self.plotview = pg.PlotWidget(background="w",
                                      viewBox=InteractiveViewBox(self))
        self.plot = self.plotview.getPlotItem()

        self.plot.scene().installEventFilter(
            HelpEventDelegate(self.help_event, self))

        layout = QVBoxLayout()
        self.setLayout(layout)
        self.layout().setContentsMargins(0, 0, 0, 0)
        self.layout().addWidget(self.plotview)

        self.img = ImageItemNan()
        self.img.setOpts(axisOrder='row-major')
        self.plot.addItem(self.img)
        self.plot.vb.setAspectLocked()
        self.plot.scene().sigMouseMoved.connect(self.plot.vb.mouseMovedEvent)

        layout = QGridLayout()
        self.plotview.setLayout(layout)
        self.button = QPushButton("View", self.plotview)
        self.button.setAutoDefault(False)

        layout.setRowStretch(1, 1)
        layout.setColumnStretch(1, 1)
        layout.addWidget(self.button, 0, 0)
        view_menu = MenuFocus(self)
        self.button.setMenu(view_menu)

        # prepare interface according to the new context
        self.parent.contextAboutToBeOpened.connect(
            lambda x: self.init_interface_data(x[0]))

        actions = []

        zoom_in = QAction("Zoom in",
                          self,
                          triggered=self.plot.vb.set_mode_zooming)
        zoom_in.setShortcuts([Qt.Key_Z, QKeySequence(QKeySequence.ZoomIn)])
        zoom_in.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(zoom_in)
        zoom_fit = QAction(
            "Zoom to fit",
            self,
            triggered=lambda x:
            (self.plot.vb.autoRange(), self.plot.vb.set_mode_panning()))
        zoom_fit.setShortcuts(
            [Qt.Key_Backspace,
             QKeySequence(Qt.ControlModifier | Qt.Key_0)])
        zoom_fit.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(zoom_fit)
        select_square = QAction(
            "Select (square)",
            self,
            triggered=self.plot.vb.set_mode_select_square,
        )
        select_square.setShortcuts([Qt.Key_S])
        select_square.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(select_square)

        select_polygon = QAction(
            "Select (polygon)",
            self,
            triggered=self.plot.vb.set_mode_select_polygon,
        )
        select_polygon.setShortcuts([Qt.Key_P])
        select_polygon.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(select_polygon)

        if self.saving_enabled:
            save_graph = QAction(
                "Save graph",
                self,
                triggered=self.save_graph,
            )
            save_graph.setShortcuts(
                [QKeySequence(Qt.ControlModifier | Qt.Key_I)])
            actions.append(save_graph)

        view_menu.addActions(actions)
        self.addActions(actions)

        common_options = dict(labelWidth=50,
                              orientation=Qt.Horizontal,
                              sendSelectedValue=True,
                              valueType=str)

        choose_xy = QWidgetAction(self)
        box = gui.vBox(self)
        box.setFocusPolicy(Qt.TabFocus)
        self.xy_model = DomainModel(DomainModel.METAS | DomainModel.CLASSES,
                                    valid_types=DomainModel.PRIMITIVE)
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self.update_attr,
                                      model=self.xy_model,
                                      **common_options)
        self.cb_attr_y = gui.comboBox(box,
                                      self,
                                      "attr_y",
                                      label="Axis y:",
                                      callback=self.update_attr,
                                      model=self.xy_model,
                                      **common_options)
        box.setFocusProxy(self.cb_attr_x)

        self.color_cb = gui.comboBox(box,
                                     self,
                                     "palette_index",
                                     label="Color:",
                                     labelWidth=50,
                                     orientation=Qt.Horizontal)
        self.color_cb.setIconSize(QSize(64, 16))
        palettes = _color_palettes

        self.palette_index = min(self.palette_index, len(palettes) - 1)

        model = color_palette_model(palettes, self.color_cb.iconSize())
        model.setParent(self)
        self.color_cb.setModel(model)
        self.color_cb.activated.connect(self.update_color_schema)

        self.color_cb.setCurrentIndex(self.palette_index)

        form = QFormLayout(formAlignment=Qt.AlignLeft,
                           labelAlignment=Qt.AlignLeft,
                           fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow)

        lowslider = gui.hSlider(box,
                                self,
                                "threshold_low",
                                minValue=0.0,
                                maxValue=1.0,
                                step=0.05,
                                ticks=True,
                                intOnly=False,
                                createLabel=False,
                                callback=self.update_color_schema)
        highslider = gui.hSlider(box,
                                 self,
                                 "threshold_high",
                                 minValue=0.0,
                                 maxValue=1.0,
                                 step=0.05,
                                 ticks=True,
                                 intOnly=False,
                                 createLabel=False,
                                 callback=self.update_color_schema)

        form.addRow("Low:", lowslider)
        form.addRow("High:", highslider)

        box.layout().addLayout(form)

        choose_xy.setDefaultWidget(box)
        view_menu.addAction(choose_xy)

        self.markings_integral = []

        self.lsx = None  # info about the X axis
        self.lsy = None  # info about the Y axis

        self.data = None
        self.data_ids = {}

    def init_interface_data(self, data):
        same_domain = (self.data and data and data.domain == self.data.domain)
        if not same_domain:
            self.init_attr_values(data)

    def help_event(self, ev):
        pos = self.plot.vb.mapSceneToView(ev.scenePos())
        sel = self._points_at_pos(pos)
        prepared = []
        if sel is not None:
            data, vals, points = self.data[sel], self.data_values[
                sel], self.data_points[sel]
            for d, v, p in zip(data, vals, points):
                basic = "({}, {}): {}".format(p[0], p[1], v)
                variables = [
                    v for v in self.data.domain.metas +
                    self.data.domain.class_vars
                    if v not in [self.attr_x, self.attr_y]
                ]
                features = [
                    '{} = {}'.format(attr.name, d[attr]) for attr in variables
                ]
                prepared.append("\n".join([basic] + features))
        text = "\n\n".join(prepared)
        if text:
            text = ('<span style="white-space:pre">{}</span>'.format(
                escape(text)))
            QToolTip.showText(ev.screenPos(), text, widget=self.plotview)
            return True
        else:
            return False

    def update_color_schema(self):
        if not self.threshold_low < self.threshold_high:
            # TODO this belongs here, not in the parent
            self.parent.Warning.threshold_error()
            return
        else:
            self.parent.Warning.threshold_error.clear()
        data = self.color_cb.itemData(self.palette_index, role=Qt.UserRole)
        _, colors = max(data.items())
        cols = color_palette_table(colors,
                                   threshold_low=self.threshold_low,
                                   threshold_high=self.threshold_high)

        self.img.setLookupTable(cols)

        # use defined discrete palette
        if self.parent.value_type == 1:
            dat = self.data.domain[self.parent.attr_value]
            if isinstance(dat, DiscreteVariable):
                self.img.setLookupTable(dat.colors)

    def update_attr(self):
        self.update_view()

    def init_attr_values(self, data):
        domain = data.domain if data is not None else None
        self.xy_model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None
        self.attr_y = self.xy_model[1] if len(self.xy_model) >= 2 \
            else self.attr_x

    def save_graph(self):
        self.parent.save_graph()

    def set_data(self, data):
        if data:
            self.data = data
            self.data_ids = {e: i for i, e in enumerate(data.ids)}
            self.restore_selection_settings()
        else:
            self.data = None
            self.data_ids = {}

    def refresh_markings(self, di):
        refresh_integral_markings([{
            "draw": di
        }], self.markings_integral, self.parent.curveplot)

    def update_view(self):
        self.img.clear()
        self.img.setSelection(None)
        self.lsx = None
        self.lsy = None
        self.data_points = None
        self.data_values = None
        self.data_imagepixels = None
        if self.data and self.attr_x and self.attr_y:
            xat = self.data.domain[self.attr_x]
            yat = self.data.domain[self.attr_y]

            ndom = Orange.data.Domain([xat, yat])
            datam = Orange.data.Table(ndom, self.data)
            coorx = datam.X[:, 0]
            coory = datam.X[:, 1]
            self.data_points = datam.X
            self.lsx = lsx = values_to_linspace(coorx)
            self.lsy = lsy = values_to_linspace(coory)
            if lsx[-1] * lsy[-1] > IMAGE_TOO_BIG:
                self.parent.Error.image_too_big(lsx[-1], lsy[-1])
                return
            else:
                self.parent.Error.image_too_big.clear()

            di = {}
            if self.parent.value_type == 0:  # integrals
                imethod = self.parent.integration_methods[
                    self.parent.integration_method]

                l1, l2, l3 = self.parent.lowlim, self.parent.highlim, self.parent.choose

                gx = getx(self.data)

                if l1 is None:
                    l1 = min(gx) - 1
                if l2 is None:
                    l2 = max(gx) + 1

                l1, l2 = min(l1, l2), max(l1, l2)

                if l3 is None:
                    l3 = (l1 + l2) / 2

                if imethod != Integrate.PeakAt:
                    datai = Integrate(methods=imethod,
                                      limits=[[l1, l2]])(self.data)
                else:
                    datai = Integrate(methods=imethod,
                                      limits=[[l3, l3]])(self.data)

                if np.any(self.parent.curveplot.selection_group):
                    # curveplot can have a subset of curves on the input> match IDs
                    ind = np.flatnonzero(
                        self.parent.curveplot.selection_group)[0]
                    dind = self.data_ids[self.parent.curveplot.data[ind].id]
                    di = datai.domain.attributes[0].compute_value.draw_info(
                        self.data[dind:dind + 1])
                d = datai.X[:, 0]
            else:
                dat = self.data.domain[self.parent.attr_value]
                ndom = Orange.data.Domain([dat])
                d = Orange.data.Table(ndom, self.data).X[:, 0]
            self.refresh_markings(di)

            # set data
            imdata = np.ones((lsy[2], lsx[2])) * float("nan")

            xindex = index_values(coorx, lsx)
            yindex = index_values(coory, lsy)
            imdata[yindex, xindex] = d
            self.data_values = d
            self.data_imagepixels = np.vstack((yindex, xindex)).T

            levels = get_levels(imdata)
            self.update_color_schema()

            self.img.setImage(imdata, levels=levels)

            # shift centres of the pixels so that the axes are useful
            shiftx = _shift(lsx)
            shifty = _shift(lsy)
            left = lsx[0] - shiftx
            bottom = lsy[0] - shifty
            width = (lsx[1] - lsx[0]) + 2 * shiftx
            height = (lsy[1] - lsy[0]) + 2 * shifty
            self.img.setRect(QRectF(left, bottom, width, height))

            self.selection_changed.emit()
            self.refresh_img_selection()

    def refresh_img_selection(self):
        selected_px = np.zeros((self.lsy[2], self.lsx[2]), dtype=np.uint8)
        selected_px[self.data_imagepixels[:, 0],
                    self.data_imagepixels[:, 1]] = self.selection_group
        self.img.setSelection(selected_px)

    def make_selection(self, selected, add):
        """Add selected indices to the selection."""
        add_to_group, add_group, remove = selection_modifiers()
        if self.data and self.lsx and self.lsy:
            if add_to_group:  # both keys - need to test it before add_group
                selnum = np.max(self.selection_group)
            elif add_group:
                selnum = np.max(self.selection_group) + 1
            elif remove:
                selnum = 0
            else:
                self.selection_group *= 0
                selnum = 1
            if selected is not None:
                self.selection_group[selected] = selnum
            self.refresh_img_selection()
        self.prepare_settings_for_saving()
        self.selection_changed.emit()

    def select_square(self, p1, p2, add):
        """ Select elements within a square drawn by the user.
        A selection needs to contain whole pixels """
        x1, y1 = p1.x(), p1.y()
        x2, y2 = p2.x(), p2.y()
        polygon = [
            QPointF(x1, y1),
            QPointF(x2, y1),
            QPointF(x2, y2),
            QPointF(x1, y2),
            QPointF(x1, y1)
        ]
        self.select_polygon(polygon, add)

    def select_polygon(self, polygon, add):
        """ Select by a polygon which has to contain whole pixels. """
        if self.data and self.lsx and self.lsy:
            polygon = [(p.x(), p.y()) for p in polygon]
            # a polygon should contain all pixel
            shiftx = _shift(self.lsx)
            shifty = _shift(self.lsy)
            points_edges = [
                self.data_points + [[shiftx, shifty]],
                self.data_points + [[-shiftx, shifty]],
                self.data_points + [[shiftx, -shifty]],
                self.data_points + [[-shiftx, -shifty]]
            ]
            inp = in_polygon(points_edges[0], polygon)
            for p in points_edges[1:]:
                inp *= in_polygon(p, polygon)
            self.make_selection(inp, add)

    def _points_at_pos(self, pos):
        if self.data and self.lsx and self.lsy:
            x, y = pos.x(), pos.y()
            distance = np.abs(self.data_points - [[x, y]])
            sel = (distance[:, 0] < _shift(self.lsx)) * (distance[:, 1] <
                                                         _shift(self.lsy))
            return sel

    def select_by_click(self, pos, add):
        sel = self._points_at_pos(pos)
        self.make_selection(sel, add)