Ejemplo n.º 1
0
class RadioBooleanFilter(QWidget, Control):
    """Boolean filter (Only/Exclude) rendered as a column of radio buttons.

    One radio button is created for every top-level "Option" sub-element of
    ``tree``.  Button ids are the option positions (0, 1, ...) and
    ``self.values`` holds the option values in the same order.
    """

    def __init__(self, tree, dataset, master, parent=None):
        """Build the radio buttons and check the first one, if any exists."""
        QWidget.__init__(self, parent)
        Control.__init__(self, tree, dataset, master)

        self.setLayout(QVBoxLayout())
        self.buttonGroup = QButtonGroup(self)
        self.values = []
        for i, option in enumerate(tree.subelements_top("Option")):
            rb = QRadioButton(option.displayName, self)
            # Register the button directly under its positional id.
            self.buttonGroup.addButton(rb, i)
            self.layout().addWidget(rb)
            self.values.append(option.value)

        # QButtonGroup.button() returns None for an unknown id, so a filter
        # that has no options must not try to check a non-existent button.
        first_button = self.buttonGroup.button(0)
        if first_button is not None:
            first_button.setChecked(True)

    def value(self):
        """Return the filter value: the checked button id formatted as text."""
        return {"excluded": "%i" % self.buttonGroup.checkedId()}

    def get_filter(self):
        """Return a ``(internal name of the tree, value)`` pair."""
        return self.tree.internalName, self.value()

    def query(self):
        """Return a single-item list with a ``("Filter", tree, value)`` tuple."""
        return [("Filter", self.tree, self.value())]

    def setControlValue(self, name, value):
        """Check the button whose option value equals ``value``.

        ``name`` is not used here.  Nothing happens if no option matches.
        """
        for i, v in enumerate(self.values):
            if v == value:
                button = self.buttonGroup.button(i)
                button.setChecked(True)
                break
    def __init__(self):
        """Build the widget: commit button, one options panel per method
        (each preceded by a radio button) and the topic viewer."""
        super().__init__()
        self.corpus = None
        self.learning_thread = None

        # Commit button
        gui.auto_commit(
            self.buttonsArea, self, 'autocommit', 'Commit', box=False)

        # Radio buttons are registered under the index of their method.
        radio_group = QButtonGroup(self, exclusive=True)
        radio_group.buttonClicked[int].connect(self.change_method)

        self.widgets = []
        options_layout = QVBoxLayout()
        self.controlArea.layout().addLayout(options_layout)
        for index, (factory, attribute) in enumerate(self.methods):
            options = factory(self, title='Options')
            options.setFixedWidth(self.control_area_width)
            options.valueChanged.connect(self.commit)
            self.widgets.append(options)
            # The panel is also reachable as an attribute of the widget.
            setattr(self, attribute, options)

            radio = QRadioButton(text=options.Model.name)
            radio_group.addButton(radio, index)
            options_layout.addWidget(radio)
            options_layout.addWidget(options)

        radio_group.button(self.method_index).setChecked(True)
        self.toggle_widgets()
        options_layout.addStretch()

        # Topics description
        viewer = TopicViewer()
        self.topic_desc = viewer
        viewer.topicSelected.connect(self.send_topic_by_id)
        self.mainArea.layout().addWidget(viewer)
        viewer.setFocus()
Ejemplo n.º 3
0
    def __init__(self):
        """Build the widget: commit button, one options panel per method
        (each preceded by a radio button) and the topic viewer."""
        super().__init__()
        self.corpus = None
        self.learning_thread = None

        # Commit button
        gui.auto_commit(
            self.buttonsArea, self, 'autocommit', 'Commit', box=False)

        # Radio buttons are registered under the index of their method.
        radio_group = QButtonGroup(self, exclusive=True)
        radio_group.buttonClicked[int].connect(self.change_method)

        self.widgets = []
        options_layout = QVBoxLayout()
        self.controlArea.layout().addLayout(options_layout)
        for index, (factory, attribute) in enumerate(self.methods):
            options = factory(self, title='Options')
            options.setFixedWidth(self.control_area_width)
            options.valueChanged.connect(self.commit)
            self.widgets.append(options)
            # The panel is also reachable as an attribute of the widget.
            setattr(self, attribute, options)

            radio = QRadioButton(text=options.Model.name)
            radio_group.addButton(radio, index)
            options_layout.addWidget(radio)
            options_layout.addWidget(options)

        radio_group.button(self.method_index).setChecked(True)
        self.toggle_widgets()
        options_layout.addStretch()

        # Topics description
        viewer = TopicViewer()
        self.topic_desc = viewer
        viewer.topicSelected.connect(self.send_topic_by_id)
        self.mainArea.layout().addWidget(viewer)
        viewer.setFocus()
Ejemplo n.º 4
0
class RadioBooleanFilter(QWidget, Control):
    """Boolean filter (Only/Exclude) rendered as a column of radio buttons.

    One radio button is created for every top-level "Option" sub-element of
    ``tree``.  Button ids are the option positions (0, 1, ...) and
    ``self.values`` holds the option values in the same order.
    """

    def __init__(self, tree, dataset, master, parent=None):
        """Build the radio buttons and check the first one, if any exists."""
        QWidget.__init__(self, parent)
        Control.__init__(self, tree, dataset, master)

        self.setLayout(QVBoxLayout())
        self.buttonGroup = QButtonGroup(self)
        self.values = []
        for i, option in enumerate(tree.subelements_top("Option")):
            rb = QRadioButton(option.displayName, self)
            # Register the button directly under its positional id.
            self.buttonGroup.addButton(rb, i)
            self.layout().addWidget(rb)
            self.values.append(option.value)

        # QButtonGroup.button() returns None for an unknown id, so a filter
        # that has no options must not try to check a non-existent button.
        first_button = self.buttonGroup.button(0)
        if first_button is not None:
            first_button.setChecked(True)

    def value(self):
        """Return the filter value: the checked button id formatted as text."""
        return {"excluded": "%i" % self.buttonGroup.checkedId()}

    def get_filter(self):
        """Return a ``(internal name of the tree, value)`` pair."""
        return self.tree.internalName, self.value()

    def query(self):
        """Return a single-item list with a ``("Filter", tree, value)`` tuple."""
        return [("Filter", self.tree, self.value())]

    def setControlValue(self, name, value):
        """Check the button whose option value equals ``value``.

        ``name`` is not used here.  Nothing happens if no option matches.
        """
        for i, v in enumerate(self.values):
            if v == value:
                button = self.buttonGroup.button(i)
                button.setChecked(True)
                break
Ejemplo n.º 5
0
    def __init__(self):
        """Build the widget: commit button, one options panel per method
        (each preceded by a radio button), the evaluation box and the
        topic viewer."""
        super().__init__()
        ConcurrentWidgetMixin.__init__(self)

        self.corpus = None
        self.learning_thread = None
        self.__pending_selection = self.selection
        # Shown in the evaluation labels created below.
        self.perplexity = "n/a"
        self.coherence = "n/a"

        # Commit button
        gui.auto_commit(
            self.buttonsArea, self, 'autocommit', 'Commit', box=False)

        # Radio buttons are registered under the index of their method.
        radio_group = QButtonGroup(self, exclusive=True)
        radio_group.buttonClicked[int].connect(self.change_method)

        self.widgets = []
        options_layout = QVBoxLayout()
        self.controlArea.layout().addLayout(options_layout)
        for index, (factory, attribute) in enumerate(self.methods):
            options = factory(self, title='Options')
            options.setFixedWidth(self.control_area_width)
            options.valueChanged.connect(self.commit.deferred)
            self.widgets.append(options)
            # The panel is also reachable as an attribute of the widget.
            setattr(self, attribute, options)

            radio = QRadioButton(text=options.Model.name)
            radio_group.addButton(radio, index)
            options_layout.addWidget(radio)
            options_layout.addWidget(options)

        radio_group.button(self.method_index).setChecked(True)
        self.toggle_widgets()
        options_layout.addStretch()

        # Evaluation box; it is re-inserted at position 1 of the control area.
        evaluation_box = gui.vBox(self.controlArea, "Topic evaluation")
        for template in ("Log perplexity: %(perplexity)s",
                         "Topic coherence: %(coherence)s"):
            gui.label(evaluation_box, self, template)
        self.controlArea.layout().insertWidget(1, evaluation_box)

        # Topics description
        viewer = TopicViewer()
        self.topic_desc = viewer
        viewer.topicSelected.connect(self.send_topic_by_id)
        self.mainArea.layout().addWidget(viewer)
        viewer.setFocus()
Ejemplo n.º 6
0
def qbuttongroup_emit_clicked(bg: QButtonGroup, id_: int):
    """Emit the click signals of `bg` for the button registered under `id_`.

    `buttonClicked` is always emitted with the button object; `idClicked`
    is emitted when QT_VERSION_INFO is at least (5, 15) and the `int`
    overload of `buttonClicked` when it is below (6, 0).
    """
    clicked = bg.button(id_)
    bg.buttonClicked.emit(clicked)

    emit_id_clicked = QT_VERSION_INFO >= (5, 15)
    emit_int_overload = QT_VERSION_INFO < (6, 0)
    if emit_id_clicked:
        bg.idClicked.emit(id_)
    if emit_int_overload:
        bg.buttonClicked[int].emit(id_)
Ejemplo n.º 7
0
class SingleMethodModule(PreprocessorModule):
    """Preprocessor module in which exactly one of `Methods` is chosen.

    Subclasses provide `Methods` (looked up by integer method id) and
    `DEFAULT_METHOD` (the id that is selected initially).
    """
    Methods = NotImplemented
    DEFAULT_METHOD = NotImplemented

    def __init__(self, parent=None, **kwargs):
        """Create one radio button per method id and check the default."""
        super().__init__(parent, **kwargs)
        self.__method = self.DEFAULT_METHOD

        grid = QGridLayout()
        self.setLayout(grid)
        self.__group = QButtonGroup(self, exclusive=True)
        self.__group.buttonClicked.connect(self.__method_rb_clicked)
        # `Methods` is indexed by id instead of being iterated: it is only
        # assumed to support len() and lookup by 0..len-1 (it may be a
        # mapping keyed by ids -- TODO confirm against subclasses).
        for method_id in range(len(self.Methods)):
            method = self.Methods[method_id]
            radio = QRadioButton(method.name)
            radio.setChecked(self.__method == method_id)
            radio.setToolTip(self.get_tooltip(method))
            self.__group.addButton(radio, method_id)
            self.layout().addWidget(radio)

    @property
    def method(self) -> int:
        """Id of the currently selected method."""
        return self.__method

    def setParameters(self, params: Dict):
        """Select the method stored under "method" in `params`, falling
        back to `DEFAULT_METHOD` when the key is absent."""
        chosen = params.get("method", self.DEFAULT_METHOD)
        self._set_method(chosen)

    def _set_method(self, method: int):
        """Switch to `method`; emits `changed` only on an actual change."""
        if self.__method == method:
            return
        self.__method = method
        self.__group.button(method).setChecked(True)
        self.changed.emit()

    def __method_rb_clicked(self):
        """Slot for radio-button clicks: adopt the checked id, emit `edited`."""
        checked_id = self.__group.checkedId()
        self._set_method(checked_id)
        self.edited.emit()

    def parameters(self) -> Dict:
        """Return the current state as ``{"method": <id>}``."""
        return {"method": self.__method}

    def __repr__(self):
        selected = self.Methods[self.__method]
        return selected.name
Ejemplo n.º 8
0
class OWDiscretize(widget.OWWidget):
    """
    Widget that discretizes numeric variables of the input data table.

    The discretization chosen for individual variables, and the default
    used for all others, is stored in `var_hints`.
    """
    # pylint: disable=too-many-instance-attributes
    # Widget metadata
    name = "Discretize"
    description = "Discretize numeric variables"
    category = "Transform"
    icon = "icons/Discretize.svg"
    keywords = ["bin", "categorical", "nominal", "ordinal"]
    priority = 2130

    class Inputs:
        data = Input("Data", Table, doc="Input data table")

    class Outputs:
        data = Output("Data", Table, doc="Table with categorical features")

    # Older settings are converted to this version in `migrate_settings`
    settings_version = 3

    #: Default setting (key DefaultKey) and specific settings for variables;
    # if variable is not in the dict, it uses default
    var_hints: Dict[KeyType, VarHint] = Setting(
        {DefaultKey: DefaultHint}, schema_only=True)
    # Name of the setting passed to `gui.auto_apply` in `__init__`
    autosend = Setting(True)

    want_main_area = False

    def __init__(self):
        """
        Initialize the state and build the control area: the list of
        variables, the radio buttons with their controls, and the
        auto-apply button.
        """
        super().__init__()

        #: input data
        self.data = None
        #: Cached discretized variables
        self.discretized_vars: Dict[KeyType, DiscreteVariable] = {}

        # Indicates that buttons, spins, edit and combos are being changed
        # programmatically (when interface is changed due to selection change),
        # so this should not trigger update of hints and invalidation of
        # discretization in `self.discretized_vars`.
        self.__interface_update = False

        box = gui.hBox(self.controlArea, True, spacing=8)
        # The list must exist before the buttons: creating it sets
        # `self.varview`, which is used again below
        self._create_var_list(box)
        self._create_buttons(box)
        gui.auto_apply(self.buttonsArea, self, "autosend")
        gui.rubber(self.buttonsArea)
        # Start with the default settings selected
        self.varview.select_default()

    def _create_var_list(self, box):
        """Create the list view with variables and add it to `box`"""
        # If we decide to not elide, remove the `uniformItemSize` argument
        view = ListViewSearch(
            selectionMode=QListView.ExtendedSelection, uniformItemSizes=True)
        self.varview = view

        # The model lists numeric and time variables
        model = DiscDomainModel(
            valid_types=(ContinuousVariable, TimeVariable),
            order=DiscDomainModel.MIXED)
        view.setModel(model)

        # The list of variables and the default-settings view have separate
        # selection models, each with its own callback
        variable_selection = view.selectionModel()
        variable_selection.selectionChanged.connect(
            self._var_selection_changed)
        default_selection = view.default_view.selectionModel()
        default_selection.selectionChanged.connect(self._default_selected)

        self._update_default_model()
        box.layout().addWidget(view)

    def _create_buttons(self, box):
        """
        Create radio buttons and the controls that belong to them

        The three inner factories (`intspin`, `widthline`,
        `manual_cut_editline`) return a pair (widget, change signal);
        `button` builds one row with a radio button followed by such
        controls, and wires the signals to `update_hints`.
        """
        def intspin():
            # Spin box limited to 2..10; returns it with its change signal
            s = QSpinBox(self)
            s.setMinimum(2)
            s.setMaximum(10)
            s.setFixedWidth(60)
            s.setAlignment(Qt.AlignRight)
            s.setContentsMargins(0, 0, 0, 0)
            return s, s.valueChanged

        def widthline(validator):
            # Narrow line edit using the given validator; returns it with
            # its change signal
            s = QLineEdit(self)
            s.setFixedWidth(60)
            s.setAlignment(Qt.AlignRight)
            s.setValidator(validator)
            s.setContentsMargins(0, 0, 0, 0)
            return s, s.textChanged

        # NOTE(review): annotated as returning QLineEdit, but the function
        # returns a tuple (edit, edit.textChanged) like the two factories above
        def manual_cut_editline(text="", enabled=True) -> QLineEdit:
            edit = QLineEdit(
                text=text,
                placeholderText="e.g. 0.0, 0.5, 1.0",
                toolTip='<p style="white-space:pre">' +
                        'Enter cut points as a comma-separate list of \n'
                        'strictly increasing numbers e.g. 0.0, 0.5, 1.0).</p>',
                enabled=enabled,
            )
            edit.setValidator(IncreasingNumbersListValidator())
            edit.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)

            @edit.textChanged.connect
            def update():
                # Validate non-empty text; empty text counts as acceptable
                validator = edit.validator()
                if validator is not None and edit.text().strip():
                    state, _, _ = validator.validate(edit.text(), 0)
                else:
                    state = QValidator.Acceptable
                palette = edit.palette()
                # Background/text colors for states that are not acceptable
                colors = {
                    QValidator.Intermediate: (Qt.yellow, Qt.black),
                    QValidator.Invalid: (Qt.red, Qt.black),
                }.get(state, None)
                if colors is None:
                    # Acceptable: go back to a default-constructed palette
                    palette = QPalette()
                else:
                    palette.setColor(QPalette.Base, colors[0])
                    palette.setColor(QPalette.Text, colors[1])

                # Global position of the text cursor, passed to `show_tip`
                cr = edit.cursorRect()
                p = edit.mapToGlobal(cr.bottomRight())
                edit.setPalette(palette)
                if state != QValidator.Acceptable and edit.isVisible():
                    validator.show_tip(edit, p, edit.toolTip(),
                                       textFormat=Qt.RichText)
                else:
                    # Empty text; presumably this hides the tip
                    validator.show_tip(edit, p, "")
            return edit, edit.textChanged

        # Row layouts, collected to equalize button heights at the end
        children = []

        def button(id_, *controls, stretch=True):
            # Add a row with the radio button for method `id_` followed by
            # `controls`, given as pairs (widget, signal or None).
            # Returns the widget of the first control, or None if there
            # are no controls.
            layout = QHBoxLayout()
            desc = Options[id_]
            button = QRadioButton(desc.label)
            button.setToolTip(desc.tooltip)
            self.button_group.addButton(button, id_)
            layout.addWidget(button)
            if controls:
                if stretch:
                    layout.addStretch(1)
                for c, signal in controls:
                    layout.addWidget(c)
                    if signal is not None:
                        # Changing a control checks its radio button and
                        # updates the hints as if the button was clicked
                        @signal.connect
                        def arg_changed():
                            self.button_group.button(id_).setChecked(True)
                            self.update_hints(id_)

            children.append(layout)
            button_box.layout().addLayout(layout)
            # The appended `(None, )` makes the expression give None when
            # `controls` is empty
            return (*controls, (None, ))[0][0]

        button_box = gui.vBox(box)
        button_box.layout().setSpacing(0)
        button_box.setSizePolicy(QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Preferred))
        self.button_group = QButtonGroup(self)
        self.button_group.idClicked.connect(self.update_hints)

        button(Methods.Keep)
        button(Methods.Remove)

        self.binning_spin = button(Methods.Binning, intspin())
        # Width is a non-negative float
        validator = QDoubleValidator()
        validator.setBottom(0)
        self.width_line = button(Methods.FixedWidth, widthline(validator))

        # Width for time variables: an integer >= 1 and a combo with units
        self.width_time_unit = u = QComboBox(self)
        u.setContentsMargins(0, 0, 0, 0)
        u.addItems([unit + "(s)" for unit in time_units])
        validator = QIntValidator()
        validator.setBottom(1)
        self.width_time_line = button(Methods.FixedWidthTime,
                                      widthline(validator),
                                      (u, u.currentTextChanged))

        self.freq_spin = button(Methods.EqualFreq, intspin())
        self.width_spin = button(Methods.EqualWidth, intspin())
        button(Methods.MDL)

        self.copy_to_custom = FixedSizeButton(
            text="CC", toolTip="Copy the current cut points to manual mode")
        self.copy_to_custom.clicked.connect(self._copy_to_manual)
        # The copy button is added to the row without a change signal
        self.threshold_line = button(Methods.Custom,
                                     manual_cut_editline(),
                                     (self.copy_to_custom, None),
                                     stretch=False)
        button(Methods.Default)
        # Give all radio buttons the height of the tallest row
        maxheight = max(w.sizeHint().height() for w in children)
        for w in children:
            w.itemAt(0).widget().setFixedHeight(maxheight)
        button_box.layout().addStretch(1)

    def _update_default_model(self):
        """Store the current default hint in the model of the default view"""
        default_model = self.varview.default_view.model()
        first_row = default_model.index(0)
        default_model.setData(
            first_row, self.var_hints[DefaultKey], Qt.UserRole)

    def _set_mdl_button(self):
        """
        Enable the MDL button only without data or with a discrete class

        If the button is checked when it gets disabled, the `Keep` button
        is checked instead.
        """
        mdl_radio = self.button_group.button(Methods.MDL)
        usable = self.data is None or self.data.domain.has_discrete_class
        if usable:
            mdl_radio.setEnabled(True)
            return
        if mdl_radio.isChecked():
            self._check_button(Methods.Keep, True)
        mdl_radio.setEnabled(False)

    def _check_button(self, method_id: Methods, checked: bool):
        """Set the checked state of the radio button for `method_id`"""
        radio = self.button_group.button(method_id)
        radio.setChecked(checked)

    def _uncheck_all_buttons(self):
        """Leave no radio button checked"""
        checked = self.button_group.checkedButton()
        if checked is None:
            return
        # Exclusivity is lifted while unchecking; presumably an exclusive
        # group would not let its checked button be unchecked
        self.button_group.setExclusive(False)
        checked.setChecked(False)
        self.button_group.setExclusive(True)

    def _set_radio_enabled(self, method_id: Methods, value: bool):
        """
        Enable or disable a radio button together with its controls

        Disabling the button that is currently checked unchecks all buttons.
        """
        radio = self.button_group.button(method_id)
        if not value and radio.isChecked():
            self._uncheck_all_buttons()
        radio.setEnabled(value)
        for name in Options[method_id].controls:
            control = getattr(self, name)
            control.setEnabled(value)

    def _get_values(self, method_id: Methods) -> Tuple[Union[int, float, str]]:
        """
        Collect the current parameters of the given method from its controls

        Spin boxes contribute their value, combo boxes their current index
        and any other control its text.
        """
        def read(control):
            if isinstance(control, QSpinBox):
                return control.value()
            if isinstance(control, QComboBox):
                return control.currentIndex()
            return control.text()

        return tuple(read(getattr(self, name))
                     for name in Options[method_id].controls)

    def _set_values(self, method_id: Methods,
                    values: Tuple[Union[str, int, float]]):
        """
        Fill the controls of the given method with parameters from a hint

        Values are paired with the method's controls in order: spin boxes
        get a value, combo boxes a current index and other controls a text.
        """
        for name, value in zip(Options[method_id].controls, values):
            control = getattr(self, name)
            if isinstance(control, QSpinBox):
                assign = control.setValue
            elif isinstance(control, QComboBox):
                assign = control.setCurrentIndex
            else:
                assign = control.setText
            assign(value)

    def varkeys_for_selection(self) -> List[KeyType]:
        """
        Return keys (for indexing `var_hints`) of the selected variables

        With no rows selected in the list of variables, i.e. when
        'Default settings' are selected, the result is `[DefaultKey]`.
        """
        model = self.varview.model()
        selected_rows = self.varview.selectionModel().selectedRows()
        keys = [variable_key(model[index.row()]) for index in selected_rows]
        if not keys:
            return [DefaultKey]  # default settings are selected
        return keys

    def update_hints(self, method_id: Methods):
        """
        Callback for radio buttons and for controls regulating parameters

        Does nothing while the interface is being updated programmatically.
        Otherwise it:
        - updates `var_hints` for all selected variables (choosing
          `Methods.Default` removes their hints instead)
        - invalidates (removes) `discretized_vars` for affected variables
        - calls _update_discretizations to compute new discretizations
        - calls deferred commit

        Data for list view models is updated in _update_discretizations
        """
        if self.__interface_update:
            return

        method_id = Methods(method_id)
        args = self._get_values(method_id)
        keys = self.varkeys_for_selection()
        default_selected = keys == [DefaultKey]

        if method_id == Methods.Default:
            for key in keys:
                self.var_hints.pop(key, None)
        else:
            # All selected variables share the same hint
            hint = VarHint(method_id, args)
            for key in keys:
                self.var_hints[key] = hint

        if default_selected:
            # A new default affects every variable without a hint of its own
            invalidate = set(self.discretized_vars) - set(self.var_hints)
        else:
            invalidate = keys
        for key in invalidate:
            del self.discretized_vars[key]

        if default_selected:
            self._update_default_model()
        self._update_discretizations()
        self.commit.deferred()

    def _update_discretizations(self):
        """
        Compute discretizations that are missing from `discretized_vars`

        For every recomputed variable, the description in the list view
        model is updated as well. Does nothing without data.
        """
        if self.data is None:
            return

        default_hint = self.var_hints[DefaultKey]
        model = self.varview.model()
        for row, var in enumerate(model):
            key = variable_key(var)
            if key not in self.discretized_vars:  # otherwise still valid
                var_hint = self.var_hints.get(key)
                points, dvar = self._discretize_var(
                    var, var_hint or default_hint)
                self.discretized_vars[key] = dvar
                # `dvar` may be None, hence the fallback to no values
                values = getattr(dvar, "values", ())
                description = DiscDesc(var_hint, points, values)
                model.setData(model.index(row), description, Qt.UserRole)

    def _discretize_var(
            self, var: ContinuousVariable, hint: VarHint
    ) -> Tuple[str, Optional[Variable]]:
        """
        Discretize the variable with the method and arguments from the hint.

        Returns a pair with a description (list of points or an
        error/warning) and one of
        - discrete variable
        - the same variable (if kept numeric)
        - None (if removed or errored)
        """
        method_id = hint.method_id
        # Some methods apply only to time or only to non-time variables;
        # variables of the wrong kind are kept as they are
        if isinstance(var, TimeVariable):
            if method_id in (Methods.FixedWidth, Methods.Custom):
                return ": <keep, time var>", var
        elif method_id == Methods.FixedWidthTime:
            return ": <keep, not time>", var

        discretize = Options[method_id].function
        result = discretize(self.data, var, *hint.args)
        if isinstance(result, str):
            return f" <{result}>", None  # error
        if result is None:
            return "", None  # removed
        if result is var:
            return "", var  # no transformation

        cut_points = result.compute_value.points
        if len(cut_points) == 0:
            return " <removed>", None
        formatted = ", ".join(map(var.repr_val, cut_points))
        return ": " + formatted, result

    def _copy_to_manual(self):
        """
        Callback for 'CC' button

        Changes the method of selected variables to "Custom", with their
        current thresholds stored as text in their VarHints. Variables
        whose cached discretization is not a discrete variable are skipped.

        Discretizations of changed variables are invalidated and then
        recomputed (`_update_discretizations`).

        If all changed variables have the same thresholds, the text is put
        into the line edit. Otherwise all radio buttons are unchecked to
        keep the interface consistent.
        """
        texts = set()
        for key in self.varkeys_for_selection():
            dvar = self.discretized_vars.get(key)
            fmt = self.data.domain[key[0]].repr_val
            if not isinstance(dvar, DiscreteVariable):
                continue
            text = ", ".join(map(fmt, dvar.compute_value.points))
            texts.add(text)
            self.var_hints[key] = VarHint(Methods.Custom, (text, ))
            del self.discretized_vars[key]

        # Changing the interface here must not be handled as user's edit
        self.__interface_update = True
        try:
            if len(texts) == 1:
                (common_text, ) = texts
                self.threshold_line.setText(common_text)
            else:
                self._uncheck_all_buttons()
        finally:
            self.__interface_update = False

        self._update_discretizations()
        self.commit.deferred()

    def _default_selected(self, selected):
        """
        Callback for selecting 'Default setting'

        Clears the selection of variables, updates the interface and
        enables all methods except `Default`; the copy button is disabled.
        """
        if not selected:
            # Prevent infinite recursion (with _var_selection_changed)
            return
        self.varview.selectionModel().clearSelection()
        self._update_interface()

        availability = (
            (Methods.Default, False),
            (Methods.FixedWidth, True),
            (Methods.FixedWidthTime, True),
            (Methods.Custom, True),
        )
        for method_id, enabled in availability:
            self._set_radio_enabled(method_id, enabled)
        self.copy_to_custom.setEnabled(False)

    def _var_selection_changed(self, _):
        """
        Callback for changed selection in listview with variables

        Clears the selection of default settings, updates the interface and
        enables only methods that apply to all selected variables: fixed
        width and custom cut points when none of them is a time variable,
        and time-based fixed width when all of them are.
        """
        if not self.varview.selectionModel().selectedIndexes():
            # Prevent infinite recursion (with _default_selected)
            return
        self.varview.default_view.selectionModel().clearSelection()
        self._update_interface()

        is_time = [isinstance(self.data.domain[name], TimeVariable)
                   for name, _ in self.varkeys_for_selection()]
        no_time = not any(is_time)
        all_time = all(is_time)

        self._set_radio_enabled(Methods.Default, True)
        self._set_radio_enabled(Methods.FixedWidth, no_time)
        self._set_radio_enabled(Methods.Custom, no_time)
        self.copy_to_custom.setEnabled(no_time)
        self._set_radio_enabled(Methods.FixedWidthTime, all_time)

    def _update_interface(self):
        """
        Update the user interface according to selection

        - If all selected variables have the same VarHint, the matching
          radio button is checked and its controls are filled (variables
          without a hint count as using `Methods.Default`);
        - otherwise, all radios are unchecked.
        """
        if self.__interface_update:
            return

        # Changes made below must not be handled as user's edits
        self.__interface_update = True
        try:
            keys = self.varkeys_for_selection()
            hints = list(unique_everseen(map(self.var_hints.get, keys)))
            if len(hints) != 1:
                self._uncheck_all_buttons()
                return

            (hint, ) = hints
            if hint is None:
                method_id, args = Methods.Default, ()
            else:
                method_id, args = hint
            self._check_button(method_id, True)
            self._set_values(method_id, args)
        finally:
            self.__interface_update = False

    @Inputs.data
    def set_data(self, data: Optional[Table]):
        """
        Handler for the data input

        Drops cached discretizations, shows the new domain in the list of
        variables, recomputes discretizations, selects the default settings
        and commits immediately.
        """
        self.data = data
        self.discretized_vars = {}

        domain_model = self.varview.model()
        domain_model.set_domain(data.domain if data is not None else None)

        self._update_discretizations()
        self._update_default_model()
        self.varview.select_default()
        self._set_mdl_button()
        self.commit.now()

    @gui.deferred
    def commit(self):
        """
        Send the data transformed into the domain with discretized variables

        Variables are replaced by their entries in `discretized_vars`
        (falsy entries, such as None, are left out); variables without an
        entry are kept. Without input data, None is sent.
        """
        if self.data is None:
            self.Outputs.data.send(None)
            return

        def surviving(variables: List[Variable]) -> List[Variable]:
            kept = []
            for variable in variables:
                replacement = self.discretized_vars.get(
                    variable_key(variable), variable)
                if replacement:
                    kept.append(replacement)
            return kept

        source = self.data.domain
        domain = Domain(surviving(source.attributes),
                        surviving(source.class_vars),
                        surviving(source.metas))
        self.Outputs.data.send(self.data.transform(domain))

    def send_report(self):
        """Report the default setting and the description of each variable"""
        default_model = self.varview.default_view.model()
        default_text = default_model.data(default_model.index(0))
        # The text is split at the first ": " into a single report item
        default_item = tuple(default_text.split(": ", maxsplit=1))
        self.report_items((default_item, ))

        model = self.varview.model()
        rows = []
        for row in range(model.rowCount()):
            label = model[row].name
            desc = model.data(model.index(row), Qt.UserRole)
            if desc.hint is not None:
                # Variables with a hint of their own also show the method
                label = f"{label} ({format_desc(desc.hint)})"
            rows.append((label, ', '.join(desc.values)))
        self.report_items("Variables", rows)

    @classmethod
    def migrate_settings(cls, settings, version):
        """
        Convert settings stored by versions older than 3, in place

        - before version 2: the default method, stored as an int, is
          replaced by its name;
        - before version 3: the default method with its parameters, and the
          per-variable states saved in contexts, are converted to the
          `var_hints` dictionary.
        """
        unversioned = version is None
        if unversioned or version < 2:
            # was stored as int indexing Methods (but offset by 1)
            old_index = settings.pop("default_method", 0)
            settings["default_method_name"] = Methods(old_index + 1).name

        if not (unversioned or version < 3):
            return

        method_name = settings.pop("default_method_name",
                                   DefaultHint.method_id.name)
        k = settings.pop("default_k", 3)
        cut_points = settings.pop("default_cutpoints", ())

        method_id = getattr(Methods, method_name)
        if method_id in (Methods.EqualFreq, Methods.EqualWidth):
            args = (k, )
        elif method_id == Methods.Custom:
            args = (cut_points, )
        else:
            args = ()
        var_hints = {DefaultKey: VarHint(method_id, args)}

        for context in settings.pop("context_settings", []):
            values = context.values
            if "saved_var_states" not in values:
                continue
            var_states, _ = values.pop("saved_var_states")
            for (tpe, name), dstate in var_states.items():
                method = dstate.method
                # The old method's type name gives the new method name,
                # with "Leave" renamed to "Keep"
                method_name = type(method).__name__.replace("Leave", "Keep")
                if method_name == "Default":
                    continue
                if method_name == "Custom":
                    args = (", ".join(f"{x:g}" for x in method.points), )
                else:
                    args = tuple(method)
                key = (name, tpe == 4)  # time variable == 4
                var_hints[key] = VarHint(getattr(Methods, method_name), args)
        settings["var_hints"] = var_hints
Ejemplo n.º 9
0
class OWRank(OWWidget):
    """
    Widget that scores the attributes of the input data, shows the scores in
    a table and outputs the data reduced to the selected attributes along
    with a table of the scores.
    """
    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102

    buttons_area_orientation = Qt.Vertical

    # The third element of each input names a method of this class
    # (setData, set_learner); presumably the framework calls it when a
    # signal arrives -- confirm against the widget base class.
    inputs = [("Data", Table, "setData"),
              ("Scorer", score.Scorer, "set_learner", widget.Multiple)]
    outputs = [("Reduced Data", Table, widget.Default), ("Scores", Table)]

    # Selection methods; these are the values of `selectMethod` and also
    # the ids of the radio buttons in `selectButtons`.
    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    # Names of the scorers whose check boxes are ticked in classification
    # and in regression mode, respectively (see switchRanksMode).
    cls_default_selected = Setting({"Gain Ratio", "Gini Decrease"})
    reg_default_selected = Setting({"Univariate Linear Regression", "RReliefF"})
    selectMethod = Setting(SelectNBest)
    # Number of rows selected by the "Best ranked" method
    nSelected = Setting(5)
    auto_apply = Setting(True)

    # Header state for discrete/continuous/no_class scores
    headerState = Setting([None, None, None])

    settings_version = 1
    settingsHandler = DomainContextHandler()
    selected_rows = ContextSetting([])

    # Attributes bound to the scorer check boxes created in __init__.
    # `_score_vars` is zipped with SCORES there, so its order has to match
    # the order of SCORES.
    gain = inf_gain = gini = anova = chi2 = ulr = relief = rrelief = fcbc = True
    _score_vars = ["gain", "inf_gain", "gini", "anova", "chi2", "relief",
                   "fcbc", "ulr", "rrelief"]

    class Warning(OWWidget.Warning):
        no_target_var = Msg("Data does not have a target variable")

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        # The whole message text is supplied by the caller
        inadequate_learner = Msg("{}")

    def __init__(self):
        """
        Build the widget.

        Creates the scorer check boxes (one stacked page for classification
        and one for regression), the attribute selection controls, and
        three stacked table views -- for data with a discrete class, with a
        continuous class and without a class -- and then activates the
        discrete view.
        """
        super().__init__()
        self.measure_scores = None
        self.update_scores = True
        self.usefulAttributes = []
        self.learners = {}
        self.labels = []
        self.out_domain_desc = None

        self.all_measures = SCORES

        self.selectedMeasures = {m.name: True for m in self.all_measures}
        # Discrete (0) or continuous (1) class mode
        self.rankMode = 0

        self.data = None

        self.discMeasures = [m for m in self.all_measures if
                             issubclass(DiscreteVariable, m.score.class_type)]
        self.contMeasures = [m for m in self.all_measures if
                             issubclass(ContinuousVariable, m.score.class_type)]

        self.score_checks = []
        self.cls_scoring_box = gui.vBox(None, "Scoring for Classification")
        self.reg_scoring_box = gui.vBox(None, "Scoring for Regression")
        # The first seven entries of SCORES get a check box in the
        # classification box, the remaining two in the regression box.
        boxes = [self.cls_scoring_box] * 7 + [self.reg_scoring_box] * 2
        for _score, var, box in zip(SCORES, self._score_vars, boxes):
            # `val=_score` binds the current scorer; a plain closure would
            # see only the last value of the loop variable.
            check = gui.checkBox(
                box, self, var, label=_score.name,
                callback=lambda val=_score: self.measuresSelectionChanged(val))
            self.score_checks.append(check)

        # Third (empty) page is shown when the data has no class
        self.score_stack = QStackedWidget(self)
        self.score_stack.addWidget(self.cls_scoring_box)
        self.score_stack.addWidget(self.reg_scoring_box)
        self.score_stack.addWidget(QWidget())
        self.controlArea.layout().addWidget(self.score_stack)

        gui.rubber(self.controlArea)

        selMethBox = gui.vBox(
                self.controlArea, "Select Attributes", addSpace=True)

        grid = QGridLayout()
        grid.setContentsMargins(6, 0, 6, 0)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectMethod)

        def button(text, buttonid, toolTip=None):
            b = QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

        s = gui.spin(selMethBox, self, "nSelected", 1, 100,
                     callback=self.nSelectedChanged)

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

        # Discrete, continuous and no_class table views are stacked
        self.ranksViewStack = QStackedLayout()
        self.mainArea.layout().addLayout(self.ranksViewStack)

        self.discRanksLabels = ["#"] + [m.shortname for m in self.discMeasures]
        (self.discRanksView, self.discRanksModel,
         self.discRanksProxyModel) = self._make_ranks_view(
             self.discRanksLabels, self.headerState[0])

        self.contRanksLabels = ["#"] + [m.shortname for m in self.contMeasures]
        (self.contRanksView, self.contRanksModel,
         self.contRanksProxyModel) = self._make_ranks_view(
             self.contRanksLabels, self.headerState[1])

        self.noClassRanksLabels = ["#"]
        (self.noClassRanksView, self.noClassRanksModel,
         self.noClassRanksProxyModel) = self._make_ranks_view(
             self.noClassRanksLabels, self.headerState[2])

        # Switch the current view to Discrete
        self.switchRanksMode(0)
        self.resetInternals()
        self.updateDelegates()
        self.updateVisibleScoreColumns()

        self.resize(690, 500)

        self.measure_scores = table((len(self.measures), 0), None)

    def _make_ranks_view(self, labels, header_state):
        """
        Create one ranks table view, append it to `ranksViewStack` and wire
        its signals to the handlers of this widget.

        Args:
            labels (list of str): horizontal header labels of the model
            header_state: saved state of the horizontal header, restored
                when it is not None

        Returns:
            tuple: the view, its item model and the sorting proxy model
            placed between them
        """
        view = QTableView()
        self.ranksViewStack.addWidget(view)
        view.setSelectionBehavior(QTableView.SelectRows)
        view.setSelectionMode(QTableView.MultiSelection)
        view.setSortingEnabled(True)

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(labels)

        proxy = MySortProxyModel(self)
        proxy.setSourceModel(model)
        view.setModel(proxy)

        view.setColumnWidth(0, 20)
        view.selectionModel().selectionChanged.connect(self.commit)
        view.pressed.connect(self.onSelectItem)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)
        view.verticalHeader().sectionClicked.connect(self.onSelectItem)

        if header_state is not None:
            view.horizontalHeader().restoreState(header_state)
        return view, model, proxy

    def switchRanksMode(self, index):
        """
        Activate the view, model, proxy model and measures of one mode.

        `index` is 0 for a discrete class and 1 for a continuous class; any
        other value activates the view used for data without a class. The
        scoring box of the active mode is set to expand and the other one
        is set to be ignored by the layout.
        """
        self.rankMode = index
        self.ranksViewStack.setCurrentIndex(index)

        ignored = (QSizePolicy.Ignored, QSizePolicy.Ignored)
        expanding = (QSizePolicy.Expanding, QSizePolicy.Expanding)

        if index == 0:
            current = (self.discRanksView, self.discRanksModel,
                       self.discRanksProxyModel, self.discMeasures,
                       self.cls_default_selected)
            policies = ((self.reg_scoring_box, ignored),
                        (self.cls_scoring_box, expanding))
        elif index == 1:
            current = (self.contRanksView, self.contRanksModel,
                       self.contRanksProxyModel, self.contMeasures,
                       self.reg_default_selected)
            policies = ((self.cls_scoring_box, ignored),
                        (self.reg_scoring_box, expanding))
        else:
            current = (self.noClassRanksView, self.noClassRanksModel,
                       self.noClassRanksProxyModel, [], set())
            policies = ((self.reg_scoring_box, ignored),
                        (self.cls_scoring_box, ignored))

        (self.ranksView, self.ranksModel, self.ranksProxyModel,
         self.measures, self.selected_checks) = current
        for box, (horizontal, vertical) in policies:
            box.setSizePolicy(horizontal, vertical)

        n_score_rows = len(self.measures) + len(self.learners)
        self.measure_scores = table((n_score_rows, 0), None)

        # measuresSelectionChanged skips updateScores while this flag is
        # False, so syncing the check boxes does not recompute any scores.
        self.update_scores = False
        for check, meta in zip(self.score_checks, SCORES):
            check.setChecked(meta.name in self.selected_checks)
        self.update_scores = True

        self.score_stack.setCurrentIndex(index)
        self.updateVisibleScoreColumns()

    @check_sql_input
    def setData(self, data):
        """
        Handle new input data (or None).

        Clears the previous state, picks the mode that matches the class
        variable of the data, fills the first column and the vertical header
        of the ranks model, recomputes the scores, and finally restores the
        context and commits the selection.
        """
        self.closeContext()
        self.clear_messages()
        self.resetInternals()

        self.data = data
        self.switchRanksMode(0)
        if self.data is None:
            self.send("Scores", None)
        else:
            domain = self.data.domain
            attributes = domain.attributes
            self.usefulAttributes = [
                var for var in attributes
                if var.is_discrete or var.is_continuous]

            if domain.has_continuous_class:
                self.switchRanksMode(1)
            elif not domain.class_var:
                self.Warning.no_target_var()
                self.switchRanksMode(2)
            elif not domain.has_discrete_class:
                self.Error.invalid_type(type(domain.class_var).__name__)

            if issparse(self.data.X):
                # only measures that support sparse data are kept
                self.measures = [measure for measure in self.measures
                                 if measure.score.supports_sparse_data]

            n_attributes = len(attributes)
            self.ranksModel.setRowCount(n_attributes)
            for row, var in enumerate(attributes):
                # first column: number of values, or "C" when not discrete
                count_item = ScoreValueItem()
                count_item.setData(
                    len(var.values) if var.is_discrete else "C",
                    Qt.DisplayRole)
                self.ranksModel.setItem(row, 0, count_item)

                header_item = QStandardItem(var.name)
                header_item.setData(gui.attributeIconDict[var],
                                    Qt.DecorationRole)
                self.ranksModel.setVerticalHeaderItem(row, header_item)

            n_score_rows = len(self.measures) + len(self.learners)
            self.measure_scores = table((n_score_rows, n_attributes), None)
            self.updateScores()

        self.selected_rows = []
        self.openContext(data)
        self.selectMethodChanged()
        self.commit()

    def get_selection(self):
        """
        Return a list with the distinct row numbers of all indexes selected
        in the current ranks view.
        """
        indexes = self.ranksView.selectionModel().selection().indexes()
        return list({index.row() for index in indexes})

    def set_learner(self, learner, lid=None):
        """
        Add, replace or remove an external scorer and refresh its scores.

        Args:
            learner: the scorer, or None to remove the one stored under
                `lid`
            lid: key under which the scorer is kept in `self.learners`
                (presumably the id of the input link -- confirm against
                the signal manager)

        A None `learner` for a `lid` that was never stored is ignored
        instead of raising KeyError; this happens when None arrives for an
        input that never delivered a scorer.
        """
        if learner is None:
            if lid is not None:
                self.learners.pop(lid, None)
        else:
            self.learners[lid] = score_meta(
                learner.name,
                learner.name,
                learner
            )
        attrs_len = 0 if not self.data else len(self.data.domain.attributes)
        shape = (len(self.learners), attrs_len)
        # Keep the rows of the built-in measures and start afresh with the
        # rows that belong to the learners.
        self.measure_scores = self.measure_scores[:len(self.measures)]
        self.measure_scores += table(shape, None)
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels)
        # Recompute only the learners' scores
        measures_mask = [False] * len(self.measures)
        measures_mask += [True] * len(self.learners)
        self.updateScores(measures_mask)
        self.commit()

    def updateScores(self, measuresMask=None):
        """
        Update the current computed scores.

        If `measuresMask` is given it must be a list of bool values
        indicating what measures should be recomputed; its entries
        correspond to `self.measures` followed by the connected learners.
        When it is omitted, the selected measures and all learners are
        recomputed.

        Does nothing when there is no data. Otherwise the scores are stored
        in `self.measure_scores`, the header labels and the ranks model are
        refreshed and the scores table is sent to the "Scores" output.
        """
        if not self.data:
            return
        if self.data.has_missing():
            self.information("Missing values have been imputed.")

        measures = self.measures + list(self.learners.values())
        if measuresMask is None:
            # Update all selected measures and every learner. The learner
            # entries are plain True: using the learner's name as the flag
            # would skip a learner whose name is an empty string.
            measuresMask = [self.selectedMeasures.get(m.name)
                            for m in self.measures]
            measuresMask += [True] * len(self.learners)

        data = self.data
        learner_col = len(self.measures)
        # The learner labels are rebuilt when the mask has no learner part
        # or when the first learner is to be recomputed.
        if len(measuresMask) <= len(self.measures) or \
                measuresMask[len(self.measures)]:
            self.labels = []
            self.Error.inadequate_learner.clear()

        self.setStatusMessage("Running")
        with self.progressBar():
            # Count exactly the entries processed by the loop below (falsy
            # entries, including None, are skipped there).
            n_measure_update = sum(1 for mask in measuresMask if mask)
            count = 0
            for index, (meas, mask) in enumerate(zip(measures, measuresMask)):
                if not mask:
                    continue
                self.progressBarSet(90 * count / n_measure_update)
                count += 1
                if index < len(self.measures):
                    estimator = meas.score()
                    try:
                        self.measure_scores[index] = estimator(data)
                    except ValueError:
                        # Scoring all attributes at once failed; score them
                        # one by one and store None for those that fail.
                        self.measure_scores[index] = []
                        for attr in data.domain.attributes:
                            try:
                                self.measure_scores[index].append(
                                    estimator(data, attr))
                            except ValueError:
                                self.measure_scores[index].append(None)
                else:
                    learner = meas.score
                    if isinstance(learner, Learner) and \
                            not learner.check_learner_adequacy(self.data.domain):
                        self.Error.inadequate_learner(
                            learner.learner_adequacy_err_msg)
                        scores = table((1, len(data.domain.attributes)))
                    else:
                        scores = meas.score.score_data(data)
                    # A learner may yield several rows of scores; each row
                    # gets its own label and its own row in measure_scores.
                    for i, row in enumerate(scores):
                        self.labels.append(meas.shortname + str(i + 1))
                        if len(self.measure_scores) > learner_col:
                            self.measure_scores[learner_col] = row
                        else:
                            self.measure_scores.append(row)
                        learner_col += 1
            self.progressBarSet(90)
        self.contRanksModel.setHorizontalHeaderLabels(
            self.contRanksLabels + self.labels
        )
        self.discRanksModel.setHorizontalHeaderLabels(
            self.discRanksLabels + self.labels
        )
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels + self.labels
        )
        self.updateRankModel(measuresMask)
        self.ranksProxyModel.invalidate()
        self.selectMethodChanged()
        self.send("Scores", self.create_scores_table(self.labels))
        self.setStatusMessage("")

    def updateRankModel(self, measuresMask):
        """
        Copy the computed scores into the current ranks model.

        `measuresMask` is extended in place with copies of its last entry
        until it has one entry per row of `self.measure_scores`. Columns
        whose mask entry is false are left alone when they already hold
        items. For the columns that are updated, each score is shown in its
        cell and its position between the column's minimum and maximum is
        stored under `gui.BarRatioRole`.
        """
        model = self.ranksModel
        n_score_rows = len(self.measure_scores)

        missing = n_score_rows - len(measuresMask)
        if len(measuresMask):
            measuresMask += [measuresMask[-1]] * missing

        # Remove the columns past those needed for the scores, last first
        for column in reversed(range(n_score_rows + 1, model.columnCount())):
            model.removeColumn(column)

        # Score columns start at model column 1; column 0 is not a score
        columns = []
        for column, (scores, update) in enumerate(
                zip(self.measure_scores, measuresMask), start=1):
            if not update and model.item(0, column):
                columns.append([])
                continue
            column_values = list(scores)
            for row, value in enumerate(column_values):
                cell = model.item(row, column)
                if not cell:
                    cell = ScoreValueItem()
                    model.setItem(row, column, cell)
                cell.setData(value, Qt.DisplayRole)
            columns.append(column_values)

        for column, (column_values, update) in enumerate(
                zip(columns, measuresMask), start=1):
            if not update:
                continue
            known = [value for value in column_values if value is not None]
            if not known:
                continue
            lowest, highest = min(known), max(known)
            # a zero range is replaced by 1 so the division is defined
            span = (highest - lowest) or 1
            for row, value in enumerate(column_values):
                if value is None:
                    continue
                ratio = float((value - lowest) / span)
                model.item(row, column).setData(ratio, gui.BarRatioRole)

        self.ranksView.setColumnWidth(0, 20)
        self.ranksView.resizeRowsToContents()

    def resetInternals(self):
        """Forget the data and its attributes and empty the ranks model."""
        # The data is dropped before the rows are removed from the model.
        self.data, self.usefulAttributes = None, []
        self.ranksModel.setRowCount(0)

    def onSelectItem(self, index):
        """
        Switch to manual selection and commit.

        Connected to the `pressed` signal of the views and to the
        `sectionClicked` signal of their vertical headers; `index` is not
        used.
        """
        manual = OWRank.SelectManual
        self.selectMethod = manual
        self.selectButtons.button(manual).setChecked(True)
        self.commit()

    def setSelectMethod(self, method):
        """Make `method` the selection method unless it already is."""
        if self.selectMethod == method:
            return
        self.selectMethod = method
        self.selectButtons.button(method).setChecked(True)
        self.selectMethodChanged()

    def selectMethodChanged(self):
        """Redo the automatic selection and give focus to the ranks view."""
        self.autoSelection()
        view = self.ranksView
        view.setFocus()

    def nSelectedChanged(self):
        """
        Callback of the spin box for `nSelected`: switch to the "best
        ranked" method and redo the selection.
        """
        n_best = OWRank.SelectNBest
        self.selectMethod = n_best
        self.selectButtons.button(n_best).setChecked(True)
        self.selectMethodChanged()

    def autoSelection(self):
        """
        Replace the selection in the current view according to
        `selectMethod`: nothing, all rows, the first `nSelected` rows of
        the proxy model, or (for the manual method) the rows listed in
        `selected_rows`.
        """
        selection_model = self.ranksView.selectionModel()
        n_rows = self.ranksModel.rowCount()
        last_column = self.ranksModel.columnCount() - 1
        proxy = self.ranksProxyModel
        method = self.selectMethod

        if method == OWRank.SelectAll:
            selection = QItemSelection(
                proxy.index(0, 0), proxy.index(n_rows - 1, last_column))
        elif method == OWRank.SelectNBest:
            last_row = min(self.nSelected, n_rows) - 1
            selection = QItemSelection(
                proxy.index(0, 0), proxy.index(last_row, last_column))
        else:
            selection = QItemSelection()
            if method != OWRank.SelectNone:
                # manual: one whole-row range per remembered row
                for row in self.selected_rows:
                    selection.append(QItemSelectionRange(
                        proxy.index(row, 0), proxy.index(row, last_column)))

        selection_model.select(selection, QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """
        Handle a click on section `index` of a horizontal header.

        With the "best ranked" method active, a click on any section other
        than the first redoes the selection. The states of all three
        horizontal headers are then saved into `headerState`.
        """
        if index >= 1 and self.selectMethod == OWRank.SelectNBest:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Saved in the order discrete, continuous, no class
        views = (self.discRanksView, self.contRanksView,
                 self.noClassRanksView)
        self.headerState = [bytes(view.horizontalHeader().saveState())
                            for view in views]

    def measuresSelectionChanged(self, measure):
        """
        Toggle the selection state of `measure` and update the view.

        The flag in `selectedMeasures` is inverted and the name is added to
        or removed from `selected_checks`. If the measure belongs to the
        current mode and has no computed scores yet, it is recomputed
        (unless `update_scores` is off). Column visibility is refreshed in
        any case.
        """
        name = measure.name
        now_selected = not self.selectedMeasures[name]
        self.selectedMeasures[name] = now_selected
        if now_selected:
            self.selected_checks.add(name)
        else:
            self.selected_checks.discard(name)

        mask = [False] * (len(self.measures) + len(self.learners))
        # Update scores for shown column if they are not yet computed.
        if measure in self.measures and self.measure_scores:
            position = self.measures.index(measure)
            if all(value is None for value in self.measure_scores[position]):
                mask[position] = True
        if self.update_scores:
            self.updateScores(mask)
        self.updateVisibleScoreColumns()

    def updateVisibleScoreColumns(self):
        """
        Show the columns of the selected measures and hide the others.

        If the column that carries the sort indicator ends up hidden, the
        saved header state of the current mode is dropped. When there is no
        saved header state, the view is sorted in descending order by the
        first measure listed in `selected_checks` (or by the column after
        the last measure if there is none) and the selection is redone.
        """
        view = self.ranksView
        for column, measure in enumerate(self.measures, start=1):
            visible = self.selectedMeasures.get(measure.name)
            view.setColumnHidden(column, not visible)
            view.setColumnWidth(column, 100)

        sort_section = view.horizontalHeader().sortIndicatorSection()
        if view.isColumnHidden(sort_section):
            self.headerState[self.rankMode] = None

        if self.headerState[self.rankMode] is not None:
            return

        fallback = len(self.measures) + 1
        sort_column = next(
            (column for column, measure in enumerate(self.measures, start=1)
             if measure.name in self.selected_checks),
            fallback)
        view.sortByColumn(sort_column, Qt.DescendingOrder)
        self.autoSelection()

    def updateDelegates(self):
        """Give each of the three ranks views its own bar item delegate."""
        views = (self.contRanksView, self.discRanksView,
                 self.noClassRanksView)
        for view in views:
            view.setItemDelegate(gui.ColoredBarItemDelegate(self))

    def send_report(self):
        """
        Report the input domain, the ranks table and, when available, the
        description of the output domain. Nothing is reported without data.
        """
        if self.data:
            self.report_domain("Input", self.data.domain)
            self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
            output_description = self.out_domain_desc
            if output_description is not None:
                self.report_items("Output", output_description)

    def commit(self):
        """
        Remember the selected rows and send the data reduced to the
        selected attributes (or None when there is no data or nothing is
        selected) to "Reduced Data".

        When as many rows are selected as the data has attributes, the
        selection method is switched to "All".
        """
        rows = self.get_selection()
        self.selected_rows = rows
        if self.data and len(self.data.domain.attributes) == len(rows):
            select_all = OWRank.SelectAll
            self.selectMethod = select_all
            self.selectButtons.button(select_all).setChecked(True)

        attributes = self.selectedAttrs()
        if self.data and attributes:
            source_domain = self.data.domain
            reduced_domain = Domain(attributes, source_domain.class_var,
                                    source_domain.metas)
            reduced = Table(reduced_domain, self.data)
            self.send("Reduced Data", reduced)
            self.out_domain_desc = report.describe_domain(reduced.domain)
        else:
            self.send("Reduced Data", None)
            self.out_domain_desc = None

    def selectedAttrs(self):
        """
        Return the attributes of the data that correspond to the rows
        selected in the current view; an empty list without data.
        """
        if not self.data:
            return []
        selected = self.ranksView.selectionModel().selectedRows(0)
        # the view shows the proxy model; attributes follow the row order
        # of the source model
        to_source = self.ranksProxyModel.mapToSource
        attributes = self.data.domain.attributes
        return [attributes[to_source(index).row()] for index in selected]

    def create_scores_table(self, labels):
        """
        Build the table sent to the "Scores" output.

        The table has one column for each selected measure of the current
        mode followed by one column per entry of `labels`, and the names of
        the data's attributes as a meta column. Returns None when there
        would be no columns.
        """
        chosen = [(position, measure.name)
                  for position, measure in enumerate(self.measures)
                  if self.selectedMeasures.get(measure.name, False)]
        column_names = [name for _, name in chosen] + list(labels)
        if not column_names:
            return None

        domain = Domain(
            [ContinuousVariable(name) for name in column_names],
            metas=[StringVariable("Feature name")])

        # rows of selected measures plus every row past the built-in ones
        chosen_rows = {position for position, _ in chosen}
        n_measures = len(self.measures)
        kept = [row for position, row in enumerate(self.measure_scores)
                if position in chosen_rows or position >= n_measures]
        scores = np.array(kept).T

        names = np.array([attr.name for attr in self.data.domain.attributes])
        # Reshape to 2d array as Table does not like 1d arrays
        names = names[:, None]

        scores_table = Table(domain, scores, metas=names)
        scores_table.name = "Feature Scores"
        return scores_table

    @classmethod
    def migrate_settings(cls, settings, version):
        """
        Upgrade settings stored without a version, in place.

        A ``headerState`` stored as a tuple with fewer than three entries
        is replaced by a list padded with None to exactly three entries.
        """
        if version:
            return
        # Before fc5caa1e1d716607f1f5c4e0b0be265c23280fa0
        # headerState had length 2
        state = settings.get("headerState", None)
        if isinstance(state, tuple) and len(state) < 3:
            padded = list(state)
            padded.extend([None] * (3 - len(padded)))
            settings["headerState"] = padded
Ejemplo n.º 10
0
class OWRank(OWWidget):
    """Widget that ranks and filters data features by their relevance."""

    # Widget metadata
    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102
    keywords = []

    buttons_area_orientation = Qt.Vertical

    class Inputs:
        # The data to score, plus any number of externally supplied scorers
        data = Input("Data", Table)
        scorer = Input("Scorer", score.Scorer, multiple=True)

    class Outputs:
        # Data restricted to the selected features, the score table, and the
        # list of selected features
        reduced_data = Output("Reduced Data", Table, default=True)
        scores = Output("Scores", Table)
        features = Output("Features", AttributeList, dynamic=False)

    # Selection modes (0..3); the same values serve as radio-button ids
    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    # Number of top rows selected in SelectNBest mode
    nSelected = ContextSetting(5)
    auto_apply = Setting(True)

    # (score column, sort order); the column index excludes the leading '#'
    # column (see headerClick / updateScores)
    sorting = Setting((0, Qt.DescendingOrder))
    # Names of the built-in scoring methods that are ticked
    selected_methods = Setting(set())

    settings_version = 2
    settingsHandler = DomainContextHandler()
    # Selected attributes, stored as indices into the unsorted domain
    selected_rows = ContextSetting([])
    selectionMethod = ContextSetting(SelectNBest)

    class Information(OWWidget.Information):
        no_target_var = Msg("Data does not have a single target variable. "
                            "You can still connect in unsupervised scorers "
                            "such as PCA.")
        missings_imputed = Msg('Missing values will be imputed as needed.')

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("Scorer {} inadequate: {}")
        no_attributes = Msg("Data does not have a single attribute.")

    def __init__(self):
        """Initialise widget state and build the ranks table and controls."""
        super().__init__()
        # Externally connected scorers, keyed by input signal id
        self.scorers = OrderedDict()
        self.out_domain_desc = None
        self.data = None
        self.problem_type_mode = ProblemType.CLASSIFICATION

        # Nothing stored in settings: fall back to the default methods
        if not self.selected_methods:
            self.selected_methods = {method.name for method in SCORES
                                     if method.is_default}

        # GUI

        self.ranksModel = model = TableModel(parent=self)  # type: TableModel
        self.ranksView = view = TableView(self)            # type: TableView
        self.mainArea.layout().addWidget(view)
        view.setModel(model)
        view.setColumnWidth(0, 30)
        view.selectionModel().selectionChanged.connect(self.on_select)

        def _set_select_manual():
            # Direct interaction with the table switches to manual selection
            self.setSelectionMethod(OWRank.SelectManual)

        view.pressed.connect(_set_select_manual)
        view.verticalHeader().sectionClicked.connect(_set_select_manual)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)

        # One stacked page of check boxes per group of scoring methods;
        # switchProblemType picks the visible page by index.
        # NOTE(review): presumably the page order matches the ProblemType
        # values -- confirm against ProblemType's definition.
        self.measuresStack = stacked = QStackedWidget(self)
        self.controlArea.layout().addWidget(stacked)

        for scoring_methods in (CLS_SCORES,
                                REG_SCORES,
                                []):
            # The empty third group yields an untitled page without boxes
            box = gui.vBox(None, "Scoring Methods" if scoring_methods else None)
            stacked.addWidget(box)
            for method in scoring_methods:
                box.layout().addWidget(QCheckBox(
                    method.name, self,
                    objectName=method.shortname,  # To be easily found in tests
                    checked=method.name in self.selected_methods,
                    stateChanged=partial(self.methodSelectionChanged, method_name=method.name)))
            gui.rubber(box)

        gui.rubber(self.controlArea)
        self.switchProblemType(ProblemType.CLASSIFICATION)

        selMethBox = gui.vBox(self.controlArea, "Select Attributes", addSpace=True)

        grid = QGridLayout()
        grid.setContentsMargins(6, 0, 6, 0)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectionMethod)

        def button(text, buttonid, toolTip=None):
            # Create a radio button registered in the group under `buttonid`
            b = QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

        # Changing the spin box value switches to "Best ranked" selection
        s = gui.spin(selMethBox, self, "nSelected", 1, 100,
                     callback=lambda: self.setSelectionMethod(OWRank.SelectNBest))

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        # Reflect the stored selection mode in the radio buttons
        self.selectButtons.button(self.selectionMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

        self.resize(690, 500)

    def switchProblemType(self, index):
        """
        Show the scoring-method page at `index` (discrete / continuous /
        no class) and remember it as the current problem type mode.
        """
        stack = self.measuresStack
        stack.setCurrentIndex(index)
        self.problem_type_mode = index

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Handle new input data.

        Resets the selection, the ranks model and the score caches, updates
        the information/error messages, chooses the problem type from the
        class variable and prepares the table's row headers. Scores
        themselves are computed later, in ``handleNewSignals``.
        """
        # The old context must be closed before the settings below are reset
        self.closeContext()
        self.selected_rows = []
        self.ranksModel.clear()
        self.ranksModel.resetSorting(True)

        # Memoized scores belong to the previous data
        self.get_method_scores.cache_clear()
        self.get_scorer_scores.cache_clear()

        self.Error.clear()
        self.Information.clear()
        self.Information.missings_imputed(
            shown=data is not None and data.has_missing())

        # Data without attributes is treated like no data at all
        if data is not None and not len(data.domain.attributes):
            data = None
            self.Error.no_attributes()
        self.data = data
        # Default page; overridden below once the class variable is known
        self.switchProblemType(ProblemType.CLASSIFICATION)
        if self.data is not None:
            domain = self.data.domain

            if domain.has_discrete_class:
                problem_type = ProblemType.CLASSIFICATION
            elif domain.has_continuous_class:
                problem_type = ProblemType.REGRESSION
            elif not domain.class_var:
                self.Information.no_target_var()
                problem_type = ProblemType.UNSUPERVISED
            else:
                # This can happen?
                self.Error.invalid_type(type(domain.class_var).__name__)
                problem_type = None

            if problem_type is not None:
                self.switchProblemType(problem_type)

            self.ranksModel.setVerticalHeaderLabels(domain.attributes)
            # Size the vertical header after the longest attribute name
            self.ranksView.setVHeaderFixedWidthFromLabel(
                max((a.name for a in domain.attributes), key=len))

            # Default mode; openContext below may restore a stored one
            self.selectionMethod = OWRank.SelectNBest

        self.openContext(data)
        self.selectButtons.button(self.selectionMethod).setChecked(True)

    def handleNewSignals(self):
        """Recompute the scores after the inputs changed and re-commit.

        The status message is set to 'Running' while scores are computed and
        is always cleared afterwards, even when ``updateScores`` raises, so
        the widget never stays labelled as running after a failure. The
        exception itself still propagates to the caller.
        """
        self.setStatusMessage('Running')
        try:
            self.updateScores()
        finally:
            self.setStatusMessage('')
        self.on_select()

    @Inputs.scorer
    def set_learner(self, scorer, id):
        """Register or remove the external scorer connected under `id`.

        ``None`` removes the entry. Otherwise the scorer is wrapped in a
        ``ScoreMeta`` whose name and short name are both ``scorer.name`` and
        whose problem type is derived from ``scorer.class_type``.
        """
        if scorer is None:
            self.scorers.pop(id, None)
            return

        # Avoid caching a (possibly stale) previous instance of the same
        # Scorer passed via the same signal
        if id in self.scorers:
            self.get_scorer_scores.cache_clear()

        problem_type = ProblemType.from_variable(scorer.class_type)
        self.scorers[id] = ScoreMeta(
            scorer.name, scorer.name, scorer, problem_type, False)

    @memoize_method()
    def get_method_scores(self, method):
        """Return the scores of a built-in `method` on the current data.

        The scorer is first applied to the whole data set. If that raises
        ``ValueError`` it is applied attribute by attribute, and if that
        fails too, an array of NaNs (one per attribute) is returned.
        """
        scorer = method.scorer()
        table = self.data
        try:
            result = np.asarray(scorer(table))
        except ValueError:
            log.warning("Scorer %s wasn't able to compute all scores at once",
                        method.name)
            try:
                per_attribute = [scorer(table, attr)
                                 for attr in table.domain.attributes]
                result = np.array(per_attribute)
            except ValueError:
                log.error(
                    "Scorer %s wasn't able to compute scores at all",
                    method.name)
                result = np.full(len(table.domain.attributes), np.nan)
        return result

    @memoize_method()
    def get_scorer_scores(self, scorer):
        """Return ``(scores, labels)`` for an externally connected scorer.

        ``scores`` is the transposed result of ``score_data`` on the current
        data, or a single NaN column (one row per attribute) when scoring
        raises ``ValueError``. ``labels`` holds one label per score column:
        the scorer's short name alone for a single column, otherwise the
        short name suffixed with ``_1``, ``_2``, ...
        """
        try:
            scores = scorer.scorer.score_data(self.data).T
        except ValueError:
            log.error(
                "Scorer %s wasn't able to compute scores at all",
                scorer.name)
            n_attributes = len(self.data.domain.attributes)
            scores = np.full((n_attributes, 1), np.nan)

        n_columns = scores.shape[1]
        if n_columns == 1:
            labels = (scorer.shortname,)
        else:
            labels = tuple(scorer.shortname + '_' + str(number)
                           for number in range(1, 1 + n_columns))
        return scores, labels

    def updateScores(self):
        """Recompute all scores, refresh the ranks table and send "Scores".

        With no data the model is cleared and ``None`` is sent. Otherwise
        the selected built-in methods that match the current problem type
        (and support sparse data when the data is sparse) are combined with
        the adequate external scorers into one table whose first column
        holds the number of values of each discrete attribute. The stored
        sorting is re-applied and the automatic selection refreshed.
        """
        if self.data is None:
            self.ranksModel.clear()
            self.Outputs.scores.send(None)
            return

        methods = [method
                   for method in SCORES
                   if (method.name in self.selected_methods and
                       method.problem_type == self.problem_type_mode and
                       (not issparse(self.data.X) or
                        method.scorer.supports_sparse_data))]

        scorers = []
        self.Error.inadequate_learner.clear()
        for scorer in self.scorers.values():
            if scorer.problem_type in (self.problem_type_mode, ProblemType.UNSUPERVISED):
                scorers.append(scorer)
            else:
                # Entries are the ScoreMeta wrappers built in set_learner from
                # (name, shortname, scorer, problem type, flag); none of those
                # values is an adequacy message, so reading it from the
                # wrapper alone may raise AttributeError. Prefer the wrapper's
                # attribute when it exists, else ask the wrapped scorer.
                # NOTE(review): ScoreMeta's definition is not visible here --
                # confirm where learner_adequacy_err_msg actually lives.
                err_msg = getattr(scorer, "learner_adequacy_err_msg", None)
                if err_msg is None:
                    err_msg = getattr(
                        scorer.scorer, "learner_adequacy_err_msg", "")
                self.Error.inadequate_learner(scorer.name, err_msg)

        method_scores = tuple(self.get_method_scores(method)
                              for method in methods)

        scorer_scores, scorer_labels = (), ()
        if scorers:
            scorer_scores, scorer_labels = zip(*(self.get_scorer_scores(scorer)
                                                 for scorer in scorers))
            scorer_labels = tuple(chain.from_iterable(scorer_labels))

        labels = tuple(method.shortname for method in methods) + scorer_labels
        # First column: number of values for discrete attributes, else NaN
        n_values = [len(a.values) if a.is_discrete else np.nan
                    for a in self.data.domain.attributes]
        model_array = np.column_stack(
            (n_values,) + method_scores + scorer_scores)
        for column, values in enumerate(model_array.T):
            self.ranksModel.setExtremesFrom(column, values)

        self.ranksModel.wrap(model_array.tolist())
        self.ranksModel.setHorizontalHeaderLabels(('#',) + labels)
        self.ranksView.setColumnWidth(0, 40)

        # Re-apply sort
        try:
            sort_column, sort_order = self.sorting
            if sort_column < len(labels):
                # adds 1 for '#' (discrete count) column
                self.ranksModel.sort(sort_column + 1, sort_order)
                self.ranksView.horizontalHeader().setSortIndicator(sort_column + 1, sort_order)
        except ValueError:
            # Best effort: leave the table unsorted (e.g. when the stored
            # sorting does not unpack into two values)
            pass

        self.autoSelection()
        self.Outputs.scores.send(self.create_scores_table(labels))

    def on_select(self):
        """Store the current selection as source-row indices and commit."""
        view_rows = [
            index.row()
            for index in self.ranksView.selectionModel().selectedRows(0)
        ]
        # Save indices of attributes in the original, unsorted domain
        self.selected_rows = self.ranksModel.mapToSourceRows(view_rows)
        self.commit()

    def setSelectionMethod(self, method):
        """Switch to selection mode `method`, check its radio button and
        re-run the automatic selection."""
        self.selectionMethod = method
        radio = self.selectButtons.button(method)
        radio.setChecked(True)
        self.autoSelection()

    def autoSelection(self):
        """Select table rows according to the current selection mode.

        "All" selects every row, "Best ranked" the first
        ``min(nSelected, rowCount)`` rows in the current ordering, "None"
        nothing; any other mode (manual) re-selects the stored
        ``selected_rows`` after mapping them to the current ordering.
        """
        model = self.ranksModel
        selection_model = self.ranksView.selectionModel()
        n_rows = model.rowCount()
        last_column = model.columnCount() - 1
        mode = self.selectionMethod

        if mode == OWRank.SelectAll:
            selection = QItemSelection(
                model.index(0, 0), model.index(n_rows - 1, last_column))
        elif mode == OWRank.SelectNBest:
            n_best = min(self.nSelected, n_rows)
            selection = QItemSelection(
                model.index(0, 0), model.index(n_best - 1, last_column))
        else:
            selection = QItemSelection()
            if mode != OWRank.SelectNone and len(self.selected_rows):
                # One full-row range per remembered row
                for row in model.mapFromSourceRows(self.selected_rows):
                    selection.append(QItemSelectionRange(
                        model.index(row, 0), model.index(row, last_column)))

        selection_model.select(selection, QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """React to a click on horizontal header section `index`.

        Re-selects the top rows in "Best ranked" mode when a column other
        than the first was clicked, then stores the model's sort column and
        order in ``self.sorting``.
        """
        if index >= 1 and self.selectionMethod == OWRank.SelectNBest:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states
        ranks = self.ranksModel
        order = ranks.sortOrder()
        # -1 for '#' (discrete count) column
        column = ranks.sortColumn() - 1
        self.sorting = (column, order)

    def methodSelectionChanged(self, state, method_name):
        """Add or remove `method_name` from the selected methods, depending
        on the check box `state`, and recompute the scores."""
        chosen = self.selected_methods
        if state == Qt.Checked:
            chosen.add(method_name)
        else:
            # No-op when the method was not selected in the first place
            chosen.discard(method_name)

        self.updateScores()

    def send_report(self):
        """Report the input domain, the ranks table and, when available,
        the description of the output domain. Does nothing without data."""
        data = self.data
        if not data:
            return
        self.report_domain("Input", data.domain)
        self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
        output_description = self.out_domain_desc
        if output_description is not None:
            self.report_items("Output", output_description)

    def commit(self):
        """Send the reduced data and the list of selected features.

        ``selected_rows`` index into the attributes of the input domain.
        With no data or no selected attribute, ``None`` is sent on both
        outputs and the output description is cleared. Otherwise the data is
        transformed to a domain holding the selected attributes together
        with all original class variables and metas.
        """
        selected_attrs = []
        if self.data is not None:
            attributes = self.data.domain.attributes
            selected_attrs = [attributes[i] for i in self.selected_rows]

        if not selected_attrs:
            self.Outputs.reduced_data.send(None)
            self.Outputs.features.send(None)
            self.out_domain_desc = None
            return

        source_domain = self.data.domain
        # Pass class_vars, not class_var: class_var is None for data with
        # several targets (which set_data accepts as "no single target"),
        # and using it would silently drop every target from the output.
        reduced_domain = Domain(
            selected_attrs, source_domain.class_vars, source_domain.metas)
        data = self.data.transform(reduced_domain)
        self.Outputs.reduced_data.send(data)
        self.Outputs.features.send(AttributeList(selected_attrs))
        self.out_domain_desc = report.describe_domain(data.domain)

    def create_scores_table(self, labels):
        """Build the "Feature Scores" table from the ranks model.

        One continuous column is created per entry of `labels`; the first
        model column (number of values) is left out and scores are clipped
        to the finite float64 range. Attribute names of the current data go
        into the "Feature" meta column.

        Returns ``None`` when the model is empty or holds only the first
        column.
        """
        rows = self.ranksModel.tolist()
        # Empty or just n_values column
        if not rows or len(rows[0]) == 1:
            return None

        score_variables = [ContinuousVariable(label) for label in labels]
        domain = Domain(score_variables, metas=[StringVariable("Feature")])

        # Prevent np.inf scores
        limits = np.finfo(np.float64)
        raw_scores = np.array(rows)[:, 1:]
        scores = np.clip(raw_scores, limits.min, limits.max)

        # Table does not like 1d arrays, so make the names a column vector
        names = [attr.name for attr in self.data.domain.attributes]
        feature_names = np.array(names)[:, None]

        new_table = Table(domain, scores, metas=feature_names)
        new_table.name = "Feature Scores"
        return new_table

    @classmethod
    def migrate_settings(cls, settings, version):
        """Convert pre-version-2 settings: replace ``headerState`` by
        ``sorting``.

        ``headerState`` is removed. If it (or its first item, when it is a
        tuple or list) is a ``bytes`` header state, the sort column and
        order are read from it; otherwise the default sorting is stored.
        """
        if version is not None and version >= 2:
            return

        # If older settings, restore sort header to default
        # Saved selected_rows will likely be incorrect
        header_state = settings.pop("headerState", None)

        # Lacking knowledge of last problemType, use discrete ranks view's ordering
        if isinstance(header_state, (tuple, list)):
            header_state = header_state[0]

        if isinstance(header_state, bytes):
            hview = QHeaderView(Qt.Horizontal)
            hview.restoreState(header_state)
            sorting = (hview.sortIndicatorSection() - 1,
                       hview.sortIndicatorOrder())
        else:
            sorting = (0, Qt.DescendingOrder)
        settings["sorting"] = sorting

    @classmethod
    def migrate_context(cls, context, version):
        """Reset the stored row selection of pre-version-2 contexts."""
        if version is not None and version >= 2:
            return
        # Old selection was saved as sorted indices. New selection is original indices.
        # Since we can't devise the latter without first computing the ranks,
        # just reset the selection to avoid confusion.
        context.values['selected_rows'] = []
Ejemplo n.º 11
0
class OWKeywords(OWWidget, ConcurrentWidgetMixin):
    """Widget that infers characteristic words from the input corpus."""

    # Widget metadata
    name = "Extract Keywords"
    description = "Infers characteristic words from the input corpus."
    icon = "icons/Keywords.svg"
    priority = 1100
    keywords = ["characteristic", "term"]

    # (column, order) applied to the table when no valid sorting is stored
    DEFAULT_SORTING = (1, Qt.DescendingOrder)

    settingsHandler = DomainContextHandler()
    # Names of the ticked scoring methods
    selected_scoring_methods: Set[str] = Setting({ScoringMethods.TF_IDF})
    # Indices into YAKE_LANGUAGES / RAKE_LANGUAGES
    yake_lang_index: int = Setting(YAKE_LANGUAGES.index("English"))
    rake_lang_index: int = Setting(RAKE_LANGUAGES.index("English"))
    agg_method: int = Setting(AggregationMethods.MEAN)
    sel_method: int = ContextSetting(SelectionMethods.N_BEST)
    # Number of top rows selected in N_BEST mode
    n_selected: int = ContextSetting(3)
    sort_column_order: Tuple[int, int] = Setting(DEFAULT_SORTING)
    # Words (first-column values) of the selected rows
    selected_words = ContextSetting([], schema_only=True)
    auto_apply: bool = Setting(True)

    class Inputs:
        corpus = Input("Corpus", Corpus)
        words = Input("Words", Table)

    class Outputs:
        words = Output("Words", Corpus)

    class Warning(OWWidget.Warning):
        no_words_column = Msg("Input is missing 'Words' column.")

    def __init__(self):
        """Initialise both base classes, the widget state and the GUI."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self.corpus: Optional[Corpus] = None
        self.words: Optional[List] = None
        # Keywords from earlier runs; passed to `run` and replaced in on_done
        self.__cached_keywords = {}
        # The model must exist before _setup_gui attaches it to the view
        self.model = KeywordsTableModel(parent=self)
        self._setup_gui()

    def _setup_gui(self):
        """Build the control area (scoring, aggregation, selection) and the
        main area (filter line edit and keywords table)."""
        grid = QGridLayout()
        box = gui.widgetBox(self.controlArea, "Scoring Methods", grid)

        # Language combo boxes; placed next to their check boxes in the
        # loop below
        yake_cb = gui.comboBox(self.controlArea,
                               self,
                               "yake_lang_index",
                               items=YAKE_LANGUAGES,
                               callback=self.__on_yake_lang_changed)
        rake_cb = gui.comboBox(self.controlArea,
                               self,
                               "rake_lang_index",
                               items=RAKE_LANGUAGES,
                               callback=self.__on_rake_lang_changed)

        # One check box per scoring method; `name=method_name` binds the
        # current name as a default so each lambda keeps its own method
        for i, (method_name, _) in enumerate(ScoringMethods.ITEMS):
            check_box = QCheckBox(method_name, self)
            check_box.setChecked(method_name in self.selected_scoring_methods)
            check_box.stateChanged.connect(
                lambda state, name=method_name: self.
                __on_scoring_method_state_changed(state, name))
            box.layout().addWidget(check_box, i, 0)
            if method_name == ScoringMethods.YAKE:
                box.layout().addWidget(yake_cb, i, 1)
            if method_name == ScoringMethods.RAKE:
                box.layout().addWidget(rake_cb, i, 1)

        box = gui.vBox(self.controlArea, "Aggregation")
        gui.comboBox(box,
                     self,
                     "agg_method",
                     items=AggregationMethods.ITEMS,
                     callback=self.update_scores)

        box = gui.vBox(self.controlArea, "Select Words")
        grid = QGridLayout()
        grid.setContentsMargins(0, 0, 0, 0)
        box.layout().addLayout(grid)

        # One radio button per selection method; the button id is the
        # method's position in SelectionMethods.ITEMS
        self.__sel_method_buttons = QButtonGroup()
        for method, label in enumerate(SelectionMethods.ITEMS):
            button = QRadioButton(label)
            button.setChecked(method == self.sel_method)
            grid.addWidget(button, method, 0)
            self.__sel_method_buttons.addButton(button, method)
        self.__sel_method_buttons.buttonClicked[int].connect(
            self._set_selection_method)

        # Changing the spin box value switches to N_BEST selection
        spin = gui.spin(box,
                        self,
                        "n_selected",
                        1,
                        999,
                        addToLayout=False,
                        callback=lambda: self._set_selection_method(
                            SelectionMethods.N_BEST))
        # NOTE(review): row 3 is hard-coded; presumably the row of the
        # N_BEST radio button -- confirm against SelectionMethods.
        grid.addWidget(spin, 3, 1)

        gui.rubber(self.controlArea)
        gui.auto_send(self.buttonsArea, self, "auto_apply")

        self.__filter_line_edit = QLineEdit(
            textChanged=self.__on_filter_changed, placeholderText="Filter...")
        self.mainArea.layout().addWidget(self.__filter_line_edit)

        def select_manual():
            # Direct interaction with the table switches to manual selection
            self._set_selection_method(SelectionMethods.MANUAL)

        self.view = KeywordsTableView()
        self.view.pressedAny.connect(select_manual)
        self.view.horizontalHeader().setSortIndicator(*self.DEFAULT_SORTING)
        self.view.horizontalHeader().sectionClicked.connect(
            self.__on_horizontal_header_clicked)
        self.mainArea.layout().addWidget(self.view)

        # The view shows self.model through a sorting/filtering proxy that
        # filters on the first column
        proxy = SortFilterProxyModel()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterCaseSensitivity(False)
        self.view.setModel(proxy)
        self.view.model().setSourceModel(self.model)
        self.view.selectionModel().selectionChanged.connect(
            self.__on_selection_changed)

    def __on_scoring_method_state_changed(self, state: int, method_name: str):
        """Add or remove `method_name` from the selected scoring methods,
        depending on the check box `state`, and recompute the scores."""
        chosen = self.selected_scoring_methods
        if state == Qt.Checked:
            chosen.add(method_name)
        else:
            # No-op when the method was not selected in the first place
            chosen.discard(method_name)
        self.update_scores()

    def __on_yake_lang_changed(self):
        """Drop cached YAKE keywords and rescore; only when YAKE is selected."""
        method = ScoringMethods.YAKE
        if method not in self.selected_scoring_methods:
            return
        # Cached results were computed for the previous language
        self.__cached_keywords.pop(method, None)
        self.update_scores()

    def __on_rake_lang_changed(self):
        """Drop cached RAKE keywords and rescore; only when RAKE is selected."""
        method = ScoringMethods.RAKE
        if method not in self.selected_scoring_methods:
            return
        # Cached results were computed for the previous language
        self.__cached_keywords.pop(method, None)
        self.update_scores()

    def __on_filter_changed(self):
        """Apply the stripped filter text to the view's model and redo the
        row selection."""
        pattern = self.__filter_line_edit.text().strip()
        self.view.model().setFilterFixedString(pattern)
        self._select_rows()

    def __on_horizontal_header_clicked(self, index: int):
        """Store the new sorting, redo the selection and commit if needed."""
        order = self.view.horizontalHeader().sortIndicatorOrder()
        self.sort_column_order = (index, order)
        self._select_rows()
        # explicitly call commit, because __on_selection_changed will not be
        # invoked, since selection is actually the same, only order is not
        manual_with_words = (self.sel_method == SelectionMethods.MANUAL
                             and self.selected_words)
        if manual_with_words or self.sel_method == SelectionMethods.ALL:
            self.commit()

    def __on_selection_changed(self):
        """Store the first-column values of the selected rows and commit."""
        rows = self.view.selectionModel().selectedRows(0)
        view_model = self.view.model()
        words = []
        for selected in rows:
            words.append(view_model.data(view_model.index(selected.row(), 0)))
        self.selected_words = words
        self.commit()

    @Inputs.corpus
    def set_corpus(self, corpus: Optional[Corpus]):
        """Handle a new input corpus: reset state and reopen the context."""
        # Close the old context before _clear resets the selected words
        self.closeContext()
        self._clear()
        self.corpus = corpus
        self.openContext(self.corpus)
        # Reflect the current selection method in the radio buttons
        self.__sel_method_buttons.button(self.sel_method).setChecked(True)

    def _clear(self):
        """Reset messages, the running task, the selection, the model and
        the keyword cache."""
        self.clear_messages()
        self.cancel()
        self.selected_words = []
        self.model.clear()
        self.__cached_keywords = {}

    @Inputs.words
    def set_words(self, words: Optional[Table]):
        """Read the input words from the table's words column.

        The column must be named ``WORDS_COLUMN_NAME`` and carry the
        attribute ``type == "words"``; otherwise a warning is shown and
        ``self.words`` stays ``None``.
        """
        self.words = None
        self.Warning.no_words_column.clear()
        if not words:
            return

        has_words_column = (
            WORDS_COLUMN_NAME in words.domain
            and words.domain[WORDS_COLUMN_NAME].attributes.get("type")
            == "words"
        )
        if not has_words_column:
            self.Warning.no_words_column()
            return
        column = words.get_column_view(WORDS_COLUMN_NAME)[0]
        self.words = list(column)

    def handleNewSignals(self):
        """Recompute the scores via ``update_scores``."""
        self.update_scores()

    def update_scores(self):
        """Start the ``run`` task that computes the keyword scores.

        YAKE and RAKE get their language and a maximal length, taken from
        the upper bound of the corpus' ``ngram_range`` (1 when the corpus is
        falsy).
        """
        max_len = self.corpus.ngram_range[1] if self.corpus else 1
        yake_options = {
            "language": YAKE_LANGUAGES[self.yake_lang_index],
            "max_len": max_len,
        }
        rake_options = {
            "language": RAKE_LANGUAGES[self.rake_lang_index],
            "max_len": max_len,
        }
        kwargs = {
            ScoringMethods.YAKE: yake_options,
            ScoringMethods.RAKE: rake_options,
        }
        self.start(run, self.corpus, self.words, self.__cached_keywords,
                   self.selected_scoring_methods, kwargs, self.agg_method)

    def _set_selection_method(self, method: int):
        """Switch to selection `method`, check its radio button and redo the
        row selection."""
        self.sel_method = method
        radio = self.__sel_method_buttons.button(method)
        radio.setChecked(True)
        self._select_rows()

    def _select_rows(self):
        """Select rows of the view according to the selection method.

        NONE selects nothing, ALL every row, N_BEST the first
        ``min(n_selected, row count)`` rows of the view's model, and MANUAL
        the rows whose first-column value is among ``selected_words``.

        Raises:
            NotImplementedError: for an unknown selection method.
        """
        model = self.view.model()
        n_rows, n_columns = model.rowCount(), model.columnCount()
        if self.sel_method == SelectionMethods.NONE:
            selection = QItemSelection()
        elif self.sel_method == SelectionMethods.ALL:
            selection = QItemSelection(model.index(0, 0),
                                       model.index(n_rows - 1, n_columns - 1))
        elif self.sel_method == SelectionMethods.MANUAL:
            selection = QItemSelection()
            # Build the lookup once: testing membership in the list for every
            # row is quadratic when many words are selected.
            wanted = set(self.selected_words)
            for i in range(n_rows):
                word = model.data(model.index(i, 0))
                if word in wanted:
                    _selection = QItemSelection(model.index(i, 0),
                                                model.index(i, n_columns - 1))
                    selection.merge(_selection, QItemSelectionModel.Select)
        elif self.sel_method == SelectionMethods.N_BEST:
            n_sel = min(self.n_selected, n_rows)
            selection = QItemSelection(model.index(0, 0),
                                       model.index(n_sel - 1, n_columns - 1))
        else:
            raise NotImplementedError

        self.view.selectionModel().select(selection,
                                          QItemSelectionModel.ClearAndSelect)

    def on_exception(self, ex: Exception):
        """Re-raise the exception `ex` passed to this handler."""
        raise ex

    def on_partial_result(self, _: Any):
        """Ignore partial results; this handler does nothing."""
        pass

    # pylint: disable=arguments-differ
    def on_done(self, results: Results):
        """Show finished `results`: update the cache, the model and its
        header, re-apply sorting and refresh the selection."""
        self.__cached_keywords = results.all_keywords
        table_model = self.model
        table_model.wrap(results.scores)
        table_model.setHorizontalHeaderLabels(["Word"] + results.labels)
        self._apply_sorting()
        if not self.model.rowCount() > 0:
            # Nothing to select; still update the stored words and commit
            self.__on_selection_changed()
        else:
            self._select_rows()

    def _apply_sorting(self):
        """Make the header's sort indicator match ``sort_column_order``.

        Falls back to ``DEFAULT_SORTING`` when the stored column is beyond
        the model's columns.
        """
        n_columns = self.model.columnCount()
        if self.sort_column_order[0] >= n_columns:
            self.sort_column_order = self.DEFAULT_SORTING

        header = self.view.horizontalHeader()
        shown = (header.sortIndicatorSection(), header.sortIndicatorOrder())
        if shown != self.sort_column_order:
            header.setSortIndicator(*self.sort_column_order)
            # needed to sort nans; 1. column has strings
            # if self.sort_column_order[0] > 0:
            #     self.model.sort(*self.sort_column_order)

    def onDeleteWidget(self):
        """Call ``shutdown`` and then the base class' ``onDeleteWidget``."""
        self.shutdown()
        super().onDeleteWidget()

    def commit(self):
        """Send the selected words with their scores on the "Words" output.

        With no selected word ``None`` is sent. Otherwise the model rows are
        sorted by the stored sort column and order, rows whose word is
        selected are kept, and a table is built with one continuous column
        per selected scoring method (named after the model's header) and
        the word itself as the meta column.
        """
        words = None
        if self.selected_words:
            words_var = StringVariable(WORDS_COLUMN_NAME)
            words_var.attributes = {"type": "words"}
            model = self.model
            attrs = [
                ContinuousVariable(model.headerData(i + 1, Qt.Horizontal))
                for i in range(len(self.selected_scoring_methods))
            ]
            domain = Domain(attrs, metas=[words_var])

            sort_column, reverse = self.sort_column_order
            # Build the lookup once instead of scanning the list of selected
            # words for every model row.
            selected = set(self.selected_words)
            rows = sorted(model, key=lambda a: a[sort_column], reverse=reverse)
            # Move the word from the front to the end (scores first, meta last)
            data = [row[1:] + row[:1] for row in rows if row[0] in selected]
            words = Table.from_list(domain, data)
            words.name = "Words"

        self.Outputs.words.send(words)

    def send_report(self):
        """Report the corpus, the input words (if any) and the keywords
        table. Does nothing without a corpus."""
        corpus = self.corpus
        if not corpus:
            return
        self.report_data("Corpus", corpus)
        input_words = self.words
        if input_words is not None:
            self.report_paragraph("Words", ", ".join(input_words))
        self.report_table("Keywords", self.view, num_format="{:.3f}")
Ejemplo n.º 12
0
class OWRank(OWWidget):
    """Widget that scores the attributes of a data table and lets the user
    select a subset of them.

    Inputs are a data table and any number of external scorers; outputs are
    the data restricted to the selected attributes, a table of scores and the
    list of selected attributes.
    """
    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102
    keywords = []

    buttons_area_orientation = Qt.Vertical

    class Inputs:
        data = Input("Data", Table)
        scorer = Input("Scorer", score.Scorer, multiple=True)

    class Outputs:
        reduced_data = Output("Reduced Data", Table, default=True)
        scores = Output("Scores", Table)
        features = Output("Features", AttributeList, dynamic=False)

    # Identifiers of the selection modes; also used as radio button ids.
    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    # Number of top rows selected in the SelectNBest mode.
    nSelected = ContextSetting(5)
    auto_apply = Setting(True)

    # (score column, sort order); the column index excludes the leading
    # '#' column of the ranks table.
    sorting = Setting((0, Qt.DescendingOrder))
    # Names of the built-in scoring methods that are switched on.
    selected_methods = Setting(set())

    settings_version = 2
    settingsHandler = DomainContextHandler()
    # Selected attributes, stored as indices into the unsorted domain.
    selected_rows = ContextSetting([])
    selectionMethod = ContextSetting(SelectNBest)

    class Information(OWWidget.Information):
        no_target_var = Msg("Data does not have a single target variable. "
                            "You can still connect in unsupervised scorers "
                            "such as PCA.")
        missings_imputed = Msg('Missing values will be imputed as needed.')

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("Scorer {} inadequate: {}")
        no_attributes = Msg("Data does not have a single attribute.")

    def __init__(self):
        """Create the ranks table, the scoring-method check boxes and the
        attribute-selection controls."""
        super().__init__()
        # External scorers received on the "Scorer" input, keyed by signal id.
        self.scorers = OrderedDict()
        self.out_domain_desc = None
        self.data = None
        self.problem_type_mode = ProblemType.CLASSIFICATION

        # With nothing stored in settings, start with the default methods.
        if not self.selected_methods:
            self.selected_methods = {method.name for method in SCORES
                                     if method.is_default}

        # GUI

        self.ranksModel = model = TableModel(parent=self)  # type: TableModel
        self.ranksView = view = TableView(self)            # type: TableView
        self.mainArea.layout().addWidget(view)
        view.setModel(model)
        view.setColumnWidth(0, 30)
        view.selectionModel().selectionChanged.connect(self.on_select)

        def _set_select_manual():
            self.setSelectionMethod(OWRank.SelectManual)

        # Any direct interaction with the rows switches to manual selection.
        view.pressed.connect(_set_select_manual)
        view.verticalHeader().sectionClicked.connect(_set_select_manual)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)

        self.measuresStack = stacked = QStackedWidget(self)
        self.controlArea.layout().addWidget(stacked)

        # One stacked page per group: classification scores, regression
        # scores and an empty page; switchProblemType picks the page by index.
        for scoring_methods in (CLS_SCORES,
                                REG_SCORES,
                                []):
            box = gui.vBox(None, "Scoring Methods" if scoring_methods else None)
            stacked.addWidget(box)
            for method in scoring_methods:
                box.layout().addWidget(QCheckBox(
                    method.name, self,
                    objectName=method.shortname,  # To be easily found in tests
                    checked=method.name in self.selected_methods,
                    stateChanged=partial(self.methodSelectionChanged, method_name=method.name)))
            gui.rubber(box)

        gui.rubber(self.controlArea)
        self.switchProblemType(ProblemType.CLASSIFICATION)

        selMethBox = gui.vBox(self.controlArea, "Select Attributes", addSpace=True)

        grid = QGridLayout()
        grid.setContentsMargins(6, 0, 6, 0)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectionMethod)

        def button(text, buttonid, toolTip=None):
            # Create a radio button and register it in the group under the
            # given selection-mode id.
            b = QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

        # Changing the number of best attributes also activates that mode.
        s = gui.spin(selMethBox, self, "nSelected", 1, 100,
                     callback=lambda: self.setSelectionMethod(OWRank.SelectNBest))

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectionMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

        self.resize(690, 500)

    def switchProblemType(self, index):
        """
        Switch between discrete/continuous/no_class mode

        ``index`` selects the page of ``measuresStack`` and is stored as
        ``problem_type_mode``.
        """
        self.measuresStack.setCurrentIndex(index)
        self.problem_type_mode = index

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Accept a new data table (or ``None``) and reset the widget for it.

        The ranks table, sorting, cached scores and all messages are cleared.
        Data without attributes is treated as no data and reported as an
        error.  The problem type is derived from the class variable and the
        selection method is reset to "best ranked" before the context for the
        new data is opened.
        """
        self.closeContext()
        self.selected_rows = []
        self.ranksModel.clear()
        self.ranksModel.resetSorting(True)

        # Drop memoized scores; they were computed from the previous data.
        self.get_method_scores.cache_clear()
        self.get_scorer_scores.cache_clear()

        self.Error.clear()
        self.Information.clear()
        has_missing = data is not None and data.has_missing()
        self.Information.missings_imputed(shown=has_missing)

        if data is not None and len(data.domain.attributes) == 0:
            self.Error.no_attributes()
            data = None
        self.data = data
        self.switchProblemType(ProblemType.CLASSIFICATION)

        if data is not None:
            domain = data.domain

            problem_type = None
            if domain.has_discrete_class:
                problem_type = ProblemType.CLASSIFICATION
            elif domain.has_continuous_class:
                problem_type = ProblemType.REGRESSION
            elif not domain.class_var:
                self.Information.no_target_var()
                problem_type = ProblemType.UNSUPERVISED
            else:
                # This can happen?
                self.Error.invalid_type(type(domain.class_var).__name__)

            if problem_type is not None:
                self.switchProblemType(problem_type)

            self.ranksModel.setVerticalHeaderLabels(domain.attributes)
            # The vertical header is sized to fit the longest attribute name.
            longest_name = max((a.name for a in domain.attributes), key=len)
            self.ranksView.setVHeaderFixedWidthFromLabel(longest_name)

            self.selectionMethod = OWRank.SelectNBest

        self.openContext(data)
        self.selectButtons.button(self.selectionMethod).setChecked(True)

    def handleNewSignals(self):
        """Recompute the scores and re-emit the selection, showing a
        'Running' status message while the scores are updated."""
        self.setStatusMessage('Running')
        self.updateScores()
        self.setStatusMessage('')
        # Refresh selected_rows from the view and commit the outputs.
        self.on_select()

    @Inputs.scorer
    def set_learner(self, scorer, id):
        """Register, replace or remove the external scorer for signal ``id``.

        ``None`` removes the scorer stored under ``id`` (if any).  Otherwise
        the scorer is wrapped in a ``ScoreMeta`` record and stored.
        """
        if scorer is None:
            self.scorers.pop(id, None)
            return

        # Avoid caching a (possibly stale) previous instance of the same
        # Scorer passed via the same signal
        if id in self.scorers:
            self.get_scorer_scores.cache_clear()

        problem_type = ProblemType.from_variable(scorer.class_type)
        self.scorers[id] = ScoreMeta(
            scorer.name, scorer.name, scorer, problem_type, False)

    @memoize_method()
    def get_method_scores(self, method):
        """Return the scores of a built-in ``method`` on ``self.data``.

        The estimator is first applied to the whole table.  If that raises
        ``ValueError`` it is applied attribute by attribute; if that fails
        with ``ValueError`` too, an array of NaNs (one per attribute) is
        returned.
        """
        estimator = method.scorer()
        data = self.data

        try:
            return np.asarray(estimator(data))
        except ValueError:
            log.warning("Scorer %s wasn't able to compute all scores at once",
                        method.name)

        try:
            return np.array([estimator(data, attr)
                             for attr in data.domain.attributes])
        except ValueError:
            log.error(
                "Scorer %s wasn't able to compute scores at all",
                method.name)

        return np.full(len(data.domain.attributes), np.nan)

    @memoize_method()
    def get_scorer_scores(self, scorer):
        """Return ``(scores, labels)`` of an external ``scorer`` on the data.

        ``scores`` is the transposed result of ``score_data`` or, when that
        raises ``ValueError``, a single column of NaNs.  ``labels`` holds one
        name per score column: the scorer's short name, suffixed with
        ``_1``, ``_2``, ... when there is more than one column.
        """
        try:
            scores = scorer.scorer.score_data(self.data).T
        except ValueError:
            log.error(
                "Scorer %s wasn't able to compute scores at all",
                scorer.name)
            scores = np.full((len(self.data.domain.attributes), 1), np.nan)

        n_columns = scores.shape[1]
        if n_columns == 1:
            labels = (scorer.shortname,)
        else:
            labels = tuple(scorer.shortname + '_' + str(number)
                           for number in range(1, 1 + n_columns))
        return scores, labels

    def updateScores(self):
        """Recompute all scores and refresh the ranks table and scores output.

        Built-in methods are those from ``SCORES`` that are selected, match
        the current problem type and, for sparse data, support it.  External
        scorers are taken from ``self.scorers``; those whose problem type
        neither matches the current one nor is unsupervised are reported via
        ``Error.inadequate_learner`` and skipped.

        The first column of the table ('#') holds the number of values of
        discrete attributes (NaN for the others), followed by one column per
        score.  Without data the model is cleared and ``None`` is sent.
        """
        if self.data is None:
            self.ranksModel.clear()
            self.Outputs.scores.send(None)
            return

        methods = [method
                   for method in SCORES
                   if (method.name in self.selected_methods and
                       method.problem_type == self.problem_type_mode and
                       (not issparse(self.data.X) or
                        method.scorer.supports_sparse_data))]

        scorers = []
        self.Error.inadequate_learner.clear()
        for scorer in self.scorers.values():
            if scorer.problem_type in (self.problem_type_mode, ProblemType.UNSUPERVISED):
                scorers.append(scorer)
            else:
                # `scorer` is the ScoreMeta record created in set_learner; the
                # scorer object itself is stored in its `scorer` field.  Look
                # for the message on the record first (keeps the old lookup
                # working if the record provides it) and fall back to the
                # wrapped scorer instead of failing with AttributeError.
                reason = getattr(scorer, "learner_adequacy_err_msg", None)
                if reason is None:
                    reason = getattr(
                        scorer.scorer, "learner_adequacy_err_msg", "")
                self.Error.inadequate_learner(scorer.name, reason)

        method_scores = tuple(self.get_method_scores(method)
                              for method in methods)

        scorer_scores, scorer_labels = (), ()
        if scorers:
            scorer_scores, scorer_labels = zip(*(self.get_scorer_scores(scorer)
                                                 for scorer in scorers))
            scorer_labels = tuple(chain.from_iterable(scorer_labels))

        labels = tuple(method.shortname for method in methods) + scorer_labels
        # Both score collections are tuples, so they concatenate directly.
        n_values = [len(a.values) if a.is_discrete else np.nan
                    for a in self.data.domain.attributes]
        model_array = np.column_stack(
            (n_values,) + method_scores + scorer_scores)
        for column, values in enumerate(model_array.T):
            self.ranksModel.setExtremesFrom(column, values)

        self.ranksModel.wrap(model_array.tolist())
        self.ranksModel.setHorizontalHeaderLabels(('#',) + labels)
        self.ranksView.setColumnWidth(0, 40)

        # Re-apply sort
        try:
            sort_column, sort_order = self.sorting
            if sort_column < len(labels):
                # adds 1 for '#' (discrete count) column
                self.ranksModel.sort(sort_column + 1, sort_order)
                self.ranksView.horizontalHeader().setSortIndicator(sort_column + 1, sort_order)
        except ValueError:
            # Best effort: keep the table unsorted if sorting is not possible.
            pass

        self.autoSelection()
        self.Outputs.scores.send(self.create_scores_table(labels))

    def on_select(self):
        """Store the current view selection and commit the outputs."""
        selected = self.ranksView.selectionModel().selectedRows(0)
        view_rows = [index.row() for index in selected]
        # Save indices of attributes in the original, unsorted domain
        self.selected_rows = self.ranksModel.mapToSourceRows(view_rows)
        self.commit()

    def setSelectionMethod(self, method):
        """Store the selection mode ``method`` (one of the ``Select*`` ids),
        check its radio button and re-apply the automatic selection."""
        self.selectionMethod = method
        self.selectButtons.button(method).setChecked(True)
        self.autoSelection()

    def autoSelection(self):
        """Select rows in the ranks view according to ``selectionMethod``.

        "None" clears the selection, "All" selects every row, "Best ranked"
        selects the first ``nSelected`` rows of the (sorted) view and any
        other mode re-selects the stored ``selected_rows``.
        """
        model = self.ranksModel
        n_rows = model.rowCount()
        last_column = model.columnCount() - 1
        method = self.selectionMethod

        if method == OWRank.SelectAll:
            selection = QItemSelection(
                model.index(0, 0),
                model.index(n_rows - 1, last_column))
        elif method == OWRank.SelectNBest:
            n_best = min(self.nSelected, n_rows)
            selection = QItemSelection(
                model.index(0, 0),
                model.index(n_best - 1, last_column))
        else:
            selection = QItemSelection()
            # Stored rows are source indices; map them to view rows first.
            if method != OWRank.SelectNone and len(self.selected_rows):
                for row in model.mapFromSourceRows(self.selected_rows):
                    selection.append(QItemSelectionRange(
                        model.index(row, 0), model.index(row, last_column)))

        self.ranksView.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """React to a click on horizontal header section ``index``.

        In "best ranked" mode a click on any score column re-selects the top
        rows; the model's current sort column and order are then stored in
        the ``sorting`` setting.
        """
        if self.selectionMethod == OWRank.SelectNBest and index >= 1:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states
        model = self.ranksModel
        # -1 for '#' (discrete count) column
        self.sorting = (model.sortColumn() - 1, model.sortOrder())

    def methodSelectionChanged(self, state, method_name):
        """Add or remove ``method_name`` from the selected scoring methods
        according to the check box ``state`` and recompute the scores."""
        if state == Qt.Checked:
            self.selected_methods.add(method_name)
        else:
            self.selected_methods.discard(method_name)

        self.updateScores()

    def send_report(self):
        """Report the input domain, the ranks table and, when available, the
        description of the output domain.  Nothing is reported without data."""
        if self.data:
            self.report_domain("Input", self.data.domain)
            self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
            if self.out_domain_desc is not None:
                self.report_items("Output", self.out_domain_desc)

    def commit(self):
        """Send the data reduced to the selected attributes and their list.

        ``None`` is sent on both outputs when there is no data or nothing is
        selected.  The reduced domain keeps the class variable and metas.
        """
        selected_attrs = []
        if self.data is not None:
            attributes = self.data.domain.attributes
            selected_attrs = [attributes[i] for i in self.selected_rows]

        if selected_attrs:
            source_domain = self.data.domain
            reduced_domain = Domain(
                selected_attrs, source_domain.class_var, source_domain.metas)
            data = self.data.transform(reduced_domain)
            self.Outputs.reduced_data.send(data)
            self.Outputs.features.send(AttributeList(selected_attrs))
            self.out_domain_desc = report.describe_domain(data.domain)
            return

        self.Outputs.reduced_data.send(None)
        self.Outputs.features.send(None)
        self.out_domain_desc = None

    def create_scores_table(self, labels):
        """Build the "Feature Scores" table from the ranks model.

        Returns ``None`` when the model is empty or holds only the '#'
        column.  Otherwise each row holds the scores of one attribute, with
        one continuous column per label and the attribute name as a meta.
        """
        rows = self.ranksModel.tolist()
        # Empty or just n_values column
        if not rows or len(rows[0]) == 1:
            return None

        score_vars = [ContinuousVariable(label) for label in labels]
        domain = Domain(score_vars, metas=[StringVariable("Feature")])

        # Prevent np.inf scores
        limits = np.finfo(np.float64)
        scores = np.clip(np.array(rows)[:, 1:], limits.min, limits.max)

        names = np.array([a.name for a in self.data.domain.attributes])
        # Reshape to 2d array as Table does not like 1d arrays
        names = names.reshape(-1, 1)

        table = Table(domain, scores, metas=names)
        table.name = "Feature Scores"
        return table

    @classmethod
    def migrate_settings(cls, settings, version):
        """Convert settings older than version 2 in place.

        The old ``headerState`` entry is removed and replaced by a
        ``sorting`` pair.  The pair is read from the saved header state when
        that is available as bytes; otherwise the default (first score
        column, descending) is used.
        """
        # If older settings, restore sort header to default
        # Saved selected_rows will likely be incorrect
        if version is not None and version >= 2:
            return

        header_state = settings.pop("headerState", None)

        # Lacking knowledge of last problemType, use discrete ranks view's ordering
        if isinstance(header_state, (tuple, list)):
            header_state = header_state[0]

        sorting = (0, Qt.DescendingOrder)
        if isinstance(header_state, bytes):
            header = QHeaderView(Qt.Horizontal)
            header.restoreState(header_state)
            sorting = (header.sortIndicatorSection() - 1,
                       header.sortIndicatorOrder())
        settings["sorting"] = sorting

    @classmethod
    def migrate_context(cls, context, version):
        """Reset the stored row selection of contexts older than version 2."""
        if version is None or version < 2:
            # Old selection was saved as sorted indices. New selection is original indices.
            # Since we can't devise the latter without first computing the ranks,
            # just reset the selection to avoid confusion.
            context.values['selected_rows'] = []
Ejemplo n.º 13
0
class OWScoreDocuments(OWWidget, ConcurrentWidgetMixin):
    """Widget that scores the documents of a corpus against a list of words
    and lets the user select documents from the resulting table.

    Inputs are a corpus and a table of words; outputs are the selected
    documents and the whole corpus with the scores appended as metas.
    """
    name = "Score Documents"
    description = ""
    icon = "icons/ScoreDocuments.svg"
    priority = 500

    buttons_area_orientation = Qt.Vertical

    # default order - table sorted in input order
    DEFAULT_SORTING = (-1, Qt.AscendingOrder)

    settingsHandler = PerfectDomainContextHandler()
    auto_commit: bool = Setting(True)
    # Index of the selected aggregation in AGGREGATIONS.
    aggregation: int = Setting(0)

    # One flag per scoring method; the names are the keys of SCORING_METHODS.
    word_frequency: bool = Setting(True)
    word_appearance: bool = Setting(False)
    embedding_similarity: bool = Setting(False)
    embedding_language: int = Setting(0)

    # (column, sort order) shown by the table header.
    sort_column_order: Tuple[int, int] = Setting(DEFAULT_SORTING)
    # Selected documents, stored as row indices of the source model.
    selected_rows: List[int] = ContextSetting([], schema_only=True)
    sel_method: int = ContextSetting(SelectionMethods.N_BEST)
    n_selected: int = ContextSetting(3)

    class Inputs:
        corpus = Input("Corpus", Corpus)
        words = Input("Words", Table)

    class Outputs:
        selected_documents = Output("Selected documents", Corpus, default=True)
        corpus = Output("Corpus", Corpus)

    class Warning(OWWidget.Warning):
        corpus_not_normalized = Msg("Use Preprocess Text to normalize corpus.")

    class Error(OWWidget.Error):
        # Carries the text of an exception raised while scoring.
        custom_err = Msg("{}")

    def __init__(self):
        """Initialise both base classes, build the GUI and reset the state."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self._setup_control_area()
        self._setup_main_area()
        self.corpus = None
        self.words = None
        # cache of computed scores, keyed by (scoring method, aggregation),
        # to avoid computing the same score more than once
        self.scores = {}

    def _setup_control_area(self) -> None:
        """Build the controls: scoring-method check boxes (with their extra
        combo boxes), the aggregation combo, the document-selection radio
        buttons with the "n best" spin box and the auto-send button."""
        box = gui.widgetBox(self.controlArea, "Word Scoring Methods")
        for value, (n, _, tt) in SCORING_METHODS.items():
            # `value` is the name of the widget attribute bound to the check
            # box, `n` its label and `tt` its tooltip.
            b = gui.hBox(box, margin=0)
            gui.checkBox(
                b,
                self,
                value,
                label=n,
                callback=self.__setting_changed,
                tooltip=tt,
            )
            if value in ADDITIONAL_OPTIONS:
                # Methods with an extra option get a combo box in the same
                # row; `value` is rebound to that option's attribute name.
                value, options = ADDITIONAL_OPTIONS[value]
                gui.comboBox(
                    b,
                    self,
                    value,
                    items=options,
                    callback=self.__setting_changed,
                )

        box = gui.widgetBox(self.controlArea, "Aggregation")
        gui.comboBox(
            box,
            self,
            "aggregation",
            items=[n for n in AGGREGATIONS],
            callback=self.__setting_changed,
        )

        gui.rubber(self.controlArea)

        # select words box
        box = gui.vBox(self.buttonsArea, "Select Documents")
        grid = QGridLayout()
        grid.setContentsMargins(0, 0, 0, 0)

        # One radio button per selection method; the button id and the grid
        # row are the method's index in SelectionMethods.ITEMS.
        self._sel_method_buttons = QButtonGroup()
        for method, label in enumerate(SelectionMethods.ITEMS):
            button = QRadioButton(label)
            button.setChecked(method == self.sel_method)
            grid.addWidget(button, method, 0)
            self._sel_method_buttons.addButton(button, method)
        self._sel_method_buttons.buttonClicked[int].connect(
            self.__set_selection_method)

        # Changing the number of documents also activates the N_BEST method.
        spin = gui.spin(
            box,
            self,
            "n_selected",
            1,
            999,
            addToLayout=False,
            callback=lambda: self.__set_selection_method(SelectionMethods.
                                                         N_BEST),
        )
        # NOTE(review): row 3 is hard-coded - presumably the row of the
        # N_BEST radio button; confirm against SelectionMethods.ITEMS.
        grid.addWidget(spin, 3, 1)
        box.layout().addLayout(grid)

        # autocommit
        gui.auto_send(self.buttonsArea, self, "auto_commit")

    def _setup_main_area(self) -> None:
        """Build the main area: a filter line edit above the documents table.

        The view shows the source model ``self.model`` through a proxy model
        that filters on the first column, case-insensitively.
        """
        self._filter_line_edit = QLineEdit(
            textChanged=self.__on_filter_changed, placeholderText="Filter...")
        self.mainArea.layout().addWidget(self._filter_line_edit)

        self.model = model = ScoreDocumentsTableModel(parent=self)
        model.setHorizontalHeaderLabels(["Document"])

        def select_manual():
            self.__set_selection_method(SelectionMethods.MANUAL)

        self.view = view = ScoreDocumentsTableView()
        # Pressing anywhere in the view switches to manual selection.
        view.pressedAny.connect(select_manual)
        self.mainArea.layout().addWidget(view)
        # by default data are sorted in the Table order
        header = self.view.horizontalHeader()
        header.sectionClicked.connect(self.__on_horizontal_header_clicked)

        proxy_model = ScoreDocumentsProxyModel()
        proxy_model.setFilterKeyColumn(0)
        proxy_model.setFilterCaseSensitivity(False)
        view.setModel(proxy_model)
        view.model().setSourceModel(self.model)
        self.view.selectionModel().selectionChanged.connect(
            self.__on_selection_change)

    def __on_filter_changed(self) -> None:
        """Filter the table by the stripped text of the filter line edit."""
        pattern = self._filter_line_edit.text().strip()
        self.view.model().setFilterFixedString(pattern)

    def __on_horizontal_header_clicked(self, index: int):
        """Store the new sorting and refresh the selection and the outputs.

        ``index`` is the clicked header section; the sort order is read from
        the header.
        """
        order = self.view.horizontalHeader().sortIndicatorOrder()
        self.sort_column_order = (index, order)
        self._select_rows()

        # when sorting change output table must consider the new order
        # call explicitly since selection in table is not changed
        manual_with_rows = (self.sel_method == SelectionMethods.MANUAL
                            and self.selected_rows)
        if manual_with_rows or self.sel_method == SelectionMethods.ALL:
            # retrieve selection in new order
            self.selected_rows = self.get_selected_indices()
            self._send_output()

    def __on_selection_change(self):
        """Store the rows selected in the view and send the outputs."""
        self.selected_rows = self.get_selected_indices()
        self._send_output()

    def __set_selection_method(self, method: int):
        """Store the selection ``method``, check its radio button and
        re-apply the selection to the table."""
        self.sel_method = method
        self._sel_method_buttons.button(method).setChecked(True)
        self._select_rows()

    @Inputs.corpus
    def set_data(self, corpus: Corpus) -> None:
        """Accept a new corpus (or ``None``), then clear scores and rerun.

        A warning is shown for a corpus that was not normalized.  For a new
        corpus the selection is reset before its context is opened, and the
        radio button of the restored selection method is checked.
        """
        self.closeContext()
        self.Warning.corpus_not_normalized.clear()

        if corpus is not None:
            if not self._is_corpus_normalized(corpus):
                self.Warning.corpus_not_normalized()
            self.corpus = corpus
            self.selected_rows = []
            self.openContext(corpus)
            self._sel_method_buttons.button(self.sel_method).setChecked(True)
        else:
            self.corpus = None

        self._clear_and_run()

    @staticmethod
    def _get_word_attribute(words: Table) -> "Optional[List[str]]":
        """Return the values of the column of ``words`` that holds the words.

        Only string variables (metas first, then other variables) are
        considered.  A variable whose ``attributes`` mark it with
        ``type == "words"`` is preferred; otherwise the string variable with
        the lowest average number of whitespace-separated tokens per
        non-missing value is used.

        Returns
        -------
        The column values as a list, or ``None`` when the table has no
        string variable.
        """
        attrs = [
            a for a in words.domain.metas + words.domain.variables
            if isinstance(a, StringVariable)
        ]
        if not attrs:
            return None

        words_attr = next(
            (a for a in attrs if a.attributes.get("type", "") == "words"),
            None)
        if not words_attr:
            # find the most suitable attribute - one with lowest average text
            # length - counted as a number of words
            def avg_len(attr):
                array_ = words.get_column_view(attr)[0]
                array_ = array_[~isnull(array_)]
                if len(array_) == 0:
                    # A column with only missing values has no average; rank
                    # it last instead of dividing by zero.
                    return float("inf")
                return sum(len(a.split()) for a in array_) / len(array_)

            # min() returns the first attribute with the lowest average, the
            # same one a stable sort would put first.
            words_attr = min(attrs, key=avg_len)

        return words.get_column_view(words_attr)[0].tolist()

    @Inputs.words
    def set_words(self, words: Table) -> None:
        """Accept a new words table (or ``None``), then clear scores and rerun.

        ``self.words`` becomes ``None`` when no table is given or the table
        has neither variables nor metas.
        """
        has_columns = (
            words is not None
            and len(words.domain.variables + words.domain.metas) > 0
        )
        self.words = self._get_word_attribute(words) if has_columns else None
        self._clear_and_run()

    def _gather_scores(self) -> Tuple[np.ndarray, List[str]]:
        """
        Collect the already computed scores of the active scoring methods
        for the active aggregation.

        Returns
        -------
        scores
            Array with one column per available score; it has no columns
            when nothing is computed yet and shape (0, 0) without a corpus
        labels
            The list with score names for the header and variables names
        """
        if self.corpus is None:
            return np.empty((0, 0)), []

        aggregation = self._get_active_aggregation()
        methods = [
            m for m in self._get_active_scorers()
            if (m, aggregation) in self.scores
        ]
        if methods:
            scores = np.column_stack(
                [self.scores[(m, aggregation)] for m in methods])
        else:
            scores = np.empty((len(self.corpus), 0))
        labels = [SCORING_METHODS[m][0] for m in methods]
        return scores, labels

    def _send_output(self) -> None:
        """
        Send the corpus with the scores appended as metas, annotated with
        the selection, and the selected documents (``None`` if there are
        none).  Without a corpus ``None`` is sent on both outputs.
        """
        corpus = self.corpus
        if corpus is None:
            self.Outputs.corpus.send(None)
            self.Outputs.selected_documents.send(None)
            return

        scores, labels = self._gather_scores()
        out_corpus = corpus
        if labels:
            source_domain = corpus.domain
            # One new meta per score, named so that it does not clash with
            # names already in the domain.
            score_vars = tuple(
                ContinuousVariable(get_unique_names(source_domain, label))
                for label in labels
            )
            domain = Domain(
                source_domain.attributes,
                source_domain.class_var,
                metas=source_domain.metas + score_vars,
            )
            out_corpus = Corpus(
                domain,
                corpus.X,
                corpus.Y,
                np.hstack([corpus.metas, scores]),
            )
            Corpus.retain_preprocessing(corpus, out_corpus)

        self.Outputs.corpus.send(
            create_annotated_table(out_corpus, self.selected_rows))
        selected = out_corpus[self.selected_rows] if self.selected_rows else None
        self.Outputs.selected_documents.send(selected)

    def _fill_table(self) -> None:
        """
        Fill the table in the widget with scores and document names

        Without a corpus the model is only cleared.  Otherwise the table is
        refilled, the stored sorting is restored when its column exists and
        the selection is re-applied.
        """
        if self.corpus is None:
            self.model.clear()
            return
        scores, labels = self._gather_scores()
        labels = ["Document"] + labels
        titles = self.corpus.titles.tolist()

        # clearing selection and sorting to prevent SEGFAULT on model.wrap
        self.view.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)
        # The handler is disconnected so that clearing the selection does
        # not overwrite selected_rows or send outputs.
        with disconnected(self.view.selectionModel().selectionChanged,
                          self.__on_selection_change):
            self.view.clearSelection()

        self.model.fill_table(titles, scores)
        self.model.setHorizontalHeaderLabels(labels)
        self.view.update_column_widths()
        if self.model.columnCount() > self.sort_column_order[0]:
            # if there are not enough columns, do not apply the sorting from
            # settings: sorting can be saved for a score column while scores
            # are still being computed, and the table is filled with document
            # names before the scores are available
            self.view.horizontalHeader().setSortIndicator(
                *self.sort_column_order)

        self._select_rows()

    def _fill_and_output(self) -> None:
        """Fill the table in the widget and send the output"""
        # The table is filled first: filling re-applies the selection that
        # the output is built from.
        self._fill_table()
        self._send_output()

    def _clear_and_run(self) -> None:
        """Clear cached scores and commit"""
        self.scores = {}
        # Stop a running computation before refreshing the table and outputs
        # without scores; commit() then starts computing them again.
        self.cancel()
        self._fill_and_output()
        self.commit()

    def __setting_changed(self) -> None:
        """Callback of the scoring and aggregation controls: recompute."""
        self.commit()

    def commit(self) -> None:
        """Compute the scores that are still missing, or refresh the output.

        Clears the error and cancels a running task.  With both a corpus and
        words present, the active scores not yet cached for the active
        aggregation are computed in the background; when none is missing the
        table and outputs are refreshed directly.
        """
        self.Error.custom_err.clear()
        self.cancel()
        if self.corpus is None or self.words is None:
            return

        scorers = self._get_active_scorers()
        aggregation = self._get_active_aggregation()
        missing = [s for s in scorers if (s, aggregation) not in self.scores]
        if not missing:
            self._fill_and_output()
            return

        # Current value of every additional option, keyed by option name.
        options = {
            v: items[getattr(self, v)]
            for v, items in ADDITIONAL_OPTIONS.values()
        }
        self.start(
            _run, self.corpus, self.words, missing, aggregation, options)

    def on_done(self, _: None) -> None:
        """Send the outputs once the background task has finished; the
        result argument is ignored."""
        self._send_output()

    def on_partial_result(self, result: Tuple[str, str, np.ndarray]) -> None:
        """Cache one computed score and refill the table.

        ``result`` is a ``(scoring method, aggregation, scores)`` triple.
        """
        sc_method, aggregation, scores = result
        self.scores[(sc_method, aggregation)] = scores
        self._fill_table()

    def on_exception(self, ex: Exception) -> None:
        """Show the exception as a widget error and refresh the table and
        outputs with the scores available so far."""
        self.Error.custom_err(ex)
        self._fill_and_output()

    def _get_active_scorers(self) -> List[str]:
        """
        Gather currently active/selected scores

        A scoring method is active when the widget attribute of the same
        name is truthy.

        Returns
        -------
        List with selected scores names
        """
        return [attr for attr in SCORING_METHODS if getattr(self, attr)]

    def _get_active_aggregation(self) -> str:
        """
        Look up the aggregation selected by the ``aggregation`` index

        Returns
        -------
        Selected aggregation name
        """
        aggregation_names = list(AGGREGATIONS)
        return aggregation_names[self.aggregation]

    @staticmethod
    def _is_corpus_normalized(corpus: Corpus) -> bool:
        """
        Check if corpus is normalized, i.e. whether any of the preprocessors
        used on it is a ``BaseNormalizer``.
        """
        for preprocessor in corpus.used_preprocessor.preprocessors:
            if isinstance(preprocessor, BaseNormalizer):
                return True
        return False

    def get_selected_indices(self) -> List[int]:
        """Return the source-model rows of the selection, in view order."""
        # get indices in table's order - that the selected output table have same order
        indexes = list(self.view.selectionModel().selectedRows())
        indexes.sort(key=lambda index: index.row())
        to_source = self.view.model().mapToSource
        return [to_source(index).row() for index in indexes]

    def _select_rows(self):
        """Apply the current selection method to the table view.

        NONE clears the selection, ALL selects every visible row, N_BEST
        selects the first ``n_selected`` visible rows and MANUAL re-selects
        the rows stored in ``selected_rows`` (indices of the source model).

        Raises
        ------
        NotImplementedError
            For an unknown selection method.
        """
        proxy_model = self.view.model()
        n_rows, n_columns = proxy_model.rowCount(), proxy_model.columnCount()
        if self.sel_method == SelectionMethods.NONE:
            selection = QItemSelection()
        elif self.sel_method == SelectionMethods.ALL:
            selection = QItemSelection(
                proxy_model.index(0, 0),
                proxy_model.index(n_rows - 1, n_columns - 1))
        elif self.sel_method == SelectionMethods.MANUAL:
            selection = QItemSelection()
            # Stored rows index the source model, so they must be validated
            # against its row count; the proxy's count shrinks with the
            # filter and would wrongly drop valid rows.
            n_source_rows = self.model.rowCount()
            new_sel = []
            for row in self.selected_rows:
                if row < n_source_rows:
                    new_sel.append(row)
                    _selection = QItemSelection(
                        self.model.index(row, 0),
                        self.model.index(row, n_columns - 1))
                    selection.merge(
                        proxy_model.mapSelectionFromSource(_selection),
                        QItemSelectionModel.Select,
                    )
            # selected rows must be updated when the same dataset with less rows
            # appear at the input - it is not handled by selectionChanged
            # in cases when all selected rows missing in new table
            self.selected_rows = new_sel
        elif self.sel_method == SelectionMethods.N_BEST:
            n_sel = min(self.n_selected, n_rows)
            selection = QItemSelection(
                proxy_model.index(0, 0),
                proxy_model.index(n_sel - 1, n_columns - 1))
        else:
            raise NotImplementedError

        self.view.selectionModel().select(selection,
                                          QItemSelectionModel.ClearAndSelect)
Ejemplo n.º 14
0
class SelectGenesEditor(ScBaseEditor):
    """Editor for the parameters passed to ``SelectMostVariableGenes``.

    The editor exposes the number of genes to keep, the scoring method
    (one radio button per method) and an optional "compute statistics for
    N gene groups" setting.
    """
    DEFAULT_N_GENS = 1000
    DEFAULT_METHOD = SelectMostVariableGenes.Dispersion
    DEFAULT_COMPUTE_STATS = True
    DEFAULT_N_GROUPS = 20

    def __init__(self, parent=None, **kwargs):
        super().__init__(parent, **kwargs)
        self.setLayout(QVBoxLayout())
        self._n_genes = self.DEFAULT_N_GENS
        self._n_groups = self.DEFAULT_N_GROUPS

        # Form with the gene count on top and the method buttons below it.
        form = QFormLayout()
        genes_spin = QSpinBox(minimum=1, maximum=10**6, value=self._n_genes)
        genes_spin.valueChanged[int].connect(self._set_n_genes)
        genes_spin.editingFinished.connect(self.edited)
        self.n_genes_spin = genes_spin
        form.addRow("Number of genes:", genes_spin)
        self.layout().addLayout(form)

        method_buttons = [
            QRadioButton("Dispersion", checked=True),
            QRadioButton("Variance"),
            QRadioButton("Mean"),
        ]
        self.group = QButtonGroup()
        self.group.buttonClicked.connect(self._on_button_clicked)
        for position, radio in enumerate(method_buttons):
            # Button ids are the enum member's value shifted down by one.
            member = index_to_enum(SelectMostVariableGenes.Method, position)
            self.group.addButton(radio, member.value - 1)
            form.addRow(radio)

        # "Compute statistics for [n] gene groups." row.
        stats_check = QCheckBox("Compute statistics for",
                                checked=self.DEFAULT_COMPUTE_STATS)
        stats_check.clicked.connect(self.edited)
        self.stats_check = stats_check

        groups_spin = QSpinBox(minimum=1, value=self._n_groups)
        groups_spin.valueChanged[int].connect(self._set_n_groups)
        groups_spin.editingFinished.connect(self.edited)
        self.n_groups_spin = groups_spin

        stats_row = QHBoxLayout()
        for widget in (stats_check, groups_spin, QLabel("gene groups.")):
            stats_row.addWidget(widget)
        stats_row.addStretch()
        self.layout().addLayout(stats_row)

    def _set_n_genes(self, n):
        """Store a new gene count, sync the spin box and emit ``changed``."""
        if self._n_genes == n:
            return
        self._n_genes = n
        self.n_genes_spin.setValue(n)
        self.changed.emit()

    def _set_n_groups(self, n):
        """Store a new group count, sync the spin box and emit ``changed``."""
        if self._n_groups == n:
            return
        self._n_groups = n
        self.n_groups_spin.setValue(n)
        self.changed.emit()

    def _on_button_clicked(self):
        """Emit ``changed`` and ``edited`` after a method button click."""
        self.changed.emit()
        self.edited.emit()

    def setParameters(self, params):
        """Load the editor state from ``params``, using defaults for gaps."""
        get = params.get
        self._set_n_genes(get("n_genes", self.DEFAULT_N_GENS))

        chosen_method = get("method", self.DEFAULT_METHOD)
        position = enum_to_index(SelectMostVariableGenes.Method, chosen_method)
        self.group.buttons()[position].setChecked(True)

        self.stats_check.setChecked(
            get("compute_stats", self.DEFAULT_COMPUTE_STATS))
        self._set_n_groups(get("n_groups", self.DEFAULT_N_GROUPS))

    def parameters(self):
        """Return the current editor state as a parameter dictionary."""
        checked_id = self.group.checkedId()
        params = {"n_genes": self._n_genes}
        params["method"] = index_to_enum(SelectMostVariableGenes.Method,
                                         checked_id)
        params["compute_stats"] = self.stats_check.isChecked()
        params["n_groups"] = self._n_groups
        return params

    @staticmethod
    def createinstance(params):
        """Build a ``SelectMostVariableGenes`` from a parameter dictionary.

        ``n_groups`` is passed as ``None`` when statistics are disabled.
        """
        defaults = SelectGenesEditor
        method = params.get("method", defaults.DEFAULT_METHOD)
        n_genes = params.get("n_genes", defaults.DEFAULT_N_GENS)
        if params.get("compute_stats", defaults.DEFAULT_COMPUTE_STATS):
            n_groups = params.get("n_groups", defaults.DEFAULT_N_GROUPS)
        else:
            n_groups = None
        return SelectMostVariableGenes(method, n_genes, n_groups)

    def __repr__(self):
        checked_button = self.group.button(self.group.checkedId())
        pieces = ["Method: {}, Number of Genes: {}".format(
            checked_button.text(), self._n_genes)]
        if self.stats_check.isChecked():
            pieces.append(", Number of Groups: {}".format(self._n_groups))
        return "".join(pieces)
Ejemplo n.º 15
0
class NormalizeEditor(ScBaseEditor):
    """Editor for the normalization preprocessing step.

    The user picks the normalization method (one radio button per method)
    and may enable normalization per cell group, where the group is chosen
    from the discrete meta and class variables of the master's input data.
    """
    DEFAULT_GROUP_BY = False
    DEFAULT_GROUP_VAR = None
    DEFAULT_METHOD = Normalize.CPM

    def __init__(self, parent=None, master=None, **kwargs):
        """Build the editor.

        Args:
            parent: parent widget, forwarded to the base class.
            master: object exposing ``data`` and ``input_data_changed``;
                when ``None`` the group-by controls stay disabled.
        """
        super().__init__(parent, **kwargs)
        self._group_var = self.DEFAULT_GROUP_VAR
        self._master = master
        # ``master`` defaults to None, so only subscribe when one is given.
        if master is not None:
            master.input_data_changed.connect(self._set_model)
        self.setLayout(QVBoxLayout())

        form = QFormLayout()
        cpm_b = QRadioButton("Counts per million", checked=True)
        med_b = QRadioButton("Median")
        self.group = QButtonGroup()
        self.group.buttonClicked.connect(self._on_button_clicked)
        for i, button in enumerate([cpm_b, med_b]):
            # Button ids are the enum member's value shifted down by one.
            index = index_to_enum(Normalize.Method, i).value
            self.group.addButton(button, index - 1)
            form.addRow(button)

        self.group_by_check = QCheckBox("Cell Groups: ",
                                        enabled=self.DEFAULT_GROUP_BY)
        self.group_by_check.clicked.connect(self.edited)
        self.group_by_combo = QComboBox(enabled=self.DEFAULT_GROUP_BY)
        self.group_by_model = DomainModel(order=(DomainModel.METAS,
                                                 DomainModel.CLASSES),
                                          valid_types=DiscreteVariable,
                                          alphabetical=True)
        self.group_by_combo.setModel(self.group_by_model)
        self.group_by_combo.currentIndexChanged.connect(self.changed)
        self.group_by_combo.activated.connect(self.edited)

        form.addRow(self.group_by_check, self.group_by_combo)
        self.layout().addLayout(form)

        self._set_model()

    def _has_group_var(self):
        """Return True when the stored group variable is in the combo model."""
        return (self._group_var is not None
                and self._group_var in self.group_by_model)

    def _current_group_var(self):
        """Return the variable selected in the combo box, or None."""
        index = self.group_by_combo.currentIndex()
        return self.group_by_model[index] if index > -1 else None

    def _set_model(self):
        """Refill the group-by combo from the master's current data."""
        data = self._master.data if self._master is not None else None
        # Pass the domain itself; ``data and data.domain`` would hand an
        # empty (falsy) table object to the model instead of its domain.
        self.group_by_model.set_domain(
            data.domain if data is not None else None)
        enable = bool(self.group_by_model)
        self.group_by_check.setChecked(False)
        self.group_by_check.setEnabled(enable)
        self.group_by_combo.setEnabled(enable)
        if enable:
            # The model only lists discrete metas and classes, so membership
            # must be tested against the model, not the whole domain;
            # otherwise ``indexOf`` raises ValueError.
            index = 0
            if self._has_group_var():
                index = self.group_by_model.indexOf(self._group_var)
            self.group_by_combo.setCurrentIndex(index)
        else:
            self.group_by_combo.setCurrentText(None)

    def _on_button_clicked(self):
        """Emit ``changed`` and ``edited`` after a method button click."""
        self.changed.emit()
        self.edited.emit()

    def setParameters(self, params):
        """Load the editor state from ``params``, using defaults for gaps.

        A ``group_var`` that is not offered by the current combo model is
        remembered but not selected, and grouping stays unchecked.
        """
        method = params.get("method", self.DEFAULT_METHOD)
        index = enum_to_index(Normalize.Method, method)
        self.group.buttons()[index].setChecked(True)
        self._group_var = params.get("group_var", self.DEFAULT_GROUP_VAR)
        group = self._has_group_var()
        if group:
            index = self.group_by_model.indexOf(self._group_var)
            self.group_by_combo.setCurrentIndex(index)
        group_by = params.get("group_by", self.DEFAULT_GROUP_BY)
        self.group_by_check.setChecked(bool(group_by and group))

    def parameters(self):
        """Return the current editor state as a parameter dictionary."""
        group_var = self._current_group_var()
        group_by = self.group_by_check.isChecked()
        method = index_to_enum(Normalize.Method, self.group.checkedId())
        return {"group_var": group_var, "group_by": group_by, "method": method}

    @staticmethod
    def createinstance(params):
        """Build the normalizer described by ``params``.

        Returns ``NormalizeGroups`` when grouping is enabled and a group
        variable is given, otherwise ``NormalizeSamples``.
        """
        group_var = params.get("group_var")
        group_by = params.get("group_by", NormalizeEditor.DEFAULT_GROUP_BY)
        method = params.get("method", NormalizeEditor.DEFAULT_METHOD)
        return NormalizeGroups(group_var, method) \
            if group_by and group_var else NormalizeSamples(method)

    def __repr__(self):
        method = self.group.button(self.group.checkedId()).text()
        group_var = self._current_group_var()
        group_by = self.group_by_check.isChecked()
        group_text = ", Grouped by: {}".format(group_var) if group_by else ""
        return "Method: {}".format(method) + group_text
Ejemplo n.º 16
0
class OWRank(OWWidget, ConcurrentWidgetMixin):
    """Widget that scores the attributes of a table and outputs a subset.

    Scores come from the built-in methods listed in ``SCORES`` that the
    user has ticked and from scorers received on the scorer input.  The
    computation is started through ``self.start(run, ...)`` and the results
    are shown in a sortable table in which rows (attributes) are selected
    either automatically (none / all / N best) or manually.
    """
    name = "排名(Rank)"
    description = "根据数据特征的相关性对其进行排名和筛选。"
    icon = "icons/Rank.svg"
    priority = 1102
    keywords = ['paiming', 'mingci', 'paixu']
    category = "数据(Data)"

    buttons_area_orientation = Qt.Vertical

    class Inputs:
        # Input table and any number of external scorers.
        data = Input("数据(Data)", Table, replaces=['Data'])
        scorer = MultiInput("评分器(Scorer)",
                            score.Scorer,
                            filter_none=True,
                            replaces=['Scorer'])

    class Outputs:
        # Data reduced to the selected attributes, the table of scores and
        # the list of selected attributes.
        reduced_data = Output("选中的数据(Reduced Data)",
                              Table,
                              default=True,
                              replaces=['Reduced Data'])
        scores = Output("评分(Scores)", Table, replaces=['Scores'])
        features = Output("特征(Features)",
                          AttributeList,
                          dynamic=False,
                          replaces=['Features'])

    # Ids of the selection-method radio buttons.
    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    nSelected = ContextSetting(5)
    auto_apply = Setting(True)

    # (column, order); the column is counted without the two leading
    # columns of the ranks table (see headerClick / on_done).
    sorting = Setting((0, Qt.DescendingOrder))
    selected_methods = Setting(set())

    settings_version = 3
    settingsHandler = DomainContextHandler()
    selected_attrs = ContextSetting([], schema_only=True)
    selectionMethod = ContextSetting(SelectNBest)

    class Information(OWWidget.Information):
        no_target_var = Msg("Data does not have a (single) target variable.")
        missings_imputed = Msg('Missing values will be imputed as needed.')

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("Scorer {} inadequate: {}")
        no_attributes = Msg("Data does not have a single attribute.")

    class Warning(OWWidget.Warning):
        renamed_variables = Msg(
            "Variables with duplicated names have been renamed.")

    def __init__(self):
        """Create the ranks table, the method check boxes and the selection
        controls."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self.scorers: List[ScoreMeta] = []
        self.out_domain_desc = None
        self.data = None
        self.problem_type_mode = ProblemType.CLASSIFICATION

        # results caches
        self.scorers_results = {}
        self.methods_results = {}

        # With no stored choice, start with the methods flagged as default.
        if not self.selected_methods:
            self.selected_methods = {
                method.name
                for method in SCORES if method.is_default
            }

        # GUI
        self.ranksModel = model = TableModel(parent=self)  # type: TableModel
        self.ranksView = view = TableView(self)  # type: TableView
        self.mainArea.layout().addWidget(view)
        view.setModel(model)
        view.setColumnWidth(NVAL_COL, 30)
        view.selectionModel().selectionChanged.connect(self.on_select)

        def _set_select_manual():
            # Any manual interaction with the table switches to manual mode.
            self.setSelectionMethod(OWRank.SelectManual)

        view.manualSelection.connect(_set_select_manual)
        view.verticalHeader().sectionClicked.connect(_set_select_manual)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)

        self.measuresStack = stacked = QStackedWidget(self)
        self.controlArea.layout().addWidget(stacked)

        # One stacked page per entry: classification scores, regression
        # scores and an empty page; switchProblemType picks the page by index.
        for scoring_methods in (CLS_SCORES, REG_SCORES, []):
            box = gui.vBox(None, "评分方法" if scoring_methods else None)
            stacked.addWidget(box)
            for method in scoring_methods:
                box.layout().addWidget(
                    QCheckBox(
                        method.zh_name,
                        self,
                        objectName=method.
                        shortname,  # To be easily found in tests
                        checked=method.name in self.selected_methods,
                        stateChanged=partial(self.methodSelectionChanged,
                                             method_name=method.name)))
            gui.rubber(box)

        gui.rubber(self.controlArea)

        self.switchProblemType(ProblemType.CLASSIFICATION)

        selMethBox = gui.vBox(self.buttonsArea, "选择特征")

        grid = QGridLayout()
        grid.setContentsMargins(0, 0, 0, 0)
        grid.setSpacing(6)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectionMethod)

        def button(text, buttonid, toolTip=None):
            # Create a radio button and register it under ``buttonid``.
            b = QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("无"), OWRank.SelectNone)
        b2 = button(self.tr("所有"), OWRank.SelectAll)
        b3 = button(self.tr("手动"), OWRank.SelectManual)
        b4 = button(self.tr("最佳排名:"), OWRank.SelectNBest)

        # Changing the spin box value also switches to "N best" selection.
        s = gui.spin(
            selMethBox,
            self,
            "nSelected",
            1,
            999,
            callback=lambda: self.setSelectionMethod(OWRank.SelectNBest),
            addToLayout=False)

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectionMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_send(self.buttonsArea, self, "auto_apply")

        self.resize(690, 500)

    def switchProblemType(self, index):
        """
        Switch between discrete/continuous/no_class mode
        """
        self.measuresStack.setCurrentIndex(index)
        self.problem_type_mode = index

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Handle a new input table.

        Clears the previous selection, table contents, cached results and
        messages, cancels a running task, determines the problem type from
        the class variable and opens the matching settings context.
        """
        self.closeContext()
        self.selected_attrs = []
        self.ranksModel.clear()
        self.ranksModel.resetSorting(True)

        self.scorers_results = {}
        self.methods_results = {}
        self.cancel()

        self.Error.clear()
        self.Information.clear()
        self.Information.missings_imputed(
            shown=data is not None and data.has_missing())

        # A table without attributes is treated like no data at all.
        if data is not None and not data.domain.attributes:
            data = None
            self.Error.no_attributes()
        self.data = data
        self.switchProblemType(ProblemType.CLASSIFICATION)
        if self.data is not None:
            domain = self.data.domain
            if domain.has_discrete_class:
                problem_type = ProblemType.CLASSIFICATION
            elif domain.has_continuous_class:
                problem_type = ProblemType.REGRESSION
            elif not domain.class_var:
                self.Information.no_target_var()
                problem_type = ProblemType.UNSUPERVISED
            else:
                # This can happen?
                self.Error.invalid_type(type(domain.class_var).__name__)
                problem_type = None

            if problem_type is not None:
                self.switchProblemType(problem_type)

            # Default for new data; openContext below may restore a stored
            # value.
            self.selectionMethod = OWRank.SelectNBest

        self.openContext(data)
        self.selectButtons.button(self.selectionMethod).setChecked(True)

    def handleNewSignals(self):
        """Start rescoring and refresh the stored selection."""
        self.setStatusMessage('Running')
        self.update_scores()
        self.setStatusMessage('')
        self.on_select()

    @Inputs.scorer
    def set_learner(self, index, scorer):
        """Replace the scorer at ``index`` and drop cached scorer results."""
        self.scorers[index] = ScoreMeta(
            scorer.name, scorer.name, scorer,
            ProblemType.from_variable(scorer.class_type), False)
        self.scorers_results = {}

    @Inputs.scorer.insert
    def insert_learner(self, index: int, scorer):
        """Insert a scorer at ``index`` and drop cached scorer results."""
        self.scorers.insert(
            index,
            ScoreMeta(scorer.name, scorer.name, scorer,
                      ProblemType.from_variable(scorer.class_type), False))
        self.scorers_results = {}

    @Inputs.scorer.remove
    def remove_learner(self, index):
        """Remove the scorer at ``index`` and drop cached scorer results."""
        self.scorers.pop(index)
        self.scorers_results = {}

    def _get_methods(self):
        """Return the ticked built-in methods usable for the current data.

        A method is kept when it matches the current problem type and, for
        sparse ``X``, when its scorer supports sparse data.
        """
        return [
            method for method in SCORES if
            (method.name in self.selected_methods
             and method.problem_type == self.problem_type_mode and
             (not issparse(self.data.X) or method.scorer.supports_sparse_data))
        ]

    def _get_scorers(self):
        """Return the external scorers that fit the current problem type.

        Scorers of another problem type (other than unsupervised) are
        reported through ``Error.inadequate_learner`` and left out.
        """
        scorers = []
        for scorer in self.scorers:
            if scorer.problem_type in (
                    self.problem_type_mode,
                    ProblemType.UNSUPERVISED,
            ):
                scorers.append(scorer)
            else:
                self.Error.inadequate_learner(scorer.name,
                                              scorer.learner_adequacy_err_msg)
        return scorers

    def update_scores(self):
        """Start computing the scores that are not cached yet.

        Without data the table is cleared and ``None`` is sent on the
        scores output.
        """
        if self.data is None:
            self.ranksModel.clear()
            self.Outputs.scores.send(None)
            return

        self.Error.inadequate_learner.clear()

        # Only scorers/methods without a cached result are passed to run.
        scorers = [
            s for s in self._get_scorers() if s not in self.scorers_results
        ]
        methods = [
            m for m in self._get_methods() if m not in self.methods_results
        ]
        self.start(run, self.data, methods, scorers)

    def on_done(self, result: Results) -> None:
        """Merge new results into the caches and rebuild the ranks table.

        Afterwards the stored sort order is re-applied, the automatic
        selection is refreshed and the scores table is sent out.
        """
        self.methods_results.update(result.method_scores)
        self.scorers_results.update(result.scorer_scores)

        methods = self._get_methods()
        method_labels = tuple(m.zh_shortname for m in methods)
        method_scores = tuple(self.methods_results[m] for m in methods)

        scores = [self.scorers_results[s] for s in self._get_scorers()]
        scorer_scores, scorer_labels = zip(*scores) if scores else ((), ())

        labels = method_labels + tuple(chain.from_iterable(scorer_labels))
        # Columns: attribute, number of values (NaN for non-discrete
        # attributes), then one column per score.
        model_array = np.column_stack(
            (list(self.data.domain.attributes), ) + ([
                float(len(a.values)) if a.is_discrete else np.nan
                for a in self.data.domain.attributes
            ], ) + method_scores + scorer_scores)
        for column, values in enumerate(model_array.T[2:].astype(float),
                                        start=2):
            self.ranksModel.setExtremesFrom(column, values)

        self.ranksModel.wrap(model_array.tolist())
        self.ranksModel.setHorizontalHeaderLabels((
            '',
            '#',
        ) + labels)
        self.ranksView.setColumnWidth(NVAL_COL, 40)
        self.ranksView.resizeColumnToContents(VARNAME_COL)

        # Re-apply sort
        try:
            sort_column, sort_order = self.sorting
            if sort_column < len(labels):
                # adds 2 to skip the first two columns
                self.ranksModel.sort(sort_column + 2, sort_order)
                self.ranksView.horizontalHeader().setSortIndicator(
                    sort_column + 2, sort_order)
        except ValueError:
            pass

        self.autoSelection()
        self.Outputs.scores.send(self.create_scores_table(labels))

    def on_exception(self, ex: Exception) -> None:
        """Re-raise ``ex`` unchanged."""
        raise ex

    def on_partial_result(self, result: Any) -> None:
        """Ignore partial results."""
        pass

    def on_select(self):
        """Store the attributes of the selected rows and schedule a commit."""
        # Save indices of attributes in the original, unsorted domain
        selected_rows = self.ranksView.selectionModel().selectedRows(0)
        row_indices = [i.row() for i in selected_rows]
        attr_indices = self.ranksModel.mapToSourceRows(row_indices)
        self.selected_attrs = [self.data.domain[idx] for idx in attr_indices]
        self.commit.deferred()

    def setSelectionMethod(self, method):
        """Store ``method``, check its radio button and reselect rows."""
        self.selectionMethod = method
        self.selectButtons.button(method).setChecked(True)
        self.autoSelection()

    def autoSelection(self):
        """Select table rows according to ``self.selectionMethod``.

        For manual selection the rows of ``self.selected_attrs`` are
        re-selected.
        """
        selModel = self.ranksView.selectionModel()
        model = self.ranksModel
        rowCount = model.rowCount()
        columnCount = model.columnCount()

        if self.selectionMethod == OWRank.SelectNone:
            selection = QItemSelection()
        elif self.selectionMethod == OWRank.SelectAll:
            selection = QItemSelection(
                model.index(0, 0), model.index(rowCount - 1, columnCount - 1))
        elif self.selectionMethod == OWRank.SelectNBest:
            nSelected = min(self.nSelected, rowCount)
            selection = QItemSelection(
                model.index(0, 0), model.index(nSelected - 1, columnCount - 1))
        else:
            # Manual: map the stored attributes back to their current rows.
            selection = QItemSelection()
            if self.selected_attrs is not None:
                attr_indices = [
                    self.data.domain.attributes.index(var)
                    for var in self.selected_attrs
                ]
                for row in model.mapFromSourceRows(attr_indices):
                    selection.append(
                        QItemSelectionRange(model.index(row, 0),
                                            model.index(row, columnCount - 1)))

        selModel.select(selection, QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """Reselect the N best rows if needed and remember the sort state."""
        if index >= 2 and self.selectionMethod == OWRank.SelectNBest:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states
        sort_order = self.ranksModel.sortOrder()
        # -2 to skip the two leading (name and '#') columns
        sort_column = self.ranksModel.sortColumn() - 2
        self.sorting = (sort_column, sort_order)

    def methodSelectionChanged(self, state, method_name):
        """Add or remove ``method_name`` from the ticked methods, then
        rescore."""
        if state == Qt.Checked:
            self.selected_methods.add(method_name)
        elif method_name in self.selected_methods:
            self.selected_methods.remove(method_name)

        self.update_scores()

    def send_report(self):
        """Report the input domain, the ranks table and the output domain."""
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
        if self.out_domain_desc is not None:
            self.report_items("Output", self.out_domain_desc)

    @gui.deferred
    def commit(self):
        """Send the reduced data and the list of selected attributes.

        With nothing selected, ``None`` is sent on both outputs.
        """
        if not self.selected_attrs:
            self.Outputs.reduced_data.send(None)
            self.Outputs.features.send(None)
            self.out_domain_desc = None
        else:
            reduced_domain = Domain(self.selected_attrs,
                                    self.data.domain.class_var,
                                    self.data.domain.metas)
            data = self.data.transform(reduced_domain)
            self.Outputs.reduced_data.send(data)
            self.Outputs.features.send(AttributeList(self.selected_attrs))
            self.out_domain_desc = report.describe_domain(data.domain)

    def create_scores_table(self, labels):
        """Build the output table of scores from the ranks model.

        Returns ``None`` when the model is empty or holds no score columns.
        Duplicated column labels are made unique and reported through
        ``Warning.renamed_variables``.
        """
        self.Warning.renamed_variables.clear()
        model_list = self.ranksModel.tolist()
        # Empty or just first two columns
        if not model_list or len(model_list[0]) == 2:
            return None
        # 'Feature' names the meta column, so it takes part in de-duplication.
        unique, renamed = get_unique_names_duplicates(labels + ('Feature', ),
                                                      return_duplicated=True)
        if renamed:
            self.Warning.renamed_variables(', '.join(renamed))

        domain = Domain([ContinuousVariable(label) for label in unique[:-1]],
                        metas=[StringVariable(unique[-1])])

        # Prevent np.inf scores
        finfo = np.finfo(np.float64)
        scores = np.clip(np.array(model_list)[:, 2:], finfo.min, finfo.max)

        feature_names = np.array([a.name for a in self.data.domain.attributes])
        # Reshape to 2d array as Table does not like 1d arrays
        feature_names = feature_names[:, None]

        new_table = Table(domain, scores, metas=feature_names)
        new_table.name = "Feature Scores"
        return new_table

    @classmethod
    def migrate_settings(cls, settings, version):
        """Convert a pre-version-2 ``headerState`` into ``sorting``."""
        # If older settings, restore sort header to default
        # Saved selected_rows will likely be incorrect
        if version is None or version < 2:
            column, order = 0, Qt.DescendingOrder
            headerState = settings.pop("headerState", None)

            # Lacking knowledge of last problemType, use discrete ranks view's ordering
            if isinstance(headerState, (tuple, list)):
                headerState = headerState[0]

            if isinstance(headerState, bytes):
                hview = QHeaderView(Qt.Horizontal)
                hview.restoreState(headerState)
                column, order = hview.sortIndicatorSection(
                ) - 1, hview.sortIndicatorOrder()
            settings["sorting"] = (column, order)

    @classmethod
    def migrate_context(cls, context, version):
        """Reject contexts stored before settings version 3."""
        if version is None or version < 3:
            # Selections were stored as indices, so these contexts matched
            # any domain. The only safe thing to do is to remove them.
            raise IncompatibleContext
class OWRankSurvivalFeatures(OWWidget, ConcurrentWidgetMixin):
    name = 'Rank Survival Features'
    # TODO: Add widget metadata
    description = ''
    icon = 'icons/owranksurvivalfeatures.svg'
    priority = 30
    keywords = []

    buttons_area_orientation = Qt.Vertical
    select_none, manual_selection, select_n_best = range(3)

    settingsHandler = DomainContextHandler()
    selected_attrs = ContextSetting([], schema_only=True)
    selection_method = Setting(select_n_best, schema_only=True)
    n_selected = Setting(20, schema_only=True)
    auto_commit: bool = Setting(False, schema_only=True)

    class Inputs:
        # Input table; handled by set_data.
        data = Input('Data', Table)

    class Outputs:
        # Input data restricted to the selected attributes; sent by commit.
        reduced_data = Output('Reduced Data', Table, default=True)

    def __init__(self):
        """Create the selection controls and the results table."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        self.data: Optional[Table] = None
        # Maps attribute name -> variable of the current input domain
        # (filled in set_data).
        self.attr_name_to_variable: Optional[dict] = None
        self.covariates_from_worker_result = None
        self.time_var: Optional[str] = None
        self.event_var: Optional[str] = None

        gui.rubber(self.controlArea)

        sel_method_box = gui.vBox(self.buttonsArea, 'Select Attributes')
        grid = QGridLayout()
        grid.setContentsMargins(0, 0, 0, 0)
        grid.setSpacing(6)
        self.select_buttons = QButtonGroup()
        self.select_buttons.buttonClicked[int].connect(
            self.set_selection_method)

        def button(text, buttonid, toolTip=None):
            # Create a radio button and register it under ``buttonid``.
            b = QRadioButton(text)
            self.select_buttons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr('None'), OWRankSurvivalFeatures.select_none)
        b2 = button(self.tr('Manual'), OWRankSurvivalFeatures.manual_selection)
        b3 = button(self.tr('Best ranked:'),
                    OWRankSurvivalFeatures.select_n_best)

        # Changing the spin box value also switches to "best ranked".
        s = gui.spin(
            sel_method_box,
            self,
            'n_selected',
            1,
            999,
            callback=lambda: self.set_selection_method(OWRankSurvivalFeatures.
                                                       select_n_best),
            addToLayout=False,
        )

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(s, 2, 1)

        # Show the stored selection method; without this no radio button is
        # checked until the user clicks one.
        self.select_buttons.button(self.selection_method).setChecked(True)

        sel_method_box.layout().addLayout(grid)

        self.commit_button = gui.auto_commit(self.buttonsArea,
                                             self,
                                             'auto_commit',
                                             '&Commit',
                                             box=False)

        # Main area
        self.model = PyTableModel(parent=self)
        self.table_view = TableView(parent=self)
        self.table_view.setModel(self.model)
        self.model.setHorizontalHeaderLabels([
            'Log-Likelihood', 'Log-Likelihood Ratio', f'{"p".center(13)}',
            'FDR'
        ])
        self.table_view.setSizeAdjustPolicy(
            QAbstractScrollArea.AdjustToContentsOnFirstShow)
        self.table_view.selectionModel().selectionChanged.connect(
            self.on_select)

        def _set_select_manual():
            # Any manual interaction with the table switches to manual mode.
            self.set_selection_method(OWRankSurvivalFeatures.manual_selection)

        self.table_view.manualSelection.connect(_set_select_manual)
        self.table_view.verticalHeader().sectionClicked.connect(
            _set_select_manual)

        self.mainArea.layout().addWidget(self.table_view)

    @property
    def covariates(self) -> Optional[List[str]]:
        """Names of the attributes of the current data, or None without data."""
        if self.data:
            return [variable.name for variable in self.data.domain.attributes]
        return None

    @Inputs.data
    @check_survival_data
    def set_data(self, data: Optional[Table]):
        """Handle a new input table and start ranking its attributes.

        The previous selection and table contents are cleared and a running
        task is cancelled.  Without data the widget state is reset and
        ``None`` is sent on the output; otherwise the worker is started on
        the new data.
        """
        self.closeContext()
        # Stop a ranking that is still running for the previous input so its
        # result cannot repopulate the table after the input has changed.
        self.cancel()
        self.selected_attrs = []
        self.covariates_from_worker_result = []
        self.model.clear()
        self.model.resetSorting()

        if not data:
            # Forget the previous table instead of keeping it around, and
            # clear the output that was derived from it.
            self.data = None
            self.attr_name_to_variable = None
            self.time_var = self.event_var = None
            self.Outputs.reduced_data.send(None)
            return

        self.data = data
        self.attr_name_to_variable = {
            attr.name: attr
            for attr in self.data.domain.attributes
        }

        self.openContext(data)
        time_var, event_var = get_survival_endpoints(self.data.domain)
        self.time_var, self.event_var = time_var.name, event_var.name
        self.start(worker, self.data, self.covariates, self.time_var,
                   self.event_var)

    def commit(self):
        """Send the data reduced to the selected attributes.

        ``None`` is sent when no attribute is selected.
        """
        if self.selected_attrs:
            source_domain = self.data.domain
            reduced = Domain(self.selected_attrs,
                             source_domain.class_vars,
                             source_domain.metas)
            self.Outputs.reduced_data.send(self.data.transform(reduced))
        else:
            self.Outputs.reduced_data.send(None)

    def on_done(self, worker_result):
        """Show the worker's result in the table and refresh the selection.

        ``worker_result`` is unpacked into covariate names and a results
        array; the names become the vertical header (as domain variables).
        """
        names, table_values = worker_result

        # The model holds only the numeric results, not the names.
        self.model.wrap(table_values.tolist())

        # Row order is taken from the worker result, because the order of
        # covariates gets mixed when using multiprocessing.
        self.covariates_from_worker_result = names.tolist()

        # Vertical header: the domain variable behind each covariate name.
        header_variables = []
        for name in names:
            header_variables.append(self.attr_name_to_variable[name])
        self.model.setVerticalHeaderLabels(header_variables)
        self.table_view.resizeColumnsToContents()

        self.auto_select()

    def on_exception(self, ex):
        """Re-raise ``ex`` unchanged."""
        raise ex

    def on_partial_result(self, result: Any) -> None:
        """Ignore partial results."""
        pass

    def set_selection_method(self, method):
        """Store ``method``, check its radio button and reselect rows."""
        self.selection_method = method
        self.select_buttons.button(method).setChecked(True)
        self.auto_select()

    def auto_select(self):
        """Select table rows according to ``self.selection_method``.

        "None" clears the selection, "best ranked" selects the first
        ``n_selected`` rows (capped at the row count) and manual selection
        re-selects the rows of ``self.selected_attrs``.
        """
        model = self.model
        n_rows = model.rowCount()
        last_column = model.columnCount() - 1
        method = self.selection_method

        if method == OWRankSurvivalFeatures.select_none:
            selection = QItemSelection()
        elif method == OWRankSurvivalFeatures.select_n_best:
            best_count = min(self.n_selected, n_rows)
            top_left = model.index(0, 0)
            bottom_right = model.index(best_count - 1, last_column)
            selection = QItemSelection(top_left, bottom_right)
        else:
            selection = QItemSelection()
            if self.selected_attrs is not None:
                # Position of each stored attribute in the worker's ordering.
                source_rows = []
                for variable in self.selected_attrs:
                    source_rows.append(
                        self.covariates_from_worker_result.index(
                            variable.name))
                for row in model.mapFromSourceRows(source_rows):
                    row_range = QItemSelectionRange(
                        model.index(row, 0), model.index(row, last_column))
                    selection.append(row_range)

        self.table_view.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def on_select(self):
        """Store the variables of the selected rows and commit."""
        selection_model = self.table_view.selectionModel()
        view_rows = [index.row() for index in selection_model.selectedRows(0)]
        # The vertical header holds the variable of each source row.
        row_variables = self.model._headers[Qt.Vertical]
        chosen = []
        for source_row in self.model.mapToSourceRows(view_rows):
            chosen.append(row_variables[source_row])
        self.selected_attrs = chosen
        self.commit()