Beispiel #1
0
class OWCreateInstance(OWWidget):
    name = "Create Instance"
    description = "Interactively create a data instance from sample dataset."
    icon = "icons/CreateInstance.svg"
    category = "Data"
    keywords = ["simulator"]
    priority = 4000

    class Inputs:
        data = Input("Data", Table)
        reference = Input("Reference", Table)

    class Outputs:
        data = Output("Data", Table)

    class Information(OWWidget.Information):
        nans_removed = Msg("Variables with only missing values were "
                           "removed from the list.")

    want_main_area = False
    ACTIONS = ["median", "mean", "random", "input"]
    HEADER = [["name", "Variable"], ["variable", "Value"]]
    Header = namedtuple("header",
                        [tag for tag, _ in HEADER])(*range(len(HEADER)))

    values: Dict[str, Union[float, str]] = Setting({}, schema_only=True)
    append_to_data = Setting(True)
    auto_commit = Setting(True)

    def __init__(self):
        super().__init__()
        self.data: Optional[Table] = None
        self.reference: Optional[Table] = None

        self.filter_edit = QLineEdit(textChanged=self.__filter_edit_changed,
                                     placeholderText="Filter...")
        self.view = QTableView(sortingEnabled=True,
                               contextMenuPolicy=Qt.CustomContextMenu,
                               selectionMode=QTableView.NoSelection)
        self.view.customContextMenuRequested.connect(self.__menu_requested)
        self.view.setItemDelegateForColumn(self.Header.variable,
                                           VariableDelegate(self))
        self.view.verticalHeader().hide()
        self.view.horizontalHeader().setStretchLastSection(True)
        self.view.horizontalHeader().setMaximumSectionSize(350)

        self.model = VariableItemModel(self)
        self.model.setHorizontalHeaderLabels([x for _, x in self.HEADER])
        self.model.dataChanged.connect(self.__table_data_changed)
        self.model.dataHasNanColumn.connect(self.Information.nans_removed)
        self.proxy_model = QSortFilterProxyModel()
        self.proxy_model.setFilterKeyColumn(-1)
        self.proxy_model.setFilterCaseSensitivity(False)
        self.proxy_model.setSourceModel(self.model)
        self.view.setModel(self.proxy_model)

        vbox = gui.vBox(self.controlArea, box=True)
        vbox.layout().addWidget(self.filter_edit)
        vbox.layout().addWidget(self.view)

        box = gui.hBox(vbox)
        gui.rubber(box)
        for name in self.ACTIONS:
            gui.button(box,
                       self,
                       name.capitalize(),
                       lambda *args, fun=name: self._initialize_values(fun),
                       autoDefault=False)
        gui.rubber(box)

        box = gui.auto_apply(self.controlArea, self, "auto_commit")
        box.button.setFixedWidth(180)
        box.layout().insertStretch(0)
        # pylint: disable=unnecessary-lambda
        append = gui.checkBox(None,
                              self,
                              "append_to_data",
                              "Append this instance to input data",
                              callback=lambda: self.commit())
        box.layout().insertWidget(0, append)

        self._set_input_summary()
        self._set_output_summary()
        self.settingsAboutToBePacked.connect(self.pack_settings)

    def __filter_edit_changed(self):
        self.proxy_model.setFilterFixedString(self.filter_edit.text().strip())

    def __table_data_changed(self):
        self.commit()

    def __menu_requested(self, point: QPoint):
        index = self.view.indexAt(point)
        model: QSortFilterProxyModel = index.model()
        source_index = model.mapToSource(index)
        menu = QMenu(self)
        for action in self._create_actions(source_index):
            menu.addAction(action)
        menu.popup(self.view.viewport().mapToGlobal(point))

    def _create_actions(self, index: QModelIndex) -> List[QAction]:
        actions = []
        for name in self.ACTIONS:
            action = QAction(name.capitalize(), self)
            action.triggered.connect(
                lambda *args, fun=name: self._initialize_values(fun, [index]))
            actions.append(action)
        return actions

    def _initialize_values(self, fun: str, indices: List[QModelIndex] = None):
        cont_fun = {
            "median": np.nanmedian,
            "mean": np.nanmean,
            "random": cont_random,
            "input": np.nanmean
        }.get(fun, NotImplemented)
        disc_fun = {
            "median": majority,
            "mean": majority,
            "random": disc_random,
            "input": majority
        }.get(fun, NotImplemented)

        if not self.data or fun == "input" and not self.reference:
            return

        self.model.dataChanged.disconnect(self.__table_data_changed)
        rows = range(self.proxy_model.rowCount()) if indices is None else \
            [index.row() for index in indices]
        for row in rows:
            index = self.model.index(row, self.Header.variable)
            variable = self.model.data(index, VariableRole)

            if fun == "input":
                if variable not in self.reference.domain:
                    continue
                values = self.reference.get_column_view(variable)[0]
                if variable.is_primitive():
                    values = values.astype(float)
                    if all(np.isnan(values)):
                        continue
            else:
                values = self.model.data(index, ValuesRole)

            if variable.is_continuous:
                value = cont_fun(values)
                value = round(value, variable.number_of_decimals)
            elif variable.is_discrete:
                value = disc_fun(values)
            elif variable.is_string:
                value = ""
            else:
                raise NotImplementedError

            self.model.setData(index, value, ValueRole)
        self.model.dataChanged.connect(self.__table_data_changed)
        self.commit()

    @Inputs.data
    def set_data(self, data: Table):
        self.data = data
        self._set_input_summary()
        self._set_model_data()
        self.unconditional_commit()

    def _set_model_data(self):
        self.Information.nans_removed.clear()
        self.model.removeRows(0, self.model.rowCount())
        if not self.data:
            return

        self.model.set_data(self.data, self.values)
        self.values = {}
        self.view.horizontalHeader().setStretchLastSection(False)
        self.view.resizeColumnsToContents()
        self.view.resizeRowsToContents()
        self.view.horizontalHeader().setStretchLastSection(True)

    @Inputs.reference
    def set_reference(self, data: Table):
        self.reference = data
        self._set_input_summary()

    def _set_input_summary(self):
        n_data = len(self.data) if self.data else 0
        n_refs = len(self.reference) if self.reference else 0
        summary, details, kwargs = self.info.NoInput, "", {}

        if self.data or self.reference:
            summary = f"{self.info.format_number(n_data)}, " \
                      f"{self.info.format_number(n_refs)}"
            data_list = [("Data", self.data), ("Reference", self.reference)]
            details = format_multiple_summaries(data_list)
            kwargs = {"format": Qt.RichText}
        self.info.set_input_summary(summary, details, **kwargs)

    def _set_output_summary(self, data: Optional[Table] = None):
        if data:
            summary, details = len(data), format_summary_details(data)
        else:
            summary, details = self.info.NoOutput, ""
        self.info.set_output_summary(summary, details)

    def commit(self):
        output_data = None
        if self.data:
            output_data = self._create_data_from_values()
            if self.append_to_data:
                output_data = self._append_to_data(output_data)
        self._set_output_summary(output_data)
        self.Outputs.data.send(output_data)

    def _create_data_from_values(self) -> Table:
        data = Table.from_domain(self.data.domain, 1)
        data.name = "created"
        data.X[:] = np.nan
        data.Y[:] = np.nan
        for i, m in enumerate(self.data.domain.metas):
            data.metas[:, i] = "" if m.is_string else np.nan

        values = self._get_values()
        for var_name, value in values.items():
            data[:, var_name] = value
        return data

    def _append_to_data(self, data: Table) -> Table:
        assert self.data
        assert len(data) == 1

        var = DiscreteVariable("Source ID", values=(self.data.name, data.name))
        data = Table.concatenate([self.data, data], axis=0)
        domain = Domain(data.domain.attributes, data.domain.class_vars,
                        data.domain.metas + (var, ))
        data = data.transform(domain)
        data.metas[:len(self.data), -1] = 0
        data.metas[len(self.data):, -1] = 1
        return data

    def _get_values(self) -> Dict[str, Union[str, float]]:
        values = {}
        for row in range(self.model.rowCount()):
            index = self.model.index(row, self.Header.variable)
            values[self.model.data(index, VariableRole).name] = \
                self.model.data(index, ValueRole)
        return values

    def send_report(self):
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_domain("Output", self.data.domain)
        items = []
        values: Dict = self._get_values()
        for var in self.data.domain.variables + self.data.domain.metas:
            val = values.get(var.name, np.nan)
            if var.is_primitive():
                val = var.repr_val(val)
            items.append([f"{var.name}:", val])
        self.report_table("Values", items)

    @staticmethod
    def sizeHint():
        return QSize(600, 500)

    def pack_settings(self):
        self.values: Dict[str, Union[str, float]] = self._get_values()
class OWFeatureStatistics(widget.OWWidget):
    HISTOGRAM_ASPECT_RATIO = (7, 3)
    MINIMUM_HISTOGRAM_HEIGHT = 50
    MAXIMUM_HISTOGRAM_HEIGHT = 80

    name = 'Feature Statistics'
    description = 'Show basic statistics for data features.'
    icon = 'icons/FeatureStatistics.svg'

    class Inputs:
        data = Input('Data', Table, default=True)

    want_main_area = True
    buttons_area_orientation = Qt.Vertical

    settingsHandler = DomainContextHandler()

    auto_commit = Setting(True)
    color_var = ContextSetting(None)  # type: Optional[Variable]
    filter_string = ContextSetting('')

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Table]
        self.model = None  # type: Optional[FeatureStatisticsTableModel]

        # Information panel
        info_box = gui.vBox(self.controlArea, 'Info')
        info_box.setMinimumWidth(200)
        self.info_summary = gui.widgetLabel(info_box, wordWrap=True)
        self.info_attr = gui.widgetLabel(info_box, wordWrap=True)
        self.info_class = gui.widgetLabel(info_box, wordWrap=True)
        self.info_meta = gui.widgetLabel(info_box, wordWrap=True)
        self.set_info()

        # TODO: Implement filtering on the model
        # filter_box = gui.vBox(self.controlArea, 'Filter')
        # self.filter_text = gui.lineEdit(
        #     filter_box, self, value='filter_string',
        #     placeholderText='Filter variables by name',
        #     callback=self._filter_table_variables, callbackOnType=True,
        # )
        # shortcut = QShortcut(QKeySequence('Ctrl+f'), self, self.filter_text.setFocus)
        # shortcut.setWhatsThis('Filter variables by name')

        self.color_var_model = DomainModel(
            valid_types=(ContinuousVariable, DiscreteVariable),
            placeholder='None',
        )
        box = gui.vBox(self.controlArea, 'Histogram')
        self.cb_color_var = gui.comboBox(
            box, master=self, value='color_var',
            model=self.color_var_model, label='Color:', orientation=Qt.Horizontal,
        )
        self.cb_color_var.currentIndexChanged.connect(self.__color_var_changed)

        gui.rubber(self.controlArea)
        gui.auto_commit(
            self.buttonsArea, self, 'auto_commit', 'Send Selected Rows',
            'Send Automatically'
        )

        # Main area
        self.view = QTableView(
            showGrid=False,
            cornerButtonEnabled=False,
            sortingEnabled=True,
            selectionBehavior=QTableView.SelectRows,
            selectionMode=QTableView.MultiSelection,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            verticalScrollMode=QTableView.ScrollPerPixel,
        )

        hheader = self.view.horizontalHeader()
        hheader.setStretchLastSection(False)
        # Contents precision specifies how many rows should be taken into
        # account when computing the sizes, 0 being the visible rows. This is
        # crucial, since otherwise the `ResizeToContents` section resize mode
        # would call `sizeHint` on every single row in the data before first
        # render. However this, this cannot be used here, since this only
        # appears to work properly when the widget is actually shown. When the
        # widget is not shown, size `sizeHint` is called on every row.
        hheader.setResizeContentsPrecision(5)
        # Set a nice default size so that headers have some space around titles
        hheader.setDefaultSectionSize(120)
        # Set individual column behaviour in `set_data` since the logical
        # indices must be valid in the model, which requires data.
        hheader.setSectionResizeMode(QHeaderView.Interactive)

        vheader = self.view.verticalHeader()
        vheader.setVisible(False)
        vheader.setSectionResizeMode(QHeaderView.Fixed)

        def bind_histogram_aspect_ratio(logical_index, _, new_size):
            """Force the horizontal and vertical header to maintain the defined
            aspect ratio specified for the histogram."""
            # Prevent function being exectued more than once per resize
            if logical_index is not self.model.Columns.DISTRIBUTION.index:
                return
            ratio_width, ratio_height = self.HISTOGRAM_ASPECT_RATIO
            unit_width = new_size / ratio_width
            new_height = unit_width * ratio_height
            effective_height = max(new_height, self.MINIMUM_HISTOGRAM_HEIGHT)
            effective_height = min(effective_height, self.MAXIMUM_HISTOGRAM_HEIGHT)
            vheader.setDefaultSectionSize(effective_height)

        def keep_row_centered(logical_index, old_size, new_size):
            """When resizing the widget when scrolled further down, the
            positions of rows changes. Obviously, the user resized in order to
            better see the row of interest. This keeps that row centered."""
            # TODO: This does not work properly
            # Prevent function being exectued more than once per resize
            if logical_index is not self.model.Columns.DISTRIBUTION.index:
                return
            top_row = self.view.indexAt(self.view.rect().topLeft()).row()
            bottom_row = self.view.indexAt(self.view.rect().bottomLeft()).row()
            middle_row = top_row + (bottom_row - top_row) // 2
            self.view.scrollTo(self.model.index(middle_row, 0), QTableView.PositionAtCenter)

        hheader.sectionResized.connect(bind_histogram_aspect_ratio)
        hheader.sectionResized.connect(keep_row_centered)

        self.distribution_delegate = DistributionDelegate()
        self.view.setItemDelegate(self.distribution_delegate)

        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(900, 500)

    def _filter_table_variables(self):
        regex = QRegExp(self.filter_string)
        # If the user explicitly types different cases, we assume they know
        # what they are searching for and account for letter case in filter
        different_case = (
            any(c.islower() for c in self.filter_string) and
            any(c.isupper() for c in self.filter_string)
        )
        if not different_case:
            regex.setCaseSensitivity(Qt.CaseInsensitive)

    @Inputs.data
    def set_data(self, data):
        self.closeContext()
        self.data = data

        if data is not None:
            self.model = FeatureStatisticsTableModel(data, parent=self)
            self.color_var_model.set_domain(data.domain)
            # Set the selected index to 1 if any target classes, otherwise 0
            if data.domain.class_vars:
                self.color_var = data.domain.class_vars[0]
            self.openContext(self.data)
        else:
            self.model = None
            self.color_var_model.set_domain(None)

        self.view.setModel(self.model)
        self._filter_table_variables()

        self.distribution_delegate.clear()
        self.set_info()

        # The resize modes for individual columns must be set here, because
        # the logical index must be valid in `setSectionResizeMode`. It is not
        # valid when there is no data in the model.
        if self.model:
            columns, hheader = self.model.Columns, self.view.horizontalHeader()
            hheader.setSectionResizeMode(columns.ICON.index, QHeaderView.ResizeToContents)
            hheader.setSectionResizeMode(columns.DISTRIBUTION.index, QHeaderView.Stretch)

    @pyqtSlot(int)
    def __color_var_changed(self, new_index):
        attribute = None if new_index < 1 else self.cb_color_var.model()[new_index]
        self.distribution_delegate.set_color_attribute(attribute)

        if self.model:
            for row_idx in range(self.model.rowCount()):
                index = self.model.index(
                    row_idx,
                    self.model.Columns.DISTRIBUTION.index)
                self.view.update(index)

    @staticmethod
    def _format_variables_string(variables):
        agg = []
        for var_type_name, var_type in [
            ('categorical', DiscreteVariable),
            ('numeric', ContinuousVariable),
            ('time', TimeVariable),
            ('string', StringVariable)
        ]:
            var_type_list = [v for v in variables if isinstance(v, var_type)]
            if var_type_list:
                agg.append((
                    '%d %s' % (len(var_type_list), var_type_name),
                    len(var_type_list)
                ))

        if not agg:
            return 'No variables'

        attrs, counts = list(zip(*agg))
        if len(attrs) > 1:
            var_string = ', '.join(attrs[:-1]) + ' and ' + attrs[-1]
        else:
            var_string = attrs[0]
        return plural('%s variable{s}' % var_string, sum(counts))

    def set_info(self):
        if self.data is not None:
            self.info_summary.setText('<b>%s</b> contains %s with %s' % (
                self.data.name,
                plural('{number} instance{s}', self.model.n_instances),
                plural('{number} feature{s}', self.model.n_attributes)
            ))

            self.info_attr.setText(
                '<b>Attributes:</b><br>%s' %
                self._format_variables_string(self.data.domain.attributes)
            )
            self.info_class.setText(
                '<b>Class variables:</b><br>%s' %
                self._format_variables_string(self.data.domain.class_vars)
            )
            self.info_meta.setText(
                '<b>Metas:</b><br>%s' %
                self._format_variables_string(self.data.domain.metas)
            )
        else:
            self.info_summary.setText('No data on input.')
            self.info_attr.setText('')
            self.info_class.setText('')
            self.info_meta.setText('')

    def commit(self):
        pass

    def send_report(self):
        pass