Esempio n. 1
0
class OWRank(OWWidget):
    """Widget that scores the features of a data table and outputs a subset.

    Scores are shown in a table view (one row per attribute, one column per
    scoring method); the rows selected there determine which attributes are
    sent on the "Reduced Data" output, while the computed scores themselves
    are sent on the "Scores" output.
    """
    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102

    buttons_area_orientation = Qt.Vertical

    # Signal declarations; the third element of each input is the name of
    # the handler method defined on this class.
    inputs = [("Data", Table, "setData"),
              ("Scorer", score.Scorer, "set_learner", widget.Multiple)]
    outputs = [("Reduced Data", Table, widget.Default), ("Scores", Table)]

    # Attribute-selection modes; the values double as radio button ids.
    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    # Names of the scores checked by default in classification and
    # regression mode respectively.
    cls_default_selected = Setting({"Gain Ratio", "Gini Decrease"})
    reg_default_selected = Setting(
        {"Univariate Linear Regression", "RReliefF"})
    # Current selection mode and the N used by the "Best ranked" mode.
    selectMethod = Setting(SelectNBest)
    nSelected = Setting(5)
    auto_apply = Setting(True)

    # Header state for discrete/continuous/no_class scores
    headerState = Setting([None, None, None])

    settings_version = 1
    settingsHandler = DomainContextHandler()
    # Rows selected in the rank view, stored per data context.
    selected_rows = ContextSetting([])

    # Attributes bound to the score check boxes created in __init__;
    # `_score_vars` lists their names in the order they are zipped with SCORES.
    gain = inf_gain = gini = anova = chi2 = ulr = relief = rrelief = fcbc = True
    _score_vars = [
        "gain", "inf_gain", "gini", "anova", "chi2", "relief", "fcbc", "ulr",
        "rrelief"
    ]

    # Warning shown when the input data has no class variable.
    class Warning(OWWidget.Warning):
        no_target_var = Msg("Data does not have a target variable")

    # Errors for an unsupported class variable type and for an input
    # scorer that reports it cannot handle the data domain.
    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("{}")

    def __init__(self):
        """Set up widget state, the control area and the three rank views.

        One table view (with its own item model and sort proxy) is created
        for each class mode: discrete, continuous and no class. The views
        live in a stacked layout and `switchRanksMode` picks the active one.
        """
        super().__init__()
        self.measure_scores = None
        self.update_scores = True
        self.usefulAttributes = []
        self.learners = {}
        self.labels = []
        self.out_domain_desc = None

        self.all_measures = SCORES

        self.selectedMeasures = {m.name: True for m in self.all_measures}
        # Discrete (0) or continuous (1) class mode
        self.rankMode = 0

        self.data = None

        self.discMeasures = [
            m for m in self.all_measures
            if issubclass(DiscreteVariable, m.score.class_type)
        ]
        self.contMeasures = [
            m for m in self.all_measures
            if issubclass(ContinuousVariable, m.score.class_type)
        ]

        # One check box per score; the first seven go into the
        # classification box and the last two into the regression box.
        self.score_checks = []
        self.cls_scoring_box = gui.vBox(None, "Scoring for Classification")
        self.reg_scoring_box = gui.vBox(None, "Scoring for Regression")
        boxes = [self.cls_scoring_box] * 7 + [self.reg_scoring_box] * 2
        for _score, var, box in zip(SCORES, self._score_vars, boxes):
            check = gui.checkBox(
                box,
                self,
                var,
                label=_score.name,
                # Bind the score as a default so each callback keeps its own.
                callback=lambda val=_score: self.measuresSelectionChanged(val))
            self.score_checks.append(check)

        # The third (empty) page is shown when the data has no class.
        self.score_stack = QStackedWidget(self)
        self.score_stack.addWidget(self.cls_scoring_box)
        self.score_stack.addWidget(self.reg_scoring_box)
        self.score_stack.addWidget(QWidget())
        self.controlArea.layout().addWidget(self.score_stack)

        gui.rubber(self.controlArea)

        selMethBox = gui.vBox(self.controlArea,
                              "Select Attributes",
                              addSpace=True)

        grid = QGridLayout()
        grid.setContentsMargins(6, 0, 6, 0)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectMethod)

        def button(text, buttonid, toolTip=None):
            b = QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

        s = gui.spin(selMethBox,
                     self,
                     "nSelected",
                     1,
                     100,
                     callback=self.nSelectedChanged)

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

        # Discrete, continuous and no_class table views are stacked
        self.ranksViewStack = QStackedLayout()
        self.mainArea.layout().addLayout(self.ranksViewStack)

        # The views must be created in this order: their position in the
        # stack is the mode index used by `switchRanksMode`.
        self.discRanksLabels = ["#"] + [m.shortname for m in self.discMeasures]
        (self.discRanksView, self.discRanksModel,
         self.discRanksProxyModel) = self._create_ranks_view(
             self.discRanksLabels, self.headerState[0])

        self.contRanksLabels = ["#"] + [m.shortname for m in self.contMeasures]
        (self.contRanksView, self.contRanksModel,
         self.contRanksProxyModel) = self._create_ranks_view(
             self.contRanksLabels, self.headerState[1])

        self.noClassRanksLabels = ["#"]
        (self.noClassRanksView, self.noClassRanksModel,
         self.noClassRanksProxyModel) = self._create_ranks_view(
             self.noClassRanksLabels, self.headerState[2])

        # Switch the current view to Discrete
        self.switchRanksMode(0)
        self.resetInternals()
        self.updateDelegates()
        self.updateVisibleScoreColumns()

        self.resize(690, 500)

        self.measure_scores = table((len(self.measures), 0), None)

    def _create_ranks_view(self, labels, header_state):
        """Create one rank table view and add it to `ranksViewStack`.

        `labels` are the horizontal header labels of the new item model and
        `header_state` is a previously saved header state to restore, or
        None. Returns the `(view, model, proxy_model)` triple.
        """
        view = QTableView()
        self.ranksViewStack.addWidget(view)
        view.setSelectionBehavior(QTableView.SelectRows)
        view.setSelectionMode(QTableView.MultiSelection)
        view.setSortingEnabled(True)

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(labels)

        proxy = MySortProxyModel(self)
        proxy.setSourceModel(model)
        view.setModel(proxy)

        view.setColumnWidth(0, 20)
        view.selectionModel().selectionChanged.connect(self.commit)
        view.pressed.connect(self.onSelectItem)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)
        view.verticalHeader().sectionClicked.connect(self.onSelectItem)

        if header_state is not None:
            view.horizontalHeader().restoreState(header_state)
        return view, model, proxy

    def switchRanksMode(self, index):
        """
        Activate the view, model and measures for the given class mode.

        `index` 0 selects the discrete mode, 1 the continuous mode and any
        other value the no-class mode.
        """
        self.rankMode = index
        self.ranksViewStack.setCurrentIndex(index)

        hidden = (QSizePolicy.Ignored, QSizePolicy.Ignored)
        visible = (QSizePolicy.Expanding, QSizePolicy.Expanding)

        if index == 0:
            view, model, proxy = (self.discRanksView, self.discRanksModel,
                                  self.discRanksProxyModel)
            measures, checks = self.discMeasures, self.cls_default_selected
            policies = [(self.reg_scoring_box, hidden),
                        (self.cls_scoring_box, visible)]
        elif index == 1:
            view, model, proxy = (self.contRanksView, self.contRanksModel,
                                  self.contRanksProxyModel)
            measures, checks = self.contMeasures, self.reg_default_selected
            policies = [(self.cls_scoring_box, hidden),
                        (self.reg_scoring_box, visible)]
        else:
            view, model, proxy = (self.noClassRanksView,
                                  self.noClassRanksModel,
                                  self.noClassRanksProxyModel)
            measures, checks = [], set()
            policies = [(self.reg_scoring_box, hidden),
                        (self.cls_scoring_box, hidden)]

        self.ranksView = view
        self.ranksModel = model
        self.ranksProxyModel = proxy
        self.measures = measures
        self.selected_checks = checks
        for scoring_box, policy in policies:
            scoring_box.setSizePolicy(*policy)

        n_rows = len(self.measures) + len(self.learners)
        self.measure_scores = table((n_rows, 0), None)

        # Suspend score updates while the check boxes are synchronised.
        self.update_scores = False
        for check_box, meta in zip(self.score_checks, SCORES):
            check_box.setChecked(meta.name in self.selected_checks)
        self.update_scores = True

        self.score_stack.setCurrentIndex(index)
        self.updateVisibleScoreColumns()

    @check_sql_input
    def setData(self, data):
        """Handle a new table on the "Data" input (None clears the widget)."""
        self.closeContext()
        self.clear_messages()
        self.resetInternals()

        self.data = data
        self.switchRanksMode(0)
        if self.data is None:
            self.send("Scores", None)
        else:
            domain = self.data.domain
            attributes = domain.attributes
            self.usefulAttributes = [
                var for var in attributes
                if var.is_discrete or var.is_continuous
            ]

            # Pick the mode from the class variable; discrete is the default.
            if domain.has_continuous_class:
                self.switchRanksMode(1)
            elif not domain.class_var:
                self.Warning.no_target_var()
                self.switchRanksMode(2)
            elif not domain.has_discrete_class:
                self.Error.invalid_type(type(domain.class_var).__name__)

            if issparse(self.data.X):
                # keep only measures supporting sparse data
                self.measures = [
                    meta for meta in self.measures
                    if meta.score.supports_sparse_data
                ]

            # Column 0 shows the number of values, or "C" for non-discrete
            # attributes; the vertical header shows the attribute itself.
            self.ranksModel.setRowCount(len(attributes))
            for row, var in enumerate(attributes):
                shown = len(var.values) if var.is_discrete else "C"
                value_item = ScoreValueItem()
                value_item.setData(shown, Qt.DisplayRole)
                self.ranksModel.setItem(row, 0, value_item)
                header_item = QStandardItem(var.name)
                header_item.setData(gui.attributeIconDict[var],
                                    Qt.DecorationRole)
                self.ranksModel.setVerticalHeaderItem(row, header_item)

            n_rows = len(self.measures) + len(self.learners)
            self.measure_scores = table((n_rows, len(attributes)), None)
            self.updateScores()

        self.selected_rows = []
        self.openContext(data)
        self.selectMethodChanged()
        self.commit()

    def get_selection(self):
        """Return the distinct row numbers selected in the current view."""
        indexes = self.ranksView.selectionModel().selection().indexes()
        return list({index.row() for index in indexes})

    def set_learner(self, learner, lid=None):
        """Handle a scorer arriving on, or being removed from, "Scorer".

        `learner` is the scorer (None when the link is removed) and `lid`
        identifies the input link. The learner rows of `measure_scores` are
        rebuilt and only the learner scores are recomputed.
        """
        if learner is not None:
            self.learners[lid] = ScoreMeta(learner.name, learner.name, learner)
        elif lid is not None:
            # The link may be removed before it ever delivered a scorer, in
            # which case there is no entry to delete.
            self.learners.pop(lid, None)

        attrs_len = len(self.data.domain.attributes) if self.data else 0
        # Keep the built-in measure rows and start over with empty learner
        # rows.
        self.measure_scores = self.measure_scores[:len(self.measures)]
        self.measure_scores += table((len(self.learners), attrs_len), None)

        # Reset the headers to the built-in labels only.
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels)

        # Recompute the learner scores only.
        measures_mask = ([False] * len(self.measures)
                         + [True] * len(self.learners))
        self.updateScores(measures_mask)
        self.commit()

    def updateScores(self, measuresMask=None):
        """
        Update the current computed scores.

        If `measuresMask` is given it must be a list of bool values
        indicating what measures should be recomputed. Its entries are
        matched positionally against `self.measures` followed by the
        input learners. When it is None, all currently selected measures
        and all learners are recomputed.

        The scores are stored in `self.measure_scores`, the rank model is
        refreshed and a table of scores is sent on the "Scores" output.
        """
        # Nothing to score without data.
        if not self.data:
            return
        if self.data.has_missing():
            self.information("Missing values have been imputed.")

        measures = self.measures + [v for k, v in self.learners.items()]
        if measuresMask is None:
            # Update all selected measures
            measuresMask = [
                self.selectedMeasures.get(m.name) for m in self.measures
            ]
            # Learner entries use the learner's name as the mask value; it
            # is only ever tested for truthiness below.
            measuresMask = measuresMask + [
                v.name for k, v in self.learners.items()
            ]

        data = self.data
        # Index of the next row of `measure_scores` used for learner scores.
        learner_col = len(self.measures)
        # Reset the learner labels and the inadequate-learner error when the
        # mask has no learner entries or its first learner entry is set.
        if len(measuresMask) <= len(self.measures) or \
                measuresMask[len(self.measures)]:
            self.labels = []
            self.Error.inadequate_learner.clear()

        self.setStatusMessage("Running")
        with self.progressBar():
            n_measure_update = len([x for x in measuresMask if x is not False])
            count = 0
            for index, (meas, mask) in enumerate(zip(measures, measuresMask)):
                if not mask:
                    continue
                self.progressBarSet(90 * count / n_measure_update)
                count += 1
                if index < len(self.measures):
                    # Built-in measure: `meas.score` is instantiated first.
                    estimator = meas.score()
                    try:
                        self.measure_scores[index] = estimator(data)
                    except ValueError:
                        # Scoring the whole table failed: score attribute
                        # by attribute, storing None for those that fail.
                        self.measure_scores[index] = []
                        for attr in data.domain.attributes:
                            try:
                                self.measure_scores[index].append(
                                    estimator(data, attr))
                            except ValueError:
                                self.measure_scores[index].append(None)
                else:
                    # Input learner: `meas.score` is the learner itself.
                    learner = meas.score
                    if isinstance(learner, Learner) and \
                            not learner.check_learner_adequacy(self.data.domain):
                        self.Error.inadequate_learner(
                            learner.learner_adequacy_err_msg)
                        # Use a single placeholder row for this learner.
                        scores = table((1, len(data.domain.attributes)))
                    else:
                        scores = meas.score.score_data(data)
                    # Each returned row gets its own label and its own row
                    # in `measure_scores`.
                    for i, row in enumerate(scores):
                        self.labels.append(meas.shortname + str(i + 1))
                        if len(self.measure_scores) > learner_col:
                            self.measure_scores[learner_col] = row
                        else:
                            self.measure_scores.append(row)
                        learner_col += 1
            self.progressBarSet(90)
        # Headers are the built-in labels followed by the learner labels.
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels +
                                                      self.labels)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels +
                                                      self.labels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels + self.labels)
        self.updateRankModel(measuresMask)
        self.ranksProxyModel.invalidate()
        self.selectMethodChanged()
        self.send("Scores", self.create_scores_table(self.labels))
        self.setStatusMessage("")

    def updateRankModel(self, measuresMask):
        """
        Update the rankModel.

        Copies the rows of `self.measure_scores` into the columns of the
        current rank model (column 0 is left alone; measure `i` is shown in
        column `i + 1`) and sets the bar ratio of each updated cell.

        `measuresMask` marks which measures to update. Note that the list
        is extended in place when it is shorter than `self.measure_scores`.
        """
        values = []
        # Pad the mask with its last value so that it covers every score
        # row (a non-positive `diff` adds nothing).
        diff = len(self.measure_scores) - len(measuresMask)
        if len(measuresMask):
            measuresMask += [measuresMask[-1]] * diff
        # Remove model columns beyond the last score column, starting from
        # the end.
        for i in range(self.ranksModel.columnCount() - 1,
                       len(self.measure_scores), -1):
            self.ranksModel.removeColumn(i)

        for i, (scores, m) in enumerate(zip(self.measure_scores,
                                            measuresMask)):
            # Skip unmasked measures whose column already has an item in
            # row 0; the empty placeholder keeps `values` aligned with the
            # measure index.
            if not m and self.ranksModel.item(0, i + 1):
                values.append([])
                continue
            values_one = []
            for j, _score in enumerate(scores):
                values_one.append(_score)
                item = self.ranksModel.item(j, i + 1)
                if not item:
                    item = ScoreValueItem()
                    self.ranksModel.setItem(j, i + 1, item)
                item.setData(_score, Qt.DisplayRole)
            values.append(values_one)
        for i, (vals, m) in enumerate(zip(values, measuresMask)):
            if not m:
                continue
            valid_vals = [v for v in vals if v is not None]
            if valid_vals:
                vmin, vmax = min(valid_vals), max(valid_vals)
                for j, v in enumerate(vals):
                    if v is not None:
                        # Set the bar ratio role for i-th measure.
                        # `or 1` avoids dividing by zero when all valid
                        # values are equal.
                        ratio = float((v - vmin) / ((vmax - vmin) or 1))
                        item = self.ranksModel.item(j, i + 1)
                        item.setData(ratio, gui.BarRatioRole)

        self.ranksView.setColumnWidth(0, 20)
        self.ranksView.resizeRowsToContents()

    def resetInternals(self):
        """Forget the current data and empty the current rank model."""
        self.data, self.usefulAttributes = None, []
        self.ranksModel.setRowCount(0)

    def onSelectItem(self, index):
        """
        Switch to manual selection after the user clicks in the table view.

        `index` is supplied by the connected signal and is not used.
        """
        manual = OWRank.SelectManual
        self.selectMethod = manual
        self.selectButtons.button(manual).setChecked(True)
        self.commit()

    def setSelectMethod(self, method):
        """Make `method` the selection mode; do nothing if it already is."""
        if self.selectMethod == method:
            return
        self.selectMethod = method
        self.selectButtons.button(method).setChecked(True)
        self.selectMethodChanged()

    def selectMethodChanged(self):
        """Reapply the selection for the current mode and focus the view."""
        self.autoSelection()
        current_view = self.ranksView
        current_view.setFocus()

    def nSelectedChanged(self):
        """Switch to "Best ranked" mode when the spin box value changes."""
        n_best = OWRank.SelectNBest
        self.selectMethod = n_best
        self.selectButtons.button(n_best).setChecked(True)
        self.selectMethodChanged()

    def autoSelection(self):
        """Select rows in the current view according to `selectMethod`.

        None and All select no rows and every row; "Best ranked" selects the
        first `nSelected` rows of the sorted proxy model; Manual re-selects
        the rows stored in `selected_rows`.
        """
        selection_model = self.ranksView.selectionModel()
        proxy = self.ranksProxyModel
        n_rows = self.ranksModel.rowCount()
        last_column = self.ranksModel.columnCount() - 1
        method = self.selectMethod

        def first_rows(last_row):
            # Selection spanning rows 0..last_row across all columns.
            return QItemSelection(proxy.index(0, 0),
                                  proxy.index(last_row, last_column))

        if method == OWRank.SelectAll:
            selection = first_rows(n_rows - 1)
        elif method == OWRank.SelectNBest:
            selection = first_rows(min(self.nSelected, n_rows) - 1)
        elif method == OWRank.SelectNone:
            selection = QItemSelection()
        else:
            selection = QItemSelection()
            for row in self.selected_rows:
                selection.append(
                    QItemSelectionRange(proxy.index(row, 0),
                                        proxy.index(row, last_column)))

        selection_model.select(selection, QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """React to a click on horizontal header section `index`.

        In "Best ranked" mode the top rows are selected again (unless the
        click was on section 0), and the header state of all three views is
        stored in the `headerState` setting.
        """
        if self.selectMethod == OWRank.SelectNBest and index >= 1:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states
        views = (self.discRanksView, self.contRanksView,
                 self.noClassRanksView)
        self.headerState = [
            bytes(view.horizontalHeader().saveState()) for view in views
        ]

    def measuresSelectionChanged(self, measure):
        """Toggle `measure` and update scores and column visibility."""
        name = measure.name
        was_checked = self.selectedMeasures[name]
        self.selectedMeasures[name] = not was_checked
        if was_checked:
            self.selected_checks.discard(name)
        else:
            self.selected_checks.add(name)

        n_columns = len(self.measures) + len(self.learners)
        measures_mask = [False] * n_columns
        # Update scores for shown column if they are not yet computed.
        if measure in self.measures and self.measure_scores:
            position = self.measures.index(measure)
            if all(value is None for value in self.measure_scores[position]):
                measures_mask[position] = True
        if self.update_scores:
            self.updateScores(measures_mask)
        self.updateVisibleScoreColumns()

    def updateVisibleScoreColumns(self):
        """
        Show only the columns of selected measures in the current view.

        If no header state is stored for the current mode (or the sort
        column has just been hidden), the view is sorted in descending
        order by the first checked measure and the selection is reapplied.
        """
        view = self.ranksView
        for column, measure in enumerate(self.measures, start=1):
            is_shown = self.selectedMeasures.get(measure.name)
            view.setColumnHidden(column, not is_shown)
            view.setColumnWidth(column, 100)

        sort_section = view.horizontalHeader().sortIndicatorSection()
        if view.isColumnHidden(sort_section):
            self.headerState[self.rankMode] = None

        if self.headerState[self.rankMode] is not None:
            return

        # First checked measure column, else the column after the measures.
        sort_column = next(
            (column
             for column, measure in enumerate(self.measures, start=1)
             if measure.name in self.selected_checks),
            len(self.measures) + 1)
        view.sortByColumn(sort_column, Qt.DescendingOrder)
        self.autoSelection()

    def updateDelegates(self):
        """Give each of the three rank views its own bar item delegate."""
        views = (self.contRanksView, self.discRanksView,
                 self.noClassRanksView)
        for view in views:
            view.setItemDelegate(gui.ColoredBarItemDelegate(self))

    def send_report(self):
        """Report the input domain, the rank table and the output domain."""
        if self.data:
            self.report_domain("Input", self.data.domain)
            self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
            output_description = self.out_domain_desc
            if output_description is not None:
                self.report_items("Output", output_description)

    def commit(self):
        """Send the data restricted to the attributes selected in the view.

        None is sent when there is no data or nothing is selected. When
        every attribute is selected, the mode is switched to "All".
        """
        rows = self.get_selection()
        self.selected_rows = rows
        if self.data and len(rows) == len(self.data.domain.attributes):
            self.selectMethod = OWRank.SelectAll
            self.selectButtons.button(self.selectMethod).setChecked(True)

        selected = self.selectedAttrs()
        if self.data and selected:
            source_domain = self.data.domain
            reduced_domain = Domain(selected, source_domain.class_var,
                                    source_domain.metas)
            reduced = Table(reduced_domain, self.data)
            self.send("Reduced Data", reduced)
            self.out_domain_desc = report.describe_domain(reduced.domain)
        else:
            self.send("Reduced Data", None)
            self.out_domain_desc = None

    def selectedAttrs(self):
        """Return the attributes of the selected rows ([] without data)."""
        if not self.data:
            return []
        proxy_indexes = self.ranksView.selectionModel().selectedRows(0)
        to_source = self.ranksProxyModel.mapToSource
        # Map the sorted proxy rows back to rows of the source model.
        source_rows = [to_source(index).row() for index in proxy_indexes]
        attributes = self.data.domain.attributes
        return [attributes[row] for row in source_rows]

    def create_scores_table(self, labels):
        """Build the "Feature Scores" table sent on the "Scores" output.

        Its columns are the selected built-in measures followed by `labels`
        (the learner score columns); each row is one attribute of the input
        data, with the attribute's name as a meta value. Returns None when
        there are no columns.
        """
        chosen = [
            (position, meta.name)
            for position, meta in enumerate(self.measures)
            if self.selectedMeasures.get(meta.name, False)
        ]
        indices = [position for position, _ in chosen]
        column_names = [name for _, name in chosen]
        column_names.extend(labels)
        if not column_names:
            return None

        domain = Domain(
            [ContinuousVariable(name) for name in column_names],
            metas=[StringVariable("Feature name")])

        # Keep the rows of selected measures and every learner row.
        n_builtin = len(self.measures)
        kept_rows = [
            row for position, row in enumerate(self.measure_scores)
            if position in indices or position >= n_builtin
        ]
        scores = np.nan_to_num(np.array(kept_rows, dtype=np.float64).T)

        names = [attr.name for attr in self.data.domain.attributes]
        # Reshape to 2d array as Table does not like 1d arrays
        feature_names = np.array(names)[:, None]

        new_table = Table(domain, scores, metas=feature_names)
        new_table.name = "Feature Scores"
        return new_table

    @classmethod
    def migrate_settings(cls, settings, version):
        """Upgrade stored settings that have no version, in place.

        Pads a "headerState" entry shorter than three items with None so it
        has one slot per rank view (discrete, continuous, no class).
        """
        if version:
            return
        # Before fc5caa1e1d716607f1f5c4e0b0be265c23280fa0
        # headerState had length 2
        header_state = settings.get("headerState", None)
        # Lists are accepted as well as tuples: the widget itself stores the
        # state as a list and indexes it at 0..2 in __init__.
        if isinstance(header_state, (tuple, list)) and len(header_state) < 3:
            settings["headerState"] = (list(header_state) + [None] * 3)[:3]
Esempio n. 2
0
class OWDataTable(OWWidget):
    """Widget that shows each input table in its own tab of a tab widget.

    The selection made in the current tab is sent on the "Selected Data"
    output, together with an annotated copy of the input table.
    """
    # Widget metadata.
    name = "Data Table"
    description = "View the dataset in a spreadsheet."
    icon = "icons/Table.svg"
    priority = 50
    keywords = []

    buttons_area_orientation = Qt.Vertical

    class Inputs:
        # Declared with multiple=True; each input is tracked by its id in
        # `set_dataset`.
        data = Input("Data", Table, multiple=True)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    # Draw bars for numeric values (uses gui.TableBarItem as delegate).
    show_distributions = Setting(False)
    # RGBA components used to build `self.dist_color` in __init__.
    dist_color_RGB = Setting((220, 220, 220, 255))
    # Show variable labels (var.attributes) in the horizontal header.
    show_attribute_labels = Setting(True)
    # Select whole rows instead of individual cells/blocks.
    select_rows = Setting(True)
    auto_commit = Setting(True)

    # Use the discrete class variable's colors for the bar delegate.
    color_by_class = Setting(True)
    settingsHandler = DomainContextHandler(
        match_values=DomainContextHandler.MATCH_VALUES_ALL)
    # Stored selection; __init__ moves these into "pending" attributes that
    # are applied when data arrives in `set_dataset`.
    selected_rows = Setting([], schema_only=True)
    selected_cols = Setting([], schema_only=True)

    def __init__(self):
        """Create the control area (info, variables, selection) and the tabs."""
        super().__init__()

        # Input slots keyed by input id, in insertion order.
        self._inputs = OrderedDict()

        # Keep the selection restored from settings aside until data arrives;
        # `set_dataset` applies and then clears the pending values.
        self.__pending_selected_rows = self.selected_rows
        self.selected_rows = None
        self.__pending_selected_cols = self.selected_cols
        self.selected_cols = None

        self.dist_color = QColor(*self.dist_color_RGB)

        # Info box: four word-wrapped labels filled in by `set_info`.
        info_box = gui.vBox(self.controlArea, "Info")
        self.info_ex = gui.widgetLabel(info_box, 'No data on input.', )
        self.info_ex.setWordWrap(True)
        self.info_attr = gui.widgetLabel(info_box, ' ')
        self.info_attr.setWordWrap(True)
        self.info_class = gui.widgetLabel(info_box, ' ')
        self.info_class.setWordWrap(True)
        self.info_meta = gui.widgetLabel(info_box, ' ')
        self.info_meta.setWordWrap(True)
        info_box.setMinimumWidth(200)
        gui.separator(self.controlArea)

        # Display options.
        box = gui.vBox(self.controlArea, "Variables")
        self.c_show_attribute_labels = gui.checkBox(
            box, self, "show_attribute_labels",
            "Show variable labels (if present)",
            callback=self._on_show_variable_labels_changed)

        # Both check boxes below share the same callback, which rebuilds the
        # item delegates of all tabs.
        gui.checkBox(box, self, "show_distributions",
                     'Visualize numeric values',
                     callback=self._on_distribution_color_changed)
        gui.checkBox(box, self, "color_by_class", 'Color by instance classes',
                     callback=self._on_distribution_color_changed)

        box = gui.vBox(self.controlArea, "Selection")

        gui.checkBox(box, self, "select_rows", "Select full rows",
                     callback=self._on_select_rows_changed)

        gui.rubber(self.controlArea)

        # The reset button is created without a parent and inserted at the
        # first position of the buttons area layout.
        reset = gui.button(
            None, self, "Restore Original Order", callback=self.restore_order,
            tooltip="Show rows in the original order", autoDefault=False)
        self.buttonsArea.layout().insertWidget(0, reset)
        gui.auto_send(self.buttonsArea, self, "auto_commit")

        # GUI with tabs
        self.tabs = gui.tabWidget(self.mainArea)
        self.tabs.currentChanged.connect(self._on_current_tab_changed)

    def copy_to_clipboard(self):
        """Copy the current table selection to the clipboard (see `copy`)."""
        self.copy()

    @staticmethod
    def sizeHint():
        """Return the size hint for the widget: 800 x 500."""
        return QSize(800, 500)

    @Inputs.data
    def set_dataset(self, data, tid=None):
        """Set the input dataset.

        Parameters
        ----------
        data
            The input table, or None to remove the input stored under `tid`.
        tid
            Key under which the input is stored in `self._inputs`.

        A new tab is created for an unknown `tid`; the existing tab is reused
        (with its model and sort indicator reset) for a known one.  When
        `data` is None and `tid` is known, its tab is removed.  In all cases
        the context is reopened, the stored selection is applied and the
        outputs are committed unconditionally.
        """
        self.closeContext()
        if data is not None:
            datasetname = getattr(data, "name", "Data")
            if tid in self._inputs:
                # update existing input slot
                slot = self._inputs[tid]
                view = slot.view
                # reset the (header) view state.
                view.setModel(None)
                view.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)
                assert self.tabs.indexOf(view) != -1
                self.tabs.setTabText(self.tabs.indexOf(view), datasetname)
            else:
                # first time this input id is seen: create a view and a tab
                view = QTableView()
                view.setSortingEnabled(True)
                view.setHorizontalScrollMode(QTableView.ScrollPerPixel)

                if self.select_rows:
                    view.setSelectionBehavior(QTableView.SelectRows)

                header = view.horizontalHeader()
                header.setSectionsMovable(True)
                header.setSectionsClickable(True)
                header.setSortIndicatorShown(True)
                header.setSortIndicator(-1, Qt.AscendingOrder)

                # QHeaderView does not 'reset' the model sort column,
                # because there is no guaranty (requirement) that the
                # models understand the -1 sort column.
                def sort_reset(index, order):
                    if view.model() is not None and index == -1:
                        view.model().sort(index, order)

                header.sortIndicatorChanged.connect(sort_reset)
                self.tabs.addTab(view, datasetname)

            view.dataset = data
            self.tabs.setCurrentWidget(view)

            self._setup_table_view(view, data)
            # Remember the slot both in the inputs mapping and on the view
            # itself, so that tab-based callbacks can reach the summary.
            slot = TableSlot(tid, data, table_summary(data), view)
            view._input_slot = slot  # pylint: disable=protected-access
            self._inputs[tid] = slot

            self.tabs.setCurrentIndex(self.tabs.indexOf(view))

            self.set_info(slot.summary)

            # The length may still be pending as a Future; refresh the info
            # box through a queued call to `_update_info` once it is done.
            if isinstance(slot.summary.len, concurrent.futures.Future):
                def update(_):
                    QMetaObject.invokeMethod(
                        self, "_update_info", Qt.QueuedConnection)

                slot.summary.len.add_done_callback(update)

        elif tid in self._inputs:
            # data is None: drop the input slot and its tab
            slot = self._inputs.pop(tid)
            view = slot.view
            view.hide()
            view.deleteLater()
            self.tabs.removeTab(self.tabs.indexOf(view))

            current = self.tabs.currentWidget()
            if current is not None:
                # pylint: disable=protected-access
                self.set_info(current._input_slot.summary)

        # The tab bar is only shown when there is more than one tab.
        self.tabs.tabBar().setVisible(self.tabs.count() > 1)
        self.openContext(data)

        # Apply the selection stashed in __init__ (once), otherwise start
        # with an empty selection.
        if data and self.__pending_selected_rows is not None:
            self.selected_rows = self.__pending_selected_rows
            self.__pending_selected_rows = None
        else:
            self.selected_rows = []

        if data and self.__pending_selected_cols is not None:
            self.selected_cols = self.__pending_selected_cols
            self.__pending_selected_cols = None
        else:
            self.selected_cols = []

        self.set_selection()
        self.unconditional_commit()

    def _setup_table_view(self, view, data):
        """Setup the `view` (QTableView) with `data` (Orange.data.Table)

        Installs a `RichTableModel` for `data` (wrapped in a
        `TableSliceProxy` when the table has more rows than the view can
        display), the item delegate matching the current display settings,
        fixed-height rows and a `BlockSelectionModel` whose changes trigger
        `update_selection`.  With `data` None the view's model is cleared.
        """
        if data is None:
            view.setModel(None)
            return

        datamodel = RichTableModel(data)

        rowcount = data.approx_len()

        # Bars are colored by class only for a discrete class variable.
        if self.color_by_class and data.domain.has_discrete_class:
            color_schema = [
                QColor(*c) for c in data.domain.class_var.colors]
        else:
            color_schema = None
        if self.show_distributions:
            view.setItemDelegate(
                gui.TableBarItem(
                    self, color=self.dist_color, color_schema=color_schema)
            )
        else:
            view.setItemDelegate(QStyledItemDelegate(self))

        # Enable/disable view sorting based on data's type
        view.setSortingEnabled(is_sortable(data))
        header = view.horizontalHeader()
        header.setSectionsClickable(is_sortable(data))
        header.setSortIndicatorShown(is_sortable(data))

        view.setModel(datamodel)

        # Derive the row height from the style's size for an item with
        # 20 x 20 contents, and keep all rows at that fixed height.
        vheader = view.verticalHeader()
        option = view.viewOptions()
        size = view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option,
            QSize(20, 20), view)

        vheader.setDefaultSectionSize(size.height() + 2)
        vheader.setMinimumSectionSize(5)
        vheader.setSectionResizeMode(QHeaderView.Fixed)

        # Limit the number of rows displayed in the QTableView
        # (workaround for QTBUG-18490 / QTBUG-28631)
        maxrows = (2 ** 31 - 1) // (vheader.defaultSectionSize() + 2)
        if rowcount > maxrows:
            sliceproxy = TableSliceProxy(
                parent=view, rowSlice=slice(0, maxrows))
            sliceproxy.setSourceModel(datamodel)
            # First reset the view (without this the header view retains
            # it's state - at this point invalid/broken)
            view.setModel(None)
            view.setModel(sliceproxy)

        assert view.model().rowCount() <= maxrows
        assert vheader.sectionSize(0) > 1 or datamodel.rowCount() == 0

        # update the header (attribute names)
        self._update_variable_labels(view)

        # Block selection is used unless full rows are being selected.
        selmodel = BlockSelectionModel(
            view.model(), parent=view, selectBlocks=not self.select_rows)
        view.setSelectionModel(selmodel)
        view.selectionModel().selectionChanged.connect(self.update_selection)

    #noinspection PyBroadException
    def set_corner_text(self, table, text):
        """Set table corner text.

        On the first call for a `table`, the first QAbstractButton child
        found in it is taken as the corner button: an event filter that
        paints it as a header section is installed and its click handler is
        replaced by `_on_select_all`.  The button is cached as `table.btn`;
        if anything fails, `table.btnfailed` is set and no further attempt
        is made.  Afterwards the button text is set to `text` and the
        vertical header's minimum width is adjusted to fit it.
        """
        # As this is an ugly hack, do everything in
        # try - except blocks, as it may stop working in newer Qt.
        # pylint: disable=broad-except
        if not hasattr(table, "btn") and not hasattr(table, "btnfailed"):
            try:
                btn = table.findChild(QAbstractButton)

                class Efc(QObject):
                    @staticmethod
                    def eventFilter(o, e):
                        if (isinstance(o, QAbstractButton) and
                                e.type() == QEvent.Paint):
                            # paint by hand (borrowed from QTableCornerButton)
                            btn = o
                            opt = QStyleOptionHeader()
                            opt.initFrom(btn)
                            state = QStyle.State_None
                            if btn.isEnabled():
                                state |= QStyle.State_Enabled
                            if btn.isActiveWindow():
                                state |= QStyle.State_Active
                            if btn.isDown():
                                state |= QStyle.State_Sunken
                            opt.state = state
                            opt.rect = btn.rect()
                            opt.text = btn.text()
                            opt.position = QStyleOptionHeader.OnlyOneSection
                            painter = QStylePainter(btn)
                            painter.drawControl(QStyle.CE_Header, opt)
                            return True     # eat event
                        return False
                # Keep a reference to the filter object on the table.
                table.efc = Efc()
                # disconnect default handler for clicks and connect a new one, which supports
                # both selection and deselection of all data
                btn.clicked.disconnect()
                btn.installEventFilter(table.efc)
                btn.clicked.connect(self._on_select_all)
                table.btn = btn

                if sys.platform == "darwin":
                    btn.setAttribute(Qt.WA_MacSmallSize)

            except Exception:
                # Remember the failure so the setup is not retried.
                table.btnfailed = True

        if hasattr(table, "btn"):
            try:
                btn = table.btn
                btn.setText(text)
                # Make the vertical header wide enough for the button text.
                opt = QStyleOptionHeader()
                opt.text = btn.text()
                s = btn.style().sizeFromContents(
                    QStyle.CT_HeaderSection,
                    opt, QSize(),
                    btn).expandedTo(QApplication.globalStrut())
                if s.isValid():
                    table.verticalHeader().setMinimumWidth(s.width())
            except Exception:
                # Best effort only: the corner text is purely cosmetic.
                pass

    def _on_select_all(self, _):
        # pylint: disable=protected-access
        data_info = self.tabs.currentWidget()._input_slot.summary
        if len(self.selected_rows) == data_info.len \
                and len(self.selected_cols) == len(data_info.domain):
            self.tabs.currentWidget().clearSelection()
        else:
            self.tabs.currentWidget().selectAll()

    def _on_current_tab_changed(self, index):
        """Update the info box on current tab change"""
        view = self.tabs.widget(index)
        if view is not None and view.model() is not None:
            # pylint: disable=protected-access
            self.set_info(view._input_slot.summary)
        else:
            self.set_info(None)

    def _update_variable_labels(self, view):
        """Update the variable labels visibility for `view`.

        When labels are shown, the corner text lists (one per line, after an
        empty first line) the sorted names of all variable attributes in the
        domain that do not start with an underscore.
        """
        model = view.model()
        if isinstance(model, TableSliceProxy):
            model = model.sourceModel()

        if not self.show_attribute_labels:
            model.setRichHeaderFlags(RichTableModel.Name)
            self.set_corner_text(view, "")
            return

        model.setRichHeaderFlags(RichTableModel.Labels | RichTableModel.Name)
        domain = model.source.domain
        visible_labels = sorted({
            key
            for var in itertools.chain(domain.metas, domain.variables)
            for key in var.attributes.keys()
            if not key.startswith("_")
        })
        self.set_corner_text(view, "\n".join([""] + visible_labels))

    def _on_show_variable_labels_changed(self):
        """The variable labels (var.attribues) visibility was changed."""
        for slot in self._inputs.values():
            self._update_variable_labels(slot.view)

    def _on_distribution_color_changed(self):
        """Install a fresh item delegate on every tab and reset the current one.

        A `gui.TableBarItem` delegate is used when numeric values are
        visualized, a plain `QStyledItemDelegate` otherwise.  The bar
        delegate gets the class colors when coloring by class is enabled and
        the table has a discrete class variable.
        """
        for tab_index in range(self.tabs.count()):
            view = self.tabs.widget(tab_index)
            # Unwrap any proxy models to reach the model holding the table.
            class_var = self._get_model(view).source.domain.class_var
            if self.color_by_class and class_var and class_var.is_discrete:
                color_schema = [QColor(*rgb) for rgb in class_var.colors]
            else:
                color_schema = None
            if self.show_distributions:
                view.setItemDelegate(gui.TableBarItem(
                    self, color=self.dist_color, color_schema=color_schema))
            else:
                view.setItemDelegate(QStyledItemDelegate(self))
        current = self.tabs.currentWidget()
        if current:
            current.reset()

    def _on_select_rows_changed(self):
        """Switch all views between full-row and item (block) selection."""
        for slot in self._inputs.values():
            view = slot.view
            selmodel = view.selectionModel()
            selmodel.setSelectBlocks(not self.select_rows)
            if not self.select_rows:
                view.setSelectionBehavior(QTableView.SelectItems)
                continue
            view.setSelectionBehavior(QTableView.SelectRows)
            # Expand the current selection to full row selection.
            expand = QItemSelectionModel.Select | QItemSelectionModel.Rows
            selmodel.select(selmodel.selection(), expand)

    def restore_order(self):
        """Restore the original data order of the current view.

        Done by setting the horizontal header's sort indicator to column -1;
        nothing happens when there is no current tab.
        """
        view = self.tabs.currentWidget()
        if view is None:
            return
        header = view.horizontalHeader()
        header.setSortIndicator(-1, Qt.AscendingOrder)

    def set_info(self, summary):
        """Fill the four info labels from `summary`.

        With `summary` None the labels are reset to the "no data" state;
        otherwise their texts come from `format_summary(summary)`.
        """
        if summary is None:
            len_text, attr_text, class_text, meta_text = \
                "No data on input.", "", "", ""
        else:
            len_text, attr_text, class_text, meta_text = \
                format_summary(summary)

        self.info_ex.setText(len_text)
        self.info_attr.setText(attr_text)
        self.info_class.setText(class_text)
        self.info_meta.setText(meta_text)

    @Slot()
    def _update_info(self):
        """Refresh the info box from the current tab's summary.

        Does nothing when there is no current tab or it has no model.
        """
        view = self.tabs.currentWidget()
        if view is None or view.model() is None:
            return
        # pylint: disable=protected-access
        self.set_info(view._input_slot.summary)

    def update_selection(self, *_):
        """Commit the selection; any positional arguments are ignored.

        Connected to the views' `selectionChanged` signal.
        """
        self.commit()

    def set_selection(self):
        """Apply the stored row/column selection to the current view.

        Nothing is done when either stored selection is empty, or when the
        last stored row or column index does not fit into the view's model.
        Otherwise every combination of row range and column range is
        selected, replacing the previous selection.
        """
        if not (self.selected_rows and self.selected_cols):
            return

        view = self.tabs.currentWidget()
        model = view.model()
        if not (self.selected_rows[-1] < model.rowCount()
                and self.selected_cols[-1] < model.columnCount()):
            return

        row_spans = list(ranges(self.selected_rows))
        col_spans = list(ranges(self.selected_cols))

        selection = QItemSelection()
        for (row_lo, row_hi), (col_lo, col_hi) in itertools.product(
                row_spans, col_spans):
            # Spans are half-open, the selection range is inclusive.
            top_left = model.index(row_lo, col_lo)
            bottom_right = model.index(row_hi - 1, col_hi - 1)
            selection.append(QItemSelectionRange(top_left, bottom_right))

        view.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    @staticmethod
    def get_selection(view):
        """
        Return the selected row and column indices of the selection in view.

        The selection is mapped through all proxy models down to the table
        model; the rows are additionally mapped through the model's applied
        sorting (if any) and returned in ascending order.
        """
        selection_model = view.selectionModel()
        selection = selection_model.selection()

        # map through the proxies into input table.
        source_model = view.model()
        while isinstance(source_model, QAbstractProxyModel):
            selection = source_model.mapSelectionToSource(selection)
            source_model = source_model.sourceModel()

        assert isinstance(selection_model, BlockSelectionModel)
        assert isinstance(source_model, TableModel)

        row_spans, col_spans = selection_blocks(selection)
        view_rows = [row for span in row_spans for row in range(*span)]
        cols = [col for span in col_spans for col in range(*span)]

        # map the rows through the applied sorting (if any)
        source_rows = source_model.mapToSourceRows(
            numpy.array(view_rows, dtype=numpy.intp))
        source_rows.sort()
        return source_rows.tolist(), cols

    @staticmethod
    def _get_model(view):
        """Return the model of `view` with all proxy models unwrapped."""
        model = view.model()
        # Follow the chain of proxies down to the first non-proxy model.
        while isinstance(model, QAbstractProxyModel):
            model = model.sourceModel()
        return model

    def commit(self):
        """
        Commit/send the current selected row/column selection.

        The selection of the current tab is stored in `selected_rows` and
        `selected_cols`.  "Selected Data" receives the selected rows
        restricted to the selected columns (None when no row is selected);
        the annotated output receives the result of `create_annotated_table`
        for the input table and the selected rows.  For `SqlTable` inputs
        both outputs are None.
        """
        selected_data = table = rowsel = None
        view = self.tabs.currentWidget()
        if view and view.model() is not None:
            model = self._get_model(view)
            table = model.source  # The input data table

            # Selections of individual instances are not implemented
            # for SqlTables
            if isinstance(table, SqlTable):
                self.Outputs.selected_data.send(selected_data)
                self.Outputs.annotated_data.send(None)
                return

            rowsel, colsel = self.get_selection(view)
            self.selected_rows, self.selected_cols = rowsel, colsel

            def select(data, rows, domain):
                """
                Select the data subset with specified rows and domain subsets.

                If either rows or domain is None they mean select all.
                """
                if rows is not None and domain is not None:
                    return data.from_table(domain, data, rows)
                elif rows is not None:
                    # The source table must be passed as well; previously
                    # the row indices were passed in its place.
                    return data.from_table(data.domain, data, rows)
                elif domain is not None:
                    return data.from_table(domain, data)
                else:
                    return data

            domain = table.domain

            if len(colsel) < len(domain) + len(domain.metas):
                # only a subset of the columns is selected
                allvars = domain.class_vars + domain.metas + domain.attributes
                columns = [(c, model.headerData(c, Qt.Horizontal,
                                                TableModel.DomainRole))
                           for c in colsel]
                assert all(role is not None for _, role in columns)

                def select_vars(role):
                    """select variables for role (TableModel.DomainRole)"""
                    return [allvars[c] for c, r in columns if r == role]

                attrs = select_vars(TableModel.Attribute)
                if attrs and issparse(table.X):
                    # for sparse data you can only select all attributes
                    attrs = table.domain.attributes
                class_vars = select_vars(TableModel.ClassVar)
                metas = select_vars(TableModel.Meta)
                domain = Orange.data.Domain(attrs, class_vars, metas)

            # Avoid a copy if all/none rows are selected.
            if not rowsel:
                selected_data = None
            elif len(rowsel) == len(table):
                selected_data = select(table, None, domain)
            else:
                selected_data = select(table, rowsel, domain)

        self.Outputs.selected_data.send(selected_data)
        self.Outputs.annotated_data.send(create_annotated_table(table, rowsel))

    def copy(self):
        """
        Copy current table selection to the clipboard.

        Does nothing when there is no current tab.
        """
        view = self.tabs.currentWidget()
        if view is None:
            return
        mime_data = table_selection_to_mime_data(view)
        clipboard = QApplication.clipboard()
        clipboard.setMimeData(mime_data, QClipboard.Clipboard)

    def send_report(self):
        """Report a brief description of the current table and the view itself.

        Nothing is reported when there is no current tab or it has no model.
        """
        view = self.tabs.currentWidget()
        if view and view.model():
            source_table = self._get_model(view).source
            self.report_data_brief(source_table)
            self.report_table(view)
        self.report_table(view)
Esempio n. 3
0
class OWDistances(OWWidget, ConcurrentWidgetMixin):
    """Widget that computes a distance matrix between rows or columns.

    The computation is started through `ConcurrentWidgetMixin.start` and the
    result is sent on the "Distances" output.
    """
    name = "Distances"
    description = "Compute a matrix of pairwise distances."
    icon = "icons/Distance.svg"
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        distances = Output("Distances", Orange.misc.DistMatrix, dynamic=False)

    # See `migrate_settings` for the changes between versions.
    settings_version = 3

    #: Index into ["Rows", "Columns"]: what the distances are computed between.
    axis = Setting(0)  # type: int
    #: Index of the selected metric in `METRICS`.
    metric_idx = Setting(0)  # type: int

    #: Use normalized distances if the metric supports it.
    #: The default is `True`, except when restoring from old pre v2 settings
    #: (see `migrate_settings`).
    normalized_dist = Setting(True)  # type: bool
    autocommit = Setting(True)  # type: bool

    want_main_area = False
    buttons_area_orientation = Qt.Vertical

    class Error(OWWidget.Error):
        no_continuous_features = Msg("No numeric features")
        no_binary_features = Msg("No binary features")
        dense_metric_sparse_data = Msg("{} requires dense data.")
        distances_memory_error = Msg("Not enough memory")
        distances_value_error = Msg("Problem in calculation:\n{}")
        data_too_large_for_mahalanobis = Msg(
            "Mahalanobis handles up to 1000 {}.")

    class Warning(OWWidget.Warning):
        ignoring_discrete = Msg("Ignoring categorical features")
        ignoring_nonbinary = Msg("Ignoring non-binary features")
        imputing_data = Msg("Missing values were imputed")

    def __init__(self):
        """Build the controls: axis selection, metric combo, normalization.

        Both base classes are initialized explicitly.  The normalization
        check box is enabled only when the initially selected metric
        supports normalization.
        """
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        self.data = None

        gui.radioButtons(self.controlArea,
                         self,
                         "axis", ["Rows", "Columns"],
                         box="Distances between",
                         callback=self._invalidate)
        box = gui.widgetBox(self.controlArea, "Distance Metric")
        self.metrics_combo = gui.comboBox(box,
                                          self,
                                          "metric_idx",
                                          items=[m[0] for m in METRICS],
                                          callback=self._metric_changed)
        # The two literals are concatenated by the parser, so the first one
        # has to end with a space to keep "common scale" as two words.
        self.normalization_check = gui.checkBox(
            box,
            self,
            "normalized_dist",
            "Normalized",
            callback=self._invalidate,
            tooltip=("All dimensions are (implicitly) scaled to a common "
                     "scale to normalize the influence across the domain."),
            stateWhenDisabled=False)
        _, metric = METRICS[self.metric_idx]
        self.normalization_check.setEnabled(metric.supports_normalization)

        gui.auto_apply(self.controlArea, self, "autocommit")
        self.layout().setSizeConstraint(self.layout().SetFixedSize)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Handle new input data.

        Cancels any running computation, stores `data`, updates which
        metrics are enabled in the combo box and commits unconditionally.
        """
        self.cancel()
        self.data = data
        self.refresh_metrics()
        self.unconditional_commit()

    def refresh_metrics(self):
        """Enable in the combo box only the metrics usable with the data.

        For sparse data, metrics that do not support sparse data are
        disabled; otherwise all metrics are enabled.
        """
        is_sparse = self.data is not None and issparse(self.data.X)
        for row, (_, metric) in enumerate(METRICS):
            usable = not is_sparse or metric.supports_sparse
            self.metrics_combo.model().item(row).setEnabled(usable)

    def commit(self):
        """Start computing distances on the current data with the chosen metric."""
        # pylint: disable=invalid-sequence-index
        _, chosen_metric = METRICS[self.metric_idx]
        self.compute_distances(chosen_metric, self.data)

    def compute_distances(self, metric, data):
        """Validate/prepare `data` for `metric` and start the computation.

        All messages are cleared first.  If `data` is not None, a sequence
        of checks is run; each either passes (possibly replacing `data` with
        a fixed copy and showing a warning) or shows an error and fails.  On
        the first failure `data` is replaced by None.  The computation is
        then started through `self.start` with `DistanceRunner.run`.
        """
        def _check_sparse():
            # Fail when the data is sparse and the metric cannot handle it.
            # pylint: disable=invalid-sequence-index
            if issparse(data.X) and not metric.supports_sparse:
                self.Error.dense_metric_sparse_data(
                    METRICS[self.metric_idx][0])
                return False
            return True

        def _fix_discrete():
            # Remove discrete features when they cannot be used; fail when
            # no continuous features would remain.
            nonlocal data
            if data.domain.has_discrete_attributes() \
                    and metric is not distance.Jaccard \
                    and (issparse(data.X) and getattr(metric, "fallback", None)
                         or not metric.supports_discrete
                         or self.axis == 1):
                if not data.domain.has_continuous_attributes():
                    self.Error.no_continuous_features()
                    return False
                self.Warning.ignoring_discrete()
                data = distance.remove_discrete_features(data)
            return True

        def _fix_nonbinary():
            # For Jaccard on dense data keep only the two-valued discrete
            # features; fail when there are none.
            nonlocal data
            if metric is distance.Jaccard and not issparse(data.X):
                nbinary = sum(a.is_discrete and len(a.values) == 2
                              for a in data.domain.attributes)
                if not nbinary:
                    self.Error.no_binary_features()
                    return False
                elif nbinary < len(data.domain.attributes):
                    self.Warning.ignoring_nonbinary()
                    data = distance.remove_nonbinary_features(data)
            return True

        def _fix_missing():
            # Impute missing values for metrics that do not support them.
            nonlocal data
            if not metric.supports_missing and bn.anynan(data.X):
                self.Warning.imputing_data()
                data = distance.impute(data)
            return True

        def _check_tractability():
            # Mahalanobis is limited to 1000 rows (when computing by
            # columns) or 1000 columns (when computing by rows).
            if metric is distance.Mahalanobis:
                if self.axis == 1:
                    # when computing distances by columns, we want at most
                    # 1000 rows
                    if len(data) > 1000:
                        self.Error.data_too_large_for_mahalanobis("rows")
                        return False
                else:
                    if len(data.domain.attributes) > 1000:
                        self.Error.data_too_large_for_mahalanobis("columns")
                        return False
            return True

        self.clear_messages()
        if data is not None:
            # Checks run in this order; the first failure stops the chain.
            for check in (_check_sparse, _check_tractability, _fix_discrete,
                          _fix_missing, _fix_nonbinary):
                if not check():
                    data = None
                    break

        self.start(DistanceRunner.run, data, metric, self.normalized_dist,
                   self.axis)

    def on_partial_result(self, _):
        """Ignore partial results; this widget does nothing with them."""
        pass

    def on_done(self, result: Orange.misc.DistMatrix):
        """Send the computed distance matrix (or None) on the output."""
        assert isinstance(result, Orange.misc.DistMatrix) or result is None
        self.Outputs.distances.send(result)

    def on_exception(self, ex):
        """Show an error for known exception types; re-raise unknown ones.

        `ValueError` and `MemoryError` are reported through the widget's
        error messages, `InterruptException` is silently ignored and any
        other exception is raised.
        """
        if isinstance(ex, ValueError):
            self.Error.distances_value_error(ex)
            return
        if isinstance(ex, MemoryError):
            self.Error.distances_memory_error()
            return
        if not isinstance(ex, InterruptException):
            raise ex

    def onDeleteWidget(self):
        """Call `shutdown` before delegating to the base class handler."""
        self.shutdown()
        super().onDeleteWidget()

    def _invalidate(self):
        """Recompute after a setting change by calling `commit`."""
        self.commit()

    def _metric_changed(self):
        """React to a new metric: update the normalization box and recompute."""
        _, chosen_metric = METRICS[self.metric_idx]
        can_normalize = chosen_metric.supports_normalization
        self.normalization_check.setEnabled(can_normalize)
        self._invalidate()

    def send_report(self):
        """Report the selected axis ("Rows"/"Columns") and the metric name."""
        # pylint: disable=invalid-sequence-index
        axis_label = ["Rows", "Columns"][self.axis]
        metric_name = METRICS[self.metric_idx][0]
        self.report_items((
            ("Distances Between", axis_label),
            ("Metric", metric_name),
        ))

    @classmethod
    def migrate_settings(cls, settings, version):
        if version is None or version < 2 and "normalized_dist" not in settings:
            # normalize_dist is set to False when restoring settings from
            # an older version to preserve old semantics.
            settings["normalized_dist"] = False
        if version is None or version < 3:
            # Mahalanobis was moved from idx = 2 to idx = 9
            metric_idx = settings["metric_idx"]
            if metric_idx == 2:
                settings["metric_idx"] = 9
            elif 2 < metric_idx <= 9:
                settings["metric_idx"] -= 1
Esempio n. 4
0
class OWReport(OWWidget):
    """Report window: a table of report items next to an HTML view of them.

    The table has one row per report item with columns for the item, a
    "remove" action and a "scheme" action. The web view renders the HTML of
    every item followed by an editable comment box. A report can be saved as
    HTML, PDF or a pickled ``.report`` file, printed, and re-opened from a
    ``.report`` file.
    """
    name = "Report"
    # Directories last used by the save / open file dialogs.
    save_dir = Setting("")
    open_dir = Setting("")

    def __init__(self):
        super().__init__()
        self._setup_ui_()
        # True while the report has changes that were not saved.
        self.report_changed = False

        index_file = pkg_resources.resource_filename(__name__, "index.html")
        # Explicit encoding: do not depend on the platform's locale default.
        with open(index_file, "r", encoding="utf-8") as f:
            self.report_html_template = f.read()

    def _setup_ui_(self):
        """Create the item table, the buttons and the web view."""
        self.table_model = ReportItemModel(0, len(Column.__members__))
        self.table = ReportTable(self.controlArea)
        self.table.setModel(self.table_model)
        self.table.setShowGrid(False)
        self.table.setSelectionBehavior(QTableView.SelectRows)
        self.table.setSelectionMode(QTableView.SingleSelection)
        self.table.setWordWrap(False)
        self.table.setMouseTracking(True)
        self.table.verticalHeader().setSectionResizeMode(QHeaderView.Fixed)
        self.table.verticalHeader().setDefaultSectionSize(20)
        self.table.verticalHeader().setVisible(False)
        self.table.horizontalHeader().setVisible(False)
        self.table.setFixedWidth(250)
        self.table.setColumnWidth(Column.item, 200)
        self.table.setColumnWidth(Column.remove, 23)
        self.table.setColumnWidth(Column.scheme, 25)
        self.table.clicked.connect(self._table_clicked)
        self.table.selectionModel().selectionChanged.connect(
            self._table_selection_changed)
        self.controlArea.layout().addWidget(self.table)

        # Scheme XML remembered by `_show_scheme` before it replaces the
        # canvas content; restored by the "Back to Last Scheme" button.
        self.last_scheme = None
        self.scheme_button = gui.button(self.controlArea,
                                        self,
                                        "Back to Last Scheme",
                                        callback=self._show_last_scheme)
        box = gui.hBox(self.controlArea)
        box.setContentsMargins(-6, 0, -6, 0)
        self.save_button = gui.button(box,
                                      self,
                                      "Save",
                                      callback=self.save_report)
        self.print_button = gui.button(box,
                                       self,
                                       "Print",
                                       callback=self._print_report)

        class PyBridge(QObject):
            """Slots called from the page's JavaScript as ``pybridge.*``.

            The first parameter is named ``myself`` so that ``self`` keeps
            referring to the enclosing OWReport instance.
            """

            @pyqtSlot(str)
            def _select_item(myself, item_id):
                item = self.table_model.get_item_by_id(item_id)
                self.table.selectRow(
                    self.table_model.indexFromItem(item).row())
                self._change_selected_item(item)

            @pyqtSlot(str, str)
            def _add_comment(myself, item_id, value):
                item = self.table_model.get_item_by_id(item_id)
                item.comment = value
                self.report_changed = True

        self.report_view = WebviewWidget(self.mainArea, bridge=PyBridge(self))
        self.mainArea.layout().addWidget(self.report_view)

    @deprecated("Widgets should not be pickled")
    def __getstate__(self):
        """Return ``(attribute dict without the Qt objects, report items)``."""
        rep_dict = self.__dict__.copy()
        for key in ('_OWWidget__env', 'controlArea', 'mainArea', 'report_view',
                    'table', 'table_model'):
            del rep_dict[key]
        items_len = self.table_model.rowCount()
        return rep_dict, [self.table_model.item(i) for i in range(items_len)]

    @deprecated("Widgets should not be pickled")
    def __setstate__(self, state):
        """Restore the attributes, rebuild the UI and re-add the items."""
        rep_dict, items = state
        self.__dict__.update(rep_dict)
        self._setup_ui_()
        self._restore_items(items)

    def _restore_items(self, items):
        """Add a fresh ReportItem copy of every item in `items` to the model."""
        for item in items:
            self.table_model.add_item(
                ReportItem(item.name, item.html, item.scheme, item.module,
                           item.icon_name, item.comment))

    def _table_clicked(self, index):
        """Handle a click on the "remove" or "scheme" column of a row."""
        column = index.column()
        if column == Column.remove:
            self._remove_item(index.row())
            indexes = self.table.selectionModel().selectedIndexes()
            if indexes:
                item = self.table_model.item(indexes[0].row())
                self._scroll_to_item(item)
                self._change_selected_item(item)
        if column == Column.scheme:
            self._show_scheme(index.row())

    def _table_selection_changed(self, new_selection, _):
        """Scroll to and highlight the newly selected item in the web view."""
        indexes = new_selection.indexes()
        if indexes:
            item = self.table_model.item(indexes[0].row())
            self._scroll_to_item(item)
            self._change_selected_item(item)

    def _remove_item(self, row):
        """Remove the item in `row` and rebuild the page."""
        self.table_model.removeRow(row)
        self.report_changed = True
        self._build_html()

    def clear(self):
        """Remove all items and rebuild the (now empty) page."""
        self.table_model.clear()
        self.report_changed = True
        self._build_html()

    def _add_item(self, widget):
        """Create a report item from `widget`, add it to the model, return it."""
        name = widget.get_widget_name_extension()
        name = "{} - {}".format(widget.name, name) if name else widget.name
        item = ReportItem(name, widget.report_html, self._get_scheme(),
                          widget.__module__, widget.icon)
        self.table_model.add_item(item)
        self.report_changed = True
        return item

    def _build_html(self):
        """Render all items into one HTML page and load it into the view."""
        # Local import: `html` is also used as an attribute / variable name
        # for markup strings in this class.
        from html import escape

        parts = [self.report_html_template, "<body>"]
        for row in range(self.table_model.rowCount()):
            item = self.table_model.item(row)
            # The comment is user-typed text placed inside <textarea>; it is
            # escaped so that '&' sequences and '</textarea>' in a comment
            # cannot corrupt the page. The item's own HTML is inserted as is.
            comment = escape(str(item.comment), quote=False)
            parts.append(
                "<div id='{}' class='normal' "
                "onClick='pybridge._select_item(this.id)'>{}<div "
                "class='textwrapper'><textarea "
                "placeholder='Write a comment...'"
                "onInput='this.innerHTML = this.value;"
                "pybridge._add_comment(this.parentNode.parentNode.id, this.value);'"
                ">{}</textarea></div>"
                "</div>".format(item.id, item.html, comment))
        parts.append("</body></html>")
        self.report_view.setHtml("".join(parts))

    def _scroll_to_item(self, item):
        """Scroll the web view to the element of `item`."""
        self.report_view.evalJS(
            "document.getElementById('{}').scrollIntoView();".format(item.id))

    def _change_selected_item(self, item):
        """Mark `item` as the selected element in the web view."""
        # `sel_el` is undefined when nothing is selected yet, so guard it;
        # the id is quoted like in the other scripts of this class.
        self.report_view.evalJS(
            "var sel_el = document.getElementsByClassName('selected')[0]; "
            "if (sel_el && sel_el.id != '{}') "
            "   sel_el.className = 'normal';".format(item.id))
        self.report_view.evalJS(
            "document.getElementById('{}').className = 'selected';".format(
                item.id))
        self.report_changed = True

    def make_report(self, widget):
        """Add a report item for `widget`, then show and select it."""
        item = self._add_item(widget)
        self._build_html()
        self._scroll_to_item(item)
        self.table.selectRow(self.table_model.rowCount() - 1)

    def _get_scheme(self):
        """Return the canvas' current scheme XML, or None without a canvas."""
        canvas = self.get_canvas_instance()
        return canvas.get_scheme_xml() if canvas else None

    def _show_scheme(self, row):
        """Load the scheme stored with the item in `row` into the canvas.

        If the current canvas document reports strict modification, its
        scheme XML is remembered in ``last_scheme`` first.
        """
        scheme = self.table_model.item(row).scheme
        canvas = self.get_canvas_instance()
        if canvas:
            document = canvas.current_document()
            if document.isModifiedStrict():
                self.last_scheme = canvas.get_scheme_xml()
            canvas.load_scheme_xml(scheme)

    def _show_last_scheme(self):
        """Load the scheme remembered in ``last_scheme`` into the canvas."""
        if self.last_scheme:
            canvas = self.get_canvas_instance()
            if canvas:
                canvas.load_scheme_xml(self.last_scheme)

    def save_report(self):
        """Ask for a file name and save the report as HTML, PDF or .report.

        Returns ``QDialog.Rejected`` when the dialog is cancelled or the
        file could not be written because of a permission error, otherwise
        ``QDialog.Accepted``. The report is marked as unchanged only after a
        save that did not fail.
        """
        filename, _ = QFileDialog.getSaveFileName(
            self, "Save Report", self.save_dir,
            "HTML (*.html);;PDF (*.pdf);;Report (*.report)")
        if not filename:
            return QDialog.Rejected

        self.save_dir = os.path.dirname(filename)
        self.saveSettings()
        _, extension = os.path.splitext(filename)
        if extension == ".pdf":
            printer = QPrinter()
            printer.setPageSize(QPrinter.A4)
            printer.setOutputFormat(QPrinter.PdfFormat)
            printer.setOutputFileName(filename)
            self.report_view.print_(printer)
        elif extension == ".report":
            # `is False` so that an override returning None counts as success.
            if self.save(filename) is False:
                return QDialog.Rejected
        else:
            # Fetch the page before opening (and truncating) the target file.
            contents = self.report_view.html()
            try:
                with open(filename, "w", encoding="utf-8") as f:
                    f.write(contents)
            except PermissionError:
                self.permission_error(filename)
                return QDialog.Rejected
        self.report_changed = False
        return QDialog.Accepted

    def _print_report(self):
        """Show the print dialog and print the page if it is accepted."""
        printer = QPrinter()
        print_dialog = QPrintDialog(printer, self)
        print_dialog.setWindowTitle("Print report")
        if print_dialog.exec_() != QDialog.Accepted:
            return
        self.report_view.print_(printer)

    def open_report(self):
        """Ask for a ``.report`` file, load it and show it as the new report.

        The loaded report becomes the application-wide report instance.
        """
        filename, _ = QFileDialog.getOpenFileName(self, "Open Report",
                                                  self.open_dir,
                                                  "Report (*.report)")
        if not filename:
            return

        self.report_changed = False
        self.open_dir = os.path.dirname(filename)
        self.saveSettings()

        try:
            report = self.load(filename)
        except (IOError, AttributeError, pickle.UnpicklingError) as e:
            message_critical(self.tr("Could not load an Orange Report file"),
                             title=self.tr("Error"),
                             informative_text=self.tr(
                                 "Error occurred "
                                 "while loading '{}'.").format(filename),
                             exc_info=True,
                             parent=self)
            log.error(str(e), exc_info=True)
            return
        # From here on work with the loaded report, not with this window.
        self.set_instance(report)
        report._build_html()
        report.table.selectRow(0)
        report.show()
        report.raise_()

    def save(self, filename):
        """Pickle the report (attributes and items) into `filename`.

        Returns True on success and False when writing failed with a
        permission error (which is reported to the user).
        """
        attributes = {
            key: getattr(self, key, None)
            for key in ('last_scheme', 'open_dir')
        }
        items = [
            self.table_model.item(i)
            for i in range(self.table_model.rowCount())
        ]
        report = dict(__version__=1, attributes=attributes, items=items)

        try:
            with open(filename, 'wb') as f:
                pickle.dump(report, f)
        except PermissionError:
            self.permission_error(filename)
            return False
        return True

    @classmethod
    def load(cls, filename):
        """Load a report from the pickle file `filename`.

        A pickled dict (as written by `save`) is turned into a new instance
        of `cls`; any other unpickled object is returned unchanged.
        """
        # SECURITY: pickle.load can execute arbitrary code; only open report
        # files that come from a trusted source.
        with open(filename, 'rb') as f:
            report = pickle.load(f)

        if not isinstance(report, dict):
            return report

        self = cls()
        self.__dict__.update(report['attributes'])
        self._restore_items(report['items'])
        return self

    def permission_error(self, filename):
        """Show and log a permission error for `filename`.

        Passes ``exc_info=True``, so it is meant to be called from inside an
        ``except`` block.
        """
        message_critical(
            self.tr("Permission error when trying to write report."),
            title=self.tr("Error"),
            informative_text=self.tr("Permission error occurred "
                                     "while saving '{}'.").format(filename),
            exc_info=True,
            parent=self)
        log.error("PermissionError when trying to write report.",
                  exc_info=True)

    def is_empty(self):
        """Return True if the report has no items."""
        return not self.table_model.rowCount()

    def is_changed(self):
        """Return the ``report_changed`` flag."""
        return self.report_changed

    @staticmethod
    def set_instance(report):
        """Store `report` as the application-wide report window."""
        app_inst = QApplication.instance()
        app_inst._report_window = report

    @staticmethod
    def get_instance():
        """Return the application-wide report window, creating it if needed."""
        app_inst = QApplication.instance()
        if not hasattr(app_inst, "_report_window"):
            report = OWReport()
            app_inst._report_window = report
        return app_inst._report_window

    @staticmethod
    def get_canvas_instance():
        """Return the first top-level CanvasMainWindow, or None if none exists."""
        for widget in QApplication.topLevelWidgets():
            if isinstance(widget, CanvasMainWindow):
                return widget
        return None
Esempio n. 5
0
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget.

    Shows the confusion matrix of one selected learner from the input
    evaluation results and outputs the data instances that fall into the
    selected matrix cells.

    Table layout used throughout the class: rows/columns 0 and 1 hold the
    "Predicted"/"Actual" captions and the class labels, the matrix cells
    start at (2, 2), and the row/column after the last class holds the sums.
    Stored selections are therefore (row - 2, column - 2) pairs.
    """

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001
    keywords = []

    class Inputs:
        evaluation_results = Input("Evaluation Results",
                                   Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data",
                               Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    # Labels of the "Show:" combo box; `selected_quantity` indexes this list.
    quantities = [
        "Number of instances", "Proportion of predicted",
        "Proportion of actual"
    ]

    settings_version = 1
    settingsHandler = ClassValuesContextHandler()

    # One-element list holding the index of the selected learner.
    selected_learner = Setting([0], schema_only=True)
    # Set of (row - 2, column - 2) pairs of the selected matrix cells.
    selection = ContextSetting(set())
    selected_quantity = Setting(0)
    append_predictions = Setting(True)
    append_probabilities = Setting(False)
    autocommit = Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances", "click_cell")
    ]

    class Error(widget.OWWidget.Error):
        no_regression = Msg("Confusion Matrix cannot show regression results.")
        invalid_values = Msg(
            "Evaluation Results input contains invalid values")
        empty_input = widget.Msg("Empty result on input. Nothing to display.")

    def __init__(self):
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        # Control area: list of learners.
        self.learners_box = gui.listBox(self.controlArea,
                                        self,
                                        "selected_learner",
                                        "learners",
                                        box='Learners',
                                        callback=self._learner_changed)

        # Buttons area: what to append to the output, and auto-apply.
        self.outputbox = gui.vBox(self.buttonsArea)
        box = gui.hBox(self.outputbox)
        gui.checkBox(box,
                     self,
                     "append_predictions",
                     "Predictions",
                     callback=self._invalidate)
        gui.checkBox(box,
                     self,
                     "append_probabilities",
                     "Probabilities",
                     callback=self._invalidate)

        gui.auto_apply(self.outputbox, self, "autocommit", box=False)

        # Main area: quantity selector, the matrix view and selection buttons.
        box = gui.vBox(self.mainArea, box=True)

        sbox = gui.hBox(box)
        gui.rubber(sbox)
        gui.comboBox(sbox,
                     self,
                     "selected_quantity",
                     items=self.quantities,
                     label="Show: ",
                     orientation=Qt.Horizontal,
                     callback=self._update)

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        view.clicked.connect(self.cell_clicked)
        box.layout().addWidget(view)

        selbox = gui.hBox(box)
        gui.button(selbox,
                   self,
                   "Select Correct",
                   callback=self.select_correct,
                   autoDefault=False)
        gui.button(selbox,
                   self,
                   "Select Misclassified",
                   callback=self.select_wrong,
                   autoDefault=False)
        gui.button(selbox,
                   self,
                   "Clear Selection",
                   callback=self.select_none,
                   autoDefault=False)

    @staticmethod
    def sizeHint():
        """Initial size"""
        return QSize(750, 340)

    def _item(self, i, j):
        """Return the model item at (i, j), or a new item if there is none."""
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        """Put `item` into the model at (i, j)."""
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        """Set up captions, class-label headers and size of the table.

        `nclasses` is the number of class values; the table gets
        ``nclasses + 3`` rows and columns.
        """
        # "Predicted" caption, spanning the class columns of row 0.
        item = self._item(0, 2)
        item.setData("Predicted", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)

        self._set_item(0, 2, item)
        # "Actual" caption, spanning the class rows of column 0.
        item = self._item(2, 0)
        item.setData("Actual", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        # The four cells in the top-left corner are inert.
        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

        # Each header label goes into row 1 (column header) and into
        # column 1 (row header).
        for p, label in enumerate(self.headers):
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                # All headers but the last one get a border: "r" for the
                # cells in column 1, "b" for the cells in row 1.
                if p < len(self.headers) - 1:
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        # Size columns to their contents only while the joined header labels
        # are shorter than 120 characters; otherwise use a fixed default.
        if len(' '.join(self.headers)) < 120:
            hor_header.setSectionResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        self.tablemodel.setRowCount(nclasses + 3)
        self.tablemodel.setColumnCount(nclasses + 3)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Set the input results.

        Results are rejected (treated as no input, with an error shown) when
        the class is not discrete, when there are no actual values, or when
        the actual or predicted values contain NaN.
        """
        # false positive, pylint: disable=no-member
        # Remember the learner selection; clear() resets the learners list.
        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        self.Error.no_regression.clear()
        self.Error.empty_input.clear()
        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        elif results is not None and not results.actual.size:
            self.Error.empty_input()
            data = results = None

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            if np.any(np.isnan(results.actual)) or \
                    np.any(np.isnan(results.predicted)):
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        self.Error.invalid_values(shown=nan_values)

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            # Results without the accompanying data are not supported.
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
            return

        self.report_button.setDisabled(False)

        nmodels = results.predicted.shape[0]
        # Header labels: the class values followed by a summation sign for
        # the sums row/column.
        self.headers = class_values + \
                       (unicodedata.lookup("N-ARY SUMMATION"), )

        # NOTE: The 'learner_names' is set in 'Test Learners' widget.
        self.learners = getattr(results, "learner_names",
                                [f"Learner #{i + 1}" for i in range(nmodels)])

        self._init_table(len(class_values))
        self.openContext(data.domain.class_var)
        # Restore the previous learner selection if it is still valid,
        # otherwise fall back to the first learner.
        if not prev_sel_learner or prev_sel_learner[0] >= len(self.learners):
            if self.learners:
                self.selected_learner[:] = [0]
        else:
            self.selected_learner[:] = prev_sel_learner
        self._update()
        self._set_selection()
        self.commit.now()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            # Visit each unordered pair once and select both (i, j), (j, i).
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Handle cell click event

        Clicks in row/column 1 (labels) or in the last row/column (sums)
        select a whole column, a whole row, or (for the corner cells) the
        whole matrix. Clicks in row or column 0 are ignored.
        """
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def _prepare_data(self):
        """Build the output tables for the current selection.

        Returns ``(data, annotated_data)``: `data` holds the instances whose
        (actual, predicted) pair matches a selected cell, or is None when no
        instance matches; `annotated_data` is the annotated table created
        from all instances and the selected indices. Predictions and
        probabilities are appended as meta attributes when enabled.
        """
        indices = self.tableview.selectedIndexes()
        # Convert table coordinates to (actual, predicted) class indices.
        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        actual = self.results.actual
        learner_name = self.learners[self.selected_learner[0]]
        predicted = self.results.predicted[self.selected_learner[0]]
        selected = [
            i for i, t in enumerate(zip(actual, predicted)) if t in indices
        ]

        extra = []
        class_var = self.data.domain.class_var
        metas = self.data.domain.metas
        attrs = self.data.domain.attributes
        names = [var.name for var in chain(metas, [class_var], attrs)]

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            proposed = "{}({})".format(class_var.name, learner_name)
            name = get_unique_names(names, proposed)
            var = Orange.data.DiscreteVariable(name, class_var.values)
            metas = metas + (var, )

        if self.append_probabilities and \
                        self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner[0]]
            extra.append(np.array(probs, dtype=object))
            pvars = [
                Orange.data.ContinuousVariable("p({})".format(value))
                for value in class_var.values
            ]
            metas = metas + tuple(pvars)

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars, metas)
        data = self.data.transform(domain)
        if extra:
            # Fill the newly added meta columns (those after the original
            # metas) with the predictions / probabilities.
            with data.unlocked(data.metas):
                data.metas[:, len(self.data.domain.metas):] = \
                    np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data

    @gui.deferred
    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            data, annotated_data = self._prepare_data()
        else:
            data = None
            annotated_data = None

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        """Store the view's current selection and schedule a commit."""
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        self.commit.deferred()

    def _set_selection(self):
        """Apply the stored `selection` to the table view."""
        selection = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        """Redraw the matrix, reapply the selection and schedule a commit."""
        self._update()
        self._set_selection()
        self.commit.deferred()

    def _update(self):
        """Fill the table with the confusion matrix of the selected learner.

        Does nothing when there are no results or no learner is selected.
        """
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner:
            cmatrix = confusion_matrix(self.results, self.selected_learner[0])
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = np.diag_indices(n)

            # `colors` holds the shading intensity of each cell; the diagonal
            # is zeroed here so that `div` is computed from off-diagonal
            # cells only, and is filled in separately below.
            colors = cmatrix.astype(np.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                # Number of instances
                normalized = cmatrix.astype(int)
                formatstr = "{}"
                div = np.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    # Proportion of predicted: percentages within a column
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    # Proportion of actual: percentages within a row
                    normalized = 100 * cmatrix / rowsum[:, np.newaxis]
                    div = colors.max(axis=1)[:, np.newaxis]
                formatstr = "{:2.1f} %"
            # Avoid dividing by zero where there are no off-diagonal counts.
            div[div == 0] = 1
            colors /= div
            maxval = normalized[diag].max()
            if maxval > 0:
                colors[diag] = normalized[diag] / maxval

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    # Hue 240 for diagonal cells, hue 0 for the others;
                    # lightness decreases from 255 as `col_val` grows.
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val) else int(255 -
                                                            30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    # bkcolor is light-ish so use a black text
                    item.setData(QBrush(Qt.black), Qt.ForegroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                # Bold, enabled but not selectable cell for a row/column sum.
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            # Column sums go into the last row, row sums into the last
            # column, and the grand total into the bottom-right corner.
            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner:
            self.report_table(
                "Confusion matrix for {} (showing {})".format(
                    self.learners[self.selected_learner[0]],
                    self.quantities[self.selected_quantity].lower()),
                self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        """Wrap an integer `selected_learner` from unversioned settings in a list."""
        if not version:
            # For some period of time the 'selected_learner' property was
            # changed from List[int] -> int
            # (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back
            # again (commit 8a492d79a2e17154a0881e24a05843406c8892c0)
            if "selected_learner" in settings and \
                    isinstance(settings["selected_learner"], int):
                settings["selected_learner"] = [settings["selected_learner"]]
Esempio n. 6
0
class OWLoadModel(widget.OWWidget):
    """Widget that reads a pickled model from a file and sends it on.

    A combo box lists recently used files; a "..." button opens a file
    dialog and a "Reload" button reads the current file again.
    """
    name = "Load Model"
    description = "Load a model from an input file."
    priority = 3050
    replaces = ["Orange.widgets.classify.owloadclassifier.OWLoadClassifier"]
    icon = "icons/LoadModel.svg"

    outputs = [("Model", Model, widget.Dynamic)]

    #: List of recent filenames.
    history = Setting([])
    #: Current (last selected) filename or None.
    filename = Setting(None)

    class Error(widget.OWWidget.Error):
        load_error = Msg("An error occurred while reading '{}'")

    FILTER = owsavemodel.OWSaveModel.FILTER

    want_main_area = False
    resizing_enabled = False

    def __init__(self):
        super().__init__()
        self.selectedIndex = -1

        box = gui.widgetBox(self.controlArea,
                            self.tr("File"),
                            orientation=QHBoxLayout())

        # Combo box with the recently used files.
        self.filesCB = gui.comboBox(box,
                                    self,
                                    "selectedIndex",
                                    callback=self._on_recent)
        self.filesCB.setMinimumContentsLength(20)
        self.filesCB.setSizeAdjustPolicy(
            QComboBox.AdjustToMinimumContentsLength)

        self.loadbutton = gui.button(box, self, "...", callback=self.browse)
        self.loadbutton.setIcon(self.style().standardIcon(
            QStyle.SP_DirOpenIcon))
        self.loadbutton.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Fixed)

        self.reloadbutton = gui.button(box,
                                       self,
                                       "Reload",
                                       callback=self.reload,
                                       default=True)
        self.reloadbutton.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        self.reloadbutton.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Fixed)

        # filter valid existing filenames
        self.history = list(filter(os.path.isfile, self.history))[:20]
        for filename in self.history:
            self.filesCB.addItem(os.path.basename(filename), userData=filename)

        # restore the current selection if the filename is
        # in the history list
        if self.filename in self.history:
            self.selectedIndex = self.history.index(self.filename)
        else:
            self.selectedIndex = -1
            self.filename = None
            self.reloadbutton.setEnabled(False)

        if self.filename:
            # Defer loading until the event loop runs.
            QTimer.singleShot(0, lambda: self.load(self.filename))

    def browse(self):
        """Select a filename using an open file dialog."""
        if self.filename is None:
            startdir = stdpaths.Documents
        else:
            startdir = os.path.dirname(self.filename)

        filename, _ = QFileDialog.getOpenFileName(self,
                                                  self.tr("Open"),
                                                  directory=startdir,
                                                  filter=self.FILTER)

        if filename:
            self.load(filename)

    def reload(self):
        """Reload the current file; do nothing when no file is selected."""
        # Without this guard ``open(None)`` raises a TypeError that
        # ``load`` does not catch.
        if self.filename:
            self.load(self.filename)

    def load(self, filename):
        """Load the object from filename and send it to output.

        On failure the widget's ``load_error`` message is shown and
        nothing is sent; on success the message is cleared, the file is
        remembered in the history and the model is sent to "Model".
        """
        try:
            with open(filename, "rb") as f:
                # NOTE: unpickling can execute arbitrary code; only files
                # from trusted sources should be opened here.
                model = pickle.load(f)
        except (pickle.UnpicklingError, OSError, EOFError,
                AttributeError, ImportError, IndexError):
            # The pickle documentation lists AttributeError, ImportError
            # and IndexError among the errors unpickling may raise.
            self.Error.load_error(os.path.basename(filename))
        else:
            self.Error.load_error.clear()
            self._remember(filename)
            self.send("Model", model)

    def _remember(self, filename):
        """
        Remember `filename` was accessed.

        The file is moved (or inserted) to the front of both the history
        list and the combo box and becomes the current selection.
        """
        if filename in self.history:
            index = self.history.index(filename)
            del self.history[index]
            self.filesCB.removeItem(index)

        self.history.insert(0, filename)

        self.filesCB.insertItem(0,
                                os.path.basename(filename),
                                userData=filename)
        self.selectedIndex = 0
        self.filename = filename
        self.reloadbutton.setEnabled(self.selectedIndex != -1)

    def _on_recent(self):
        """Load the history entry at the combo box's current index."""
        self.load(self.history[self.selectedIndex])
Esempio n. 7
0
class OWund_flux(widget.OWWidget):
    name = "und_flux"
    id = "orange.widgets.dataund_flux"
    description = "xoppy application to compute..."
    icon = "icons/xoppy_und_flux.png"
    author = "create_widget.py"
    maintainer_email = "*****@*****.**"
    priority = 10
    category = ""
    keywords = ["xoppy", "und_flux"]
    outputs = [  #{"name": "xoppy_data",
        # "type": np.ndarray,
        # "doc": ""},
        {
            "name": "xoppy_table",
            "type": Table,
            "doc": ""
        },
        {
            "name": "xoppy_specfile",
            "type": str,
            "doc": ""
        }
    ]

    #inputs = [{"name": "Name",
    #           "type": type,
    #           "handler": None,
    #           "doc": ""}]

    want_main_area = False

    ELECTRONENERGY = Setting(6.04)
    ELECTRONENERGYSPREAD = Setting(0.001)
    ELECTRONCURRENT = Setting(0.2)
    ELECTRONBEAMSIZEH = Setting(0.000395)
    ELECTRONBEAMSIZEV = Setting(9.9e-06)
    ELECTRONBEAMDIVERGENCEH = Setting(1.05e-05)
    ELECTRONBEAMDIVERGENCEV = Setting(3.9e-06)
    PERIODID = Setting(0.018)
    NPERIODS = Setting(222)
    KV = Setting(1.68)
    DISTANCE = Setting(30.0)
    GAPH = Setting(0.001)
    GAPV = Setting(0.001)
    PHOTONENERGYMIN = Setting(3000.0)
    PHOTONENERGYMAX = Setting(55000.0)
    PHOTONENERGYPOINTS = Setting(500)
    METHOD = Setting(0)

    def __init__(self):
        """Build the control area: action buttons plus one input per setting.

        The input widgets are created in the order of ``unitLabels()`` /
        ``unitFlags()``: sixteen line edits followed by the ``METHOD``
        combo box.
        """
        super().__init__()

        box0 = gui.widgetBox(self.controlArea, " ", orientation="horizontal")
        #widget buttons: compute, set defaults, help
        gui.button(box0, self, "Compute", callback=self.compute)
        gui.button(box0, self, "Defaults", callback=self.defaults)
        gui.button(box0, self, "Help", callback=self.help1)
        self.process_showers()
        box = gui.widgetBox(self.controlArea, " ", orientation="vertical")

        # Build the label/flag lists once instead of once per widget.
        labels = self.unitLabels()
        flags = self.unitFlags()

        # (setting name, value type) for widget indices 0..15, in order.
        line_edits = (
            ("ELECTRONENERGY", float),
            ("ELECTRONENERGYSPREAD", float),
            ("ELECTRONCURRENT", float),
            ("ELECTRONBEAMSIZEH", float),
            ("ELECTRONBEAMSIZEV", float),
            ("ELECTRONBEAMDIVERGENCEH", float),
            ("ELECTRONBEAMDIVERGENCEV", float),
            ("PERIODID", float),
            ("NPERIODS", int),
            ("KV", float),
            ("DISTANCE", float),
            ("GAPH", float),
            ("GAPV", float),
            ("PHOTONENERGYMIN", float),
            ("PHOTONENERGYMAX", float),
            ("PHOTONENERGYPOINTS", int),
        )
        validators = {float: QDoubleValidator, int: QIntValidator}

        for idx, (setting, value_type) in enumerate(line_edits):
            box1 = gui.widgetBox(box)
            gui.lineEdit(box1,
                         self,
                         setting,
                         label=labels[idx],
                         addSpace=True,
                         valueType=value_type,
                         # each line edit gets its own validator instance
                         validator=validators[value_type]())
            self.show_at(flags[idx], box1)

        #widget index 16
        idx = len(line_edits)
        box1 = gui.widgetBox(box)
        gui.comboBox(box1,
                     self,
                     "METHOD",
                     label=labels[idx],
                     addSpace=True,
                     items=['US', 'URGENT', 'SRW'],
                     valueType=int,
                     orientation="horizontal")
        self.show_at(flags[idx], box1)

        gui.rubber(self.controlArea)

    def unitLabels(self):
        """Return a new list with the labels of the inputs, in widget order."""
        labels = (
            "Electron Energy [GeV]",
            "Electron Energy Spread",
            "Electron Current [A]",
            "Electron Beam Size H [m]",
            "Electron Beam Size V [m]",
            "Electron Beam Divergence H [rad]",
            "Electron Beam Divergence V [rad]",
            "Period ID [m]",
            "Number of periods",
            "Kv [undulator K value vertical field]",
            "Distance to slit [m]",
            "Slit gap H [m]",
            "Slit gap V [m]",
            "photon Energy Min [eV]",
            "photon Energy Max [eV]",
            "photon Energy Points",
            "calculation code",
        )
        return list(labels)

    def unitFlags(self):
        """Return a new list with one flag string per input widget.

        All seventeen flags are the string "True"; ``__init__`` passes
        them to ``show_at`` together with the widget's box.
        """
        n_widgets = 17
        return ["True"] * n_widgets

    #def unitNames(self):
    #     return ["ELECTRONENERGY", "ELECTRONENERGYSPREAD", "ELECTRONCURRENT", "ELECTRONBEAMSIZEH", "ELECTRONBEAMSIZEV", "ELECTRONBEAMDIVERGENCEH", "ELECTRONBEAMDIVERGENCEV", "PERIODID", "NPERIODS", "KV", "DISTANCE", "GAPH", "GAPV", "PHOTONENERGYMIN", "PHOTONENERGYMAX", "PHOTONENERGYPOINTS", "METHOD"]

    def compute(self):
        """Run the und_flux calculation and send its outputs.

        The current settings are passed to ``xoppy_calc_und_flux``, which
        returns the name of the file it produced. That name is sent on
        "xoppy_specfile"; the file is then read back with ``np.loadtxt``
        and sent as a table on "xoppy_table", using the column labels
        found on the file's ``#L`` line.

        Raises:
            ValueError: if the file contains no ``#L`` label line.
        """
        fileName = xoppy_calc_und_flux(
            ELECTRONENERGY=self.ELECTRONENERGY,
            ELECTRONENERGYSPREAD=self.ELECTRONENERGYSPREAD,
            ELECTRONCURRENT=self.ELECTRONCURRENT,
            ELECTRONBEAMSIZEH=self.ELECTRONBEAMSIZEH,
            ELECTRONBEAMSIZEV=self.ELECTRONBEAMSIZEV,
            ELECTRONBEAMDIVERGENCEH=self.ELECTRONBEAMDIVERGENCEH,
            ELECTRONBEAMDIVERGENCEV=self.ELECTRONBEAMDIVERGENCEV,
            PERIODID=self.PERIODID,
            NPERIODS=self.NPERIODS,
            KV=self.KV,
            DISTANCE=self.DISTANCE,
            GAPH=self.GAPH,
            GAPV=self.GAPV,
            PHOTONENERGYMIN=self.PHOTONENERGYMIN,
            PHOTONENERGYMAX=self.PHOTONENERGYMAX,
            PHOTONENERGYPOINTS=self.PHOTONENERGYPOINTS,
            METHOD=self.METHOD)
        #send specfile
        self.send("xoppy_specfile", fileName)

        print("Loading file:  ", fileName)
        #load spec file with one scan, # is comment
        out = np.loadtxt(fileName)
        print("data shape: ", out.shape)
        #get labels
        with open(fileName) as spec_file:
            labels = self._spec_labels(spec_file)
        print("data labels: ", labels)
        #
        # build and send orange table
        #
        domain = Domain([ContinuousVariable(i) for i in labels])
        table = Table.from_numpy(domain, out)
        self.send("xoppy_table", table)

    @staticmethod
    def _spec_labels(lines):
        """Return the column labels from the first line containing ``#L``.

        The ``"#L "`` marker is removed, surrounding whitespace (including
        the trailing newline) is stripped and the remainder is split on
        two-space separators.
        """
        for line in lines:
            if "#L" in line:
                return line.replace("#L ", "").strip().split("  ")
        raise ValueError("no '#L' label line found in spec file")

    def defaults(self):
        """Reset the settings via ``resetSettings`` and run ``compute``."""
        self.resetSettings()
        self.compute()

    def help1(self):
        """Print a notice and call ``xoppy_doc`` for this application."""
        application = 'und_flux'
        print("help pressed.")
        xoppy_doc(application)
Esempio n. 8
0
class OWScatterPlot(OWWidget):
    """Scatterplot visualization with explorative analysis and intelligent
    data visualization enhancements."""

    name = 'Scatter Plot'
    description = "Interactive scatter plot visualization with " \
                  "intelligent data visualization enhancements."
    icon = "icons/ScatterPlot.svg"
    priority = 140

    inputs = [("Data", Table, "set_data", Default),
              ("Data Subset", Table, "set_subset_data"),
              ("Features", AttributeList, "set_shown_attributes")]

    outputs = [("Selected Data", Table, Default),
               (ANNOTATED_DATA_SIGNAL_NAME, Table),
               ("Features", Table)]

    settingsHandler = DomainContextHandler()

    auto_send_selection = Setting(True)
    auto_sample = Setting(True)
    toolbar_selection = Setting(0)

    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    selection = Setting(None, schema_only=True)

    graph = SettingProvider(OWScatterPlotGraph)

    jitter_sizes = [0, 0.1, 0.5, 1, 2, 3, 4, 5, 7, 10]

    graph_name = "graph.plot_widget.plotItem"

    class Information(OWWidget.Information):
        sampled_sql = Msg("Large SQL table; showing a sample.")

    def __init__(self):
        """Build the plot area, the control widgets and the zoom actions."""
        super().__init__()

        # Main area: the scatter plot graph and its plot widget.
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWScatterPlotGraph(self, box, "ScatterPlot")
        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        # Draw both axes with the palette's text colour.
        axispen = QPen(self.palette().color(QPalette.Text))
        axis = plot.getAxis("bottom")
        axis.setPen(axispen)

        axis = plot.getAxis("left")
        axis.setPen(axispen)

        self.data = None  # Orange.data.Table
        self.subset_data = None  # Orange.data.Table
        self.data_metas_X = None  # self.data, where primitive metas are moved to X
        self.sql_data = None  # Orange.data.sql.table.SqlTable
        self.attribute_selection_list = None  # list of Orange.data.Variable
        # Timer whose timeout calls add_data(); it is started and stopped
        # in set_data() and switch_sampling().
        self.__timer = QTimer(self, interval=1200)
        self.__timer.timeout.connect(self.add_data)

        # Options shared by the two axis combo boxes.
        common_options = dict(
            labelWidth=50, orientation=Qt.Horizontal, sendSelectedValue=True,
            valueType=str)
        box = gui.vBox(self.controlArea, "Axis Data")
        dmod = DomainModel
        self.xy_model = DomainModel(dmod.MIXED, valid_types=dmod.PRIMITIVE)
        self.cb_attr_x = gui.comboBox(
            box, self, "attr_x", label="Axis x:", callback=self.update_attr,
            model=self.xy_model, **common_options)
        self.cb_attr_y = gui.comboBox(
            box, self, "attr_y", label="Axis y:", callback=self.update_attr,
            model=self.xy_model, **common_options)

        # VizRank button, indented by the combo boxes' label width.
        vizrank_box = gui.hBox(box)
        gui.separator(vizrank_box, width=common_options["labelWidth"])
        self.vizrank, self.vizrank_button = ScatterPlotVizRank.add_vizrank(
            vizrank_box, self, "Find Informative Projections", self.set_attr)

        gui.separator(box)

        # Jittering controls; the slider label shows "None" for 0, one
        # decimal for values below 1 and an integer percentage otherwise.
        gui.valueSlider(
            box, self, value='graph.jitter_size', label='Jittering: ',
            values=self.jitter_sizes, callback=self.reset_graph_data,
            labelFormat=lambda x:
            "None" if x == 0 else ("%.1f %%" if x < 1 else "%d %%") % x)
        gui.checkBox(
            gui.indentedBox(box), self, 'graph.jitter_continuous',
            'Jitter continuous values', callback=self.reset_graph_data)

        # Sampling box; hidden here and made visible in set_data() when a
        # large SqlTable is sampled.
        self.sampling = gui.auto_commit(
            self.controlArea, self, "auto_sample", "Sample", box="Sampling",
            callback=self.switch_sampling, commit=lambda: self.add_data(1))
        self.sampling.setVisible(False)

        g = self.graph.gui
        g.point_properties_box(self.controlArea)
        # All models that init_attr_values() re-targets to a new domain.
        self.models = [self.xy_model] + g.points_models

        box = gui.vBox(self.controlArea, "Plot Properties")
        g.add_widgets([g.ShowLegend, g.ShowGridLines], box)
        gui.checkBox(
            box, self, value='graph.tooltip_shows_all',
            label='Show all data on mouse hover')
        self.cb_class_density = gui.checkBox(
            box, self, value='graph.class_density', label='Show class density',
            callback=self.update_density)
        self.cb_reg_line = gui.checkBox(
            box, self, value='graph.show_reg_line',
            label='Show regression line', callback=self.update_regression_line)
        gui.checkBox(
            box, self, 'graph.label_only_selected',
            'Label only selected points', callback=self.graph.update_labels)

        # Zoom/select toolbar; button clicks are forwarded to the graph.
        self.zoom_select_toolbar = g.zoom_select_toolbar(
            gui.vBox(self.controlArea, "Zoom/Select"), nomargin=True,
            buttons=[g.StateButtonsBegin, g.SimpleSelect, g.Pan, g.Zoom,
                     g.StateButtonsEnd, g.ZoomReset]
        )
        buttons = self.zoom_select_toolbar.buttons
        buttons[g.Zoom].clicked.connect(self.graph.zoom_button_clicked)
        buttons[g.Pan].clicked.connect(self.graph.pan_button_clicked)
        buttons[g.SimpleSelect].clicked.connect(self.graph.select_button_clicked)
        buttons[g.ZoomReset].clicked.connect(self.graph.reset_button_clicked)
        self.controlArea.layout().addStretch(100)
        self.icons = gui.attributeIconDict

        # Hand the plot widget's palette to the graph.
        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(self.controlArea, self, "auto_send_selection",
                        "Send Selection", "Send Automatically")

        def zoom(s):
            """Zoom in/out by factor `s`."""
            viewbox = plot.getViewBox()
            # scaleBy scales the view's bounds (the axis range)
            viewbox.scaleBy((1 / s, 1 / s))

        def fit_to_view():
            """Auto-range the plot's view box."""
            viewbox = plot.getViewBox()
            viewbox.autoRange()

        # Keyboard actions for zooming in, zooming out and fitting the view.
        zoom_in = QAction(
            "Zoom in", self, triggered=lambda: zoom(1.25)
        )
        zoom_in.setShortcuts([QKeySequence(QKeySequence.ZoomIn),
                              QKeySequence(self.tr("Ctrl+="))])
        zoom_out = QAction(
            "Zoom out", self, shortcut=QKeySequence.ZoomOut,
            triggered=lambda: zoom(1 / 1.25)
        )
        zoom_fit = QAction(
            "Fit in view", self,
            shortcut=QKeySequence(Qt.ControlModifier | Qt.Key_0),
            triggered=fit_to_view
        )
        self.addActions([zoom_in, zoom_out, zoom_fit])

    def keyPressEvent(self, event):
        """Pass the event to the base class, then update the graph tooltip
        with the event's keyboard modifiers."""
        super().keyPressEvent(event)
        modifiers = event.modifiers()
        self.graph.update_tooltip(modifiers)

    def keyReleaseEvent(self, event):
        """Pass the event to the base class, then update the graph tooltip
        with the event's keyboard modifiers."""
        super().keyReleaseEvent(event)
        modifiers = event.modifiers()
        self.graph.update_tooltip(modifiers)

    # def settingsFromWidgetCallback(self, handler, context):
    #     context.selectionPolygons = []
    #     for curve in self.graph.selectionCurveList:
    #         xs = [curve.x(i) for i in range(curve.dataSize())]
    #         ys = [curve.y(i) for i in range(curve.dataSize())]
    #         context.selectionPolygons.append((xs, ys))

    # def settingsToWidgetCallback(self, handler, context):
    #     selections = getattr(context, "selectionPolygons", [])
    #     for (xs, ys) in selections:
    #         c = SelectionCurve("")
    #         c.setData(xs,ys)
    #         c.attach(self.graph)
    #         self.graph.selectionCurveList.append(c)

    def reset_graph_data(self, *_):
        """Rescale the graph's data and redraw it; no-op without data.

        Any positional arguments are ignored.
        """
        if self.data is None:
            return
        self.graph.rescale_data()
        self.update_graph()

    def set_data(self, data):
        """Handler of the "Data" input.

        Large SqlTables are replaced by a downloaded sample; empty data is
        treated as None. Unless the new data has the same checksum as the
        current one, the context is closed, the data (and its variant with
        primitive metas moved to X) is stored, VizRank is re-initialized
        and the context is reopened. Finally, settings that still hold
        variable names as strings are converted to variables.
        """
        self.clear_messages()
        self.Information.sampled_sql.clear()
        self.__timer.stop()
        self.sampling.setVisible(False)
        self.sql_data = None
        if isinstance(data, SqlTable):
            if data.approx_len() < 4000:
                # Small enough: convert to an in-memory Table.
                data = Table(data)
            else:
                # Work on a sample; add_data() may extend it later.
                self.Information.sampled_sql()
                self.sql_data = data
                data_sample = data.sample_time(0.8, no_cache=True)
                data_sample.download_data(2000, partial=True)
                data = Table(data_sample)
                self.sampling.setVisible(True)
                if self.auto_sample:
                    self.__timer.start()

        # Data with no rows or an empty domain is handled like no data.
        if data is not None and (len(data) == 0 or len(data.domain) == 0):
            data = None
        # Same data as before: nothing to update.
        if self.data and data and self.data.checksum() == data.checksum():
            return

        self.closeContext()
        same_domain = (self.data and data and
                       data.domain.checksum() == self.data.domain.checksum())
        self.data = data
        self.data_metas_X = self.move_primitive_metas_to_X(data)

        # Attribute choices are reset only when the domain changed.
        if not same_domain:
            self.init_attr_values()
        self.vizrank.initialize()
        self.vizrank.attrs = self.data.domain.attributes if self.data is not None else []
        # The VizRank button needs a class variable, more than one
        # attribute and more than one row.
        self.vizrank_button.setEnabled(
            self.data is not None and self.data.domain.class_var is not None
            and len(self.data.domain.attributes) > 1 and len(self.data) > 1)
        # Explain the disabled button when only the class variable is missing.
        if self.data is not None and self.data.domain.class_var is None \
            and len(self.data.domain.attributes) > 1 and len(self.data) > 1:
            self.vizrank_button.setToolTip(
                "Data with a class variable is required.")
        else:
            self.vizrank_button.setToolTip("")
        self.openContext(self.data)

        def findvar(name, iterable):
            """Find a Orange.data.Variable in `iterable` by name"""
            for el in iterable:
                if isinstance(el, Orange.data.Variable) and el.name == name:
                    return el
            return None

        # handle restored settings from  < 3.3.9 when attr_* were stored
        # by name
        # NOTE(review): label_model, color_model, shape_model and size_model
        # are not assigned in the visible part of this class -- confirm they
        # exist on the widget.
        if isinstance(self.attr_x, str):
            self.attr_x = findvar(self.attr_x, self.xy_model)
        if isinstance(self.attr_y, str):
            self.attr_y = findvar(self.attr_y, self.xy_model)
        if isinstance(self.graph.attr_label, str):
            self.graph.attr_label = findvar(
                self.graph.attr_label, self.label_model)
        if isinstance(self.graph.attr_color, str):
            self.graph.attr_color = findvar(
                self.graph.attr_color, self.color_model)
        if isinstance(self.graph.attr_shape, str):
            self.graph.attr_shape = findvar(
                self.graph.attr_shape, self.shape_model)
        if isinstance(self.graph.attr_size, str):
            self.graph.attr_size = findvar(
                self.graph.attr_size, self.size_model)

    def add_data(self, time=0.4):
        """Append another sample of the SQL data to the shown data.

        Once more than 2000 rows are held, the timer is stopped instead.
        `time` is passed on to ``sql_data.sample_time``.
        """
        if self.data and len(self.data) > 2000:
            return self.__timer.stop()
        sample = self.sql_data.sample_time(time, no_cache=True)
        if not sample:
            return
        sample.download_data(2000, partial=True)
        new_rows = Table(sample)
        self.data = Table.concatenate((self.data, new_rows), axis=0)
        self.data_metas_X = self.move_primitive_metas_to_X(self.data)
        self.handleNewSignals()

    def switch_sampling(self):
        """Stop the sampling timer; with auto-sampling on and SQL data
        present, add a sample right away and restart the timer."""
        self.__timer.stop()
        if not (self.auto_sample and self.sql_data):
            return
        self.add_data()
        self.__timer.start()

    def move_primitive_metas_to_X(self, data):
        """Return `data` converted so that primitive metas are attributes.

        The new domain's attributes are the primitive attributes and
        primitive metas of `data`; the non-primitive metas stay metas and
        the class variables are kept. None is returned unchanged.
        """
        if data is None:
            return data
        domain = data.domain
        primitive_vars = [var for var in domain.attributes + domain.metas
                          if var.is_primitive()]
        remaining_metas = [var for var in domain.metas
                           if not var.is_primitive()]
        new_domain = Domain(primitive_vars, domain.class_vars,
                            remaining_metas)
        return Table.from_table(new_domain, data)

    def set_subset_data(self, subset_data):
        """Handler of the "Data Subset" input.

        An SqlTable is converted to a Table when its approximate length is
        below ``AUTO_DL_LIMIT``; otherwise a warning is shown and the
        subset is dropped.
        """
        self.warning()
        if isinstance(subset_data, SqlTable):
            small_enough = subset_data.approx_len() < AUTO_DL_LIMIT
            if not small_enough:
                self.warning("Data subset does not support large Sql tables")
                subset_data = None
            else:
                subset_data = Table(subset_data)
        self.subset_data = self.move_primitive_metas_to_X(subset_data)
        # The alpha value control is enabled only when there is no subset.
        has_no_subset = subset_data is None
        self.controls.graph.alpha_value.setEnabled(has_no_subset)

    # called when all signals are received, so the graph is updated only once
    def handleNewSignals(self):
        """Pass the data to the graph, apply any requested axis
        attributes, redraw, restore the selection and commit."""
        graph = self.graph
        graph.new_data(self.data_metas_X, self.subset_data)

        # Attributes received on the "Features" input are used only when
        # all of them are in the graph's domain.
        requested = self.attribute_selection_list
        if requested and all(attr in graph.domain for attr in requested):
            self.attr_x = requested[0]
            self.attr_y = requested[1]
        self.attribute_selection_list = None

        self.update_graph()
        density_possible = graph.can_draw_density()
        self.cb_class_density.setEnabled(density_possible)
        regression_possible = graph.can_draw_regresssion_line()
        self.cb_reg_line.setEnabled(regression_possible)
        self.apply_selection()
        self.unconditional_commit()

    def apply_selection(self):
        """Apply selection saved in workflow.

        Stored indices that are not below the number of rows are dropped.
        """
        if self.data is None or self.selection is None:
            return
        n_rows = len(self.data)
        self.graph.selection = np.zeros(n_rows, dtype=np.uint8)
        self.selection = [index for index in self.selection
                          if index < n_rows]
        self.graph.selection[self.selection] = 1
        self.graph.update_colors(keep_colors=True)

    def set_shown_attributes(self, attributes):
        """Handler of the "Features" input.

        The first two attributes are remembered; with fewer than two, the
        stored list is reset to None.
        """
        usable = attributes and len(attributes) >= 2
        self.attribute_selection_list = attributes[:2] if usable else None

    def get_shown_attributes(self):
        """Return the x and y axis attributes as a pair."""
        x_attr = self.attr_x
        y_attr = self.attr_y
        return x_attr, y_attr

    def init_attr_values(self):
        """Point all models at the current domain and reset attributes.

        The axes default to the first two entries of the x/y model (the
        y axis repeats the x axis when there is only one), the colour
        defaults to the class variable, and shape, size and label are
        cleared.
        """
        domain = self.data and self.data.domain
        for model in self.models:
            model.set_domain(domain)

        xy = self.xy_model
        self.attr_x = xy[0] if xy else None
        if len(xy) >= 2:
            self.attr_y = xy[1]
        else:
            self.attr_y = self.attr_x

        class_var = self.data.domain.class_var if domain else None
        graph = self.graph
        graph.attr_color = class_var or None
        graph.attr_shape = None
        graph.attr_size = None
        graph.attr_label = None

    def set_attr(self, attr_x, attr_y):
        """Set both axis attributes and call ``update_attr``."""
        self.attr_x = attr_x
        self.attr_y = attr_y
        self.update_attr()

    def update_attr(self):
        """Redraw, refresh the density and regression-line checkboxes and
        send the "Features" output."""
        self.update_graph()
        graph = self.graph
        density_possible = graph.can_draw_density()
        self.cb_class_density.setEnabled(density_possible)
        regression_possible = graph.can_draw_regresssion_line()
        self.cb_reg_line.setEnabled(regression_possible)
        self.send_features()

    def update_colors(self):
        """Update the graph's colours and refresh the density checkbox."""
        graph = self.graph
        graph.update_colors()
        density_possible = graph.can_draw_density()
        self.cb_class_density.setEnabled(density_possible)

    def update_density(self):
        """Callback of the "Show class density" checkbox: redraw the graph
        with ``reset_view=False``."""
        self.update_graph(reset_view=False)

    def update_regression_line(self):
        """Callback of the "Show regression line" checkbox: redraw the
        graph with ``reset_view=False``."""
        self.update_graph(reset_view=False)

    def update_graph(self, reset_view=True, **_):
        """Redraw the graph for the current axis attributes.

        The left axis' text width and the graph's zoom stack are reset
        first; the graph is updated only when it holds data. Extra keyword
        arguments are ignored.
        """
        graph = self.graph
        left_axis = graph.plot_widget.getAxis("left")
        left_axis.textWidth = 0
        graph.zoomStack = []
        if graph.data is not None:
            graph.update_data(self.attr_x, self.attr_y, reset_view)

    def selection_changed(self):
        """Send the data outputs again via ``send_data``."""
        self.send_data()

    @staticmethod
    def create_groups_table(data, selection):
        """Return a copy of `data` with a meta column of selection groups.

        The added discrete meta variable has the values "Unselected",
        "G1", ..., one "G" value per group up to the maximum in
        `selection`; `selection` supplies the column's values. The name
        is obtained from ``get_next_name`` with the base "Selection group".
        Returns None when `data` is None.
        """
        if data is None:
            return None

        source_domain = data.domain
        taken_names = [var.name
                       for var in source_domain.variables + source_domain.metas]
        group_name = get_next_name(taken_names, "Selection group")

        group_values = ["Unselected"]
        group_values += ["G{}".format(i + 1)
                         for i in range(np.max(selection))]
        group_var = DiscreteVariable(group_name, group_values)

        domain = Domain(source_domain.attributes, source_domain.class_vars,
                        source_domain.metas + (group_var,))
        group_column = selection.reshape(len(data), 1)
        table = Table(
            domain, data.X, data.Y,
            metas=np.hstack((data.metas, group_column)))
        table.attributes = data.attributes
        table.ids = data.ids
        return table

    def send_data(self):
        """Send the "Selected Data" and annotated data outputs.

        For an SqlTable the whole table is sent as selected. Otherwise the
        rows selected in the graph are sent (None when nothing is
        selected). The annotated output holds selection groups when the
        graph's selection contains a value above 1, and the plain
        annotation from ``create_annotated_table`` otherwise.
        """
        selected = None
        selection = None
        # TODO: Implement selection for sql data
        graph = self.graph
        if isinstance(self.data, SqlTable):
            selected = self.data
        elif self.data is not None:
            selection = graph.get_selection()
            if len(selection) > 0:
                selected = self.data[selection]
        if graph.selection is not None and np.max(graph.selection) > 1:
            annotated = self.create_groups_table(self.data, graph.selection)
        else:
            annotated = create_annotated_table(self.data, selection)
        self.send("Selected Data", selected)
        self.send(ANNOTATED_DATA_SIGNAL_NAME, annotated)

        # Store current selection in a setting that is stored in workflow.
        # `selection` stays None without data or for SQL data; calling
        # len() on it would raise a TypeError, so check it first.
        # NOTE(review): the stored selection is only refreshed when one is
        # already stored -- confirm that this gating is intended.
        if self.selection is not None and selection is not None \
                and len(selection):
            self.selection = list(selection)

    def send_features(self):
        """Send the axis attributes on the "Features" output.

        A two-row table with one string meta column named "feature" is
        sent; None is sent when neither axis attribute is set.
        """
        if not (self.attr_x or self.attr_y):
            self.send("Features", None)
            return
        feature_domain = Domain([], metas=(StringVariable(name="feature"),))
        rows = [[self.attr_x], [self.attr_y]]
        features = Table(feature_domain, rows)
        features.name = "Features"
        self.send("Features", features)

    def commit(self):
        """Send the data outputs and then the "Features" output."""
        self.send_data()
        self.send_features()

    def get_widget_name_extension(self):
        """Return "<x name> vs <y name>", or None when there is no data."""
        if self.data is None:
            return None
        x_name = self.attr_x.name
        y_name = self.attr_y.name
        return "{} vs {}".format(x_name, y_name)

    def send_report(self):
        """Report the plot with a caption listing the point properties.

        The caption names the colour, label, shape and size attributes
        and the jitter size; the latter is shown only when an axis
        attribute is discrete or continuous jittering is enabled.
        Nothing is reported without data.
        """
        if self.data is None:
            return

        def var_name(var):
            return var and var.name

        graph = self.graph
        jittering_applies = (self.attr_x.is_discrete or
                             self.attr_y.is_discrete or
                             graph.jitter_continuous)
        items = (
            ("Color", var_name(graph.attr_color)),
            ("Label", var_name(graph.attr_label)),
            ("Shape", var_name(graph.attr_shape)),
            ("Size", var_name(graph.attr_size)),
            ("Jittering", jittering_applies and graph.jitter_size),
        )
        caption = report.render_items_vert(items)
        self.report_plot()
        if caption:
            self.report_caption(caption)

    def closeContext(self):
        """Clear the stored selection if a context is open, then close the
        context through the base class."""
        if self.current_context is not None:
            # When dataset changes, forget selection
            self.selection = None
        super().closeContext()

    def onDeleteWidget(self):
        """After the base-class cleanup, schedule the plot's view box for
        deletion and clear the plot widget."""
        super().onDeleteWidget()
        plot_widget = self.graph.plot_widget
        plot_widget.getViewBox().deleteLater()
        plot_widget.clear()
Esempio n. 9
0
class OWSparkSQLTableContext(SharedSparkContext, widget.OWWidget):
    """Widget that loads a Hive table as a Spark DataFrame.

    The user picks a database and one of its tables; on submit the table is
    read through the Hive context (``self.hc``) and sent on the "DataFrame"
    output.  The submitted choices are remembered in ``saved_gui_params``.
    """
    priority = 1
    name = "Hive Table"
    description = "Create a Spark DataFrame from a Hive Table"
    icon = "../icons/Hive.png"
    outputs = [("DataFrame", pyspark.sql.DataFrame, widget.Dynamic)]

    want_main_area = False
    resizing_enabled = True
    # Class-level defaults; __init__ gives every instance its own list so
    # these are never mutated.
    databases = ['default']
    tables = list()
    out_df = None
    database = ''
    table = ''
    saved_gui_params = Setting(OrderedDict())

    def __init__(self):
        """Build the database/table selectors and the submit button."""
        super().__init__()

        # Create parameters Box.
        box = gui.widgetBox(self.controlArea, "Spark SQL Table", addSpace=True)

        self.gui_parameters = OrderedDict()

        if self.hc:
            self.databases = self._query_databases()
        else:
            # Work on a private copy: appending the saved database below must
            # not modify the class-level default shared by all instances.
            self.databases = list(self.databases)

        default_value = self.saved_gui_params.get('database', 'default')
        if default_value not in self.databases:
            self.databases.append(default_value)
            self.database = default_value
        self.refresh_databases_btn = gui.button(
            box,
            self,
            label='Refresh databases',
            callback=self.fill_database_list)
        self.gui_parameters['database'] = GuiParam(
            parent_widget=box,
            list_values=self.databases,
            label='Database',
            default_value=default_value,
            callback_func=self.refresh_database)

        default_value = self.saved_gui_params.get('table', '')
        self.gui_parameters['table'] = GuiParam(parent_widget=box,
                                                label='Table',
                                                default_value=default_value,
                                                list_values=[default_value])
        self.refresh_database(self.gui_parameters['database'].get_value())

        action_box = gui.widgetBox(box)
        # Action Button
        self.create_sc_btn = gui.button(action_box,
                                        self,
                                        label='Submit',
                                        callback=self.submit)

    def _query_databases(self):
        """Return the database names reported by ``show databases``."""
        return [row.result for row in self.hc.sql("show databases").collect()]

    def fill_database_list(self):
        """Re-read the database list and update the database selector.

        Does nothing when no Hive context is available.
        """
        if self.hc:
            self.databases = self._query_databases()
            self.gui_parameters['database'].update(values=self.databases)

    def refresh_database(self, text):
        """Select database *text* and reload the table selector for it.

        Does nothing when no Hive context is available.  Tables are only
        queried for a non-empty database name.
        """
        if self.hc is None:
            return
        self.database = text
        # The original compared the ``databases`` list with '' (always true);
        # the name of the selected database is what has to be non-empty.
        if self.database:
            self.tables = self.hc.tableNames(self.database)
            self.gui_parameters['table'].update(values=self.tables)

    def dummy_func(self):
        """No-op placeholder kept for backward compatibility."""
        pass

    def submit(self):
        """Load ``<database>.<table>``, send it, save the choices and hide.

        Does nothing when no Hive context is available.
        """
        if self.hc is None:
            return
        self.database = self.gui_parameters['database'].get_value()
        self.table = self.gui_parameters['table'].get_value()
        self.out_df = self.hc.table(self.database + '.' + self.table)
        self.send("DataFrame", self.out_df)
        self.update_saved_gui_parameters()
        self.hide()

    def update_saved_gui_parameters(self):
        """Copy the current value of every GUI parameter into the settings."""
        for key, param in self.gui_parameters.items():
            self.saved_gui_params[key] = param.get_value()
Esempio n. 10
0
class OWTreeLearner(OWBaseLearner):
    """Tree algorithm with forward pruning.

    The widget exposes the pruning limits as checkable spin boxes; a limit
    takes effect only while its checkbox setting is on.
    """
    name = "Tree"
    description = "A tree algorithm with forward pruning."
    icon = "icons/Tree.svg"
    replaces = [
        "Orange.widgets.classify.owclassificationtree.OWClassificationTree",
        "Orange.widgets.regression.owregressiontree.OWRegressionTree",
        "Orange.widgets.classify.owclassificationtree.OWTreeLearner",
        "Orange.widgets.regression.owregressiontree.OWTreeLearner",
    ]
    priority = 30
    keywords = []

    LEARNER = TreeLearner

    binary_trees = Setting(True)
    limit_min_leaf = Setting(True)
    min_leaf = Setting(2)
    limit_min_internal = Setting(True)
    min_internal = Setting(5)
    limit_depth = Setting(True)
    max_depth = Setting(100)

    # Classification only settings
    limit_majority = Setting(True)
    sufficient_majority = Setting(95)

    # Each entry: (label, checkbox setting, value setting, minimum, maximum)
    spin_boxes = (
        ("Min. number of instances in leaves: ",
         "limit_min_leaf", "min_leaf", 1, 1000),
        ("Do not split subsets smaller than: ",
         "limit_min_internal", "min_internal", 1, 1000),
        ("Limit the maximal tree depth to: ",
         "limit_depth", "max_depth", 1, 1000))

    classification_spin_boxes = (
        ("Stop when majority reaches [%]: ",
         "limit_majority", "sufficient_majority", 51, 100),)

    def add_main_layout(self):
        """Build the "Parameters" box: binary-tree checkbox and pruning spins."""
        box = gui.widgetBox(self.controlArea, 'Parameters')
        # The checkbox is put into its own vBox for alignment with the other
        # checkboxes.
        checkbox_holder = gui.vBox(box)
        gui.checkBox(checkbox_holder, self, "binary_trees",
                     "Induce binary tree", callback=self.settings_changed)
        for text, check_attr, value_attr, low, high in self.spin_boxes:
            gui.spin(box, self, value_attr, low, high,
                     label=text, checked=check_attr,
                     alignment=Qt.AlignRight, controlWidth=80,
                     callback=self.settings_changed,
                     checkCallback=self.settings_changed)

    def add_classification_layout(self, box):
        """Add the classification-only spin boxes to *box*."""
        for entry in self.classification_spin_boxes:
            text, check_attr, value_attr, low, high = entry
            gui.spin(box, self, value_attr, low, high,
                     label=text, checked=check_attr,
                     alignment=Qt.AlignRight, controlWidth=80,
                     callback=self.settings_changed,
                     checkCallback=self.settings_changed)

    def learner_kwargs(self):
        """Return the keyword arguments passed to ``LEARNER``.

        A disabled limit is replaced by a fixed fallback: no depth limit,
        split size 2, leaf size 1 and a sufficient majority of 1.
        """
        # Pylint doesn't get our Settings
        # pylint: disable=invalid-sequence-index
        depth = (None, self.max_depth)[self.limit_depth]
        split_size = (2, self.min_internal)[self.limit_min_internal]
        leaf_size = (1, self.min_leaf)[self.limit_min_leaf]
        # The setting is a percentage; the learner gets a fraction.
        majority = (1, self.sufficient_majority / 100)[self.limit_majority]
        return {
            "max_depth": depth,
            "min_samples_split": split_size,
            "min_samples_leaf": leaf_size,
            "binarize": self.binary_trees,
            "preprocessors": self.preprocessors,
            "sufficient_majority": majority,
        }

    def create_learner(self):
        """Instantiate ``LEARNER`` with the current ``learner_kwargs()``."""
        # pylint: disable=not-callable
        kwargs = self.learner_kwargs()
        return self.LEARNER(**kwargs)

    def get_learner_parameters(self):
        """Return an ordered mapping describing the settings for the report."""
        from Orange.widgets.report import plural_w
        pruning_rules = (
            (plural_w("at least {number} instance{s} in leaves",
                      self.min_leaf), self.limit_min_leaf),
            (plural_w("at least {number} instance{s} in internal nodes",
                      self.min_internal), self.limit_min_internal),
            ("maximum depth {}".format(self.max_depth), self.limit_depth),
        )
        enabled = [text for text, active in pruning_rules if active]

        items = OrderedDict()
        items["Pruning"] = ", ".join(enabled) or "None"
        if self.limit_majority:
            items["Splitting"] = "Stop splitting when majority reaches %d%% " \
                                 "(classification only)" % \
                                 self.sufficient_majority
        items["Binary trees"] = ("No", "Yes")[self.binary_trees]
        return items
Esempio n. 11
0
class OWNYT(OWWidget):
    """Widget that searches the New York Times API and outputs a corpus.

    The API key is entered through the nested ``APICredentialsDialog``; the
    search itself is started via the ``search`` method, whose start, progress
    and result hooks are registered with the ``@search.*`` decorators below.
    """

    class APICredentialsDialog(OWWidget):
        """Dialog for entering and validating the NYT API key."""
        name = "New York Times API key"
        want_main_area = False
        resizing_enabled = False
        cm_key = CredentialManager('NY Times API Key')
        key_input = ''

        class Error(OWWidget.Error):
            invalid_credentials = Msg(
                'This credentials are invalid. '
                'Check the key and your internet connection.')

        def __init__(self, parent):
            """Build the key form and pre-fill it with the stored key.

            :param parent: the owning widget; its ``update_api`` is called
                once a key has been validated.
            """
            super().__init__()
            self.parent = parent
            self.api = None

            form = QFormLayout()
            form.setContentsMargins(5, 5, 5, 5)
            self.key_edit = gui.lineEdit(self,
                                         self,
                                         'key_input',
                                         controlWidth=400)
            form.addRow('Key:', self.key_edit)
            self.controlArea.layout().addLayout(form)
            self.submit_button = gui.button(self.controlArea, self, "OK",
                                            self.accept)

            self.load_credentials()

        def load_credentials(self):
            """Put the key held by the credential manager into the line edit."""
            self.key_edit.setText(self.cm_key.key)

        def save_credentials(self):
            """Store the entered key in the credential manager."""
            self.cm_key.key = self.key_input

        def check_credentials(self):
            """Validate the entered key and set ``self.api`` accordingly.

            A valid key is saved and its ``NYT`` object kept; otherwise
            ``self.api`` becomes ``None``.
            """
            api = NYT(self.key_input)
            if api.api_key_valid():
                self.save_credentials()
            else:
                api = None
            self.api = api

        def accept(self, silent=False):
            """Validate the key; on success hand the API to the parent and close.

            :param silent: when true, the invalid-credentials error is
                neither cleared nor shown.
            """
            if not silent: self.Error.invalid_credentials.clear()
            self.check_credentials()
            if self.api:
                self.parent.update_api(self.api)
                super().accept()
            elif not silent:
                self.Error.invalid_credentials()

    name = "NY Times"
    description = "Fetch articles from the New York Times search API."
    icon = "icons/NYTimes.svg"
    priority = 130

    class Outputs:
        corpus = Output("Corpus", Corpus)

    want_main_area = False
    resizing_enabled = False

    recent_queries = Setting([])
    # NOTE(review): both date defaults are computed once, when the class body
    # is executed, not each time a widget is created.
    date_from = Setting((datetime.now().date() - timedelta(365)))
    date_to = Setting(datetime.now().date())

    # Names of the string-typed NYT meta features offered in the check list.
    attributes = [
        feat.name for feat, _ in NYT.metas if isinstance(feat, StringVariable)
    ]
    text_includes = Setting([feat.name for feat in NYT.text_features])

    class Warning(OWWidget.Warning):
        no_text_fields = Msg(
            'Text features are inferred when none are selected.')

    class Error(OWWidget.Error):
        no_api = Msg('Please provide a valid API key.')
        no_query = Msg('Please provide a query.')
        offline = Msg('No internet connection.')
        api_error = Msg('API error: {}')
        rate_limit = Msg('Rate limit exceeded. Please try again later.')

    def __init__(self):
        """Build the GUI and silently try the stored API key."""
        super().__init__()
        self.corpus = None
        self.nyt_api = None
        self.output_info = ''
        self.num_retrieved = 0
        self.num_all = 0

        # API key
        self.api_dlg = self.APICredentialsDialog(self)
        # Validate the stored key right away, without showing an error.
        self.api_dlg.accept(silent=True)
        gui.button(self.controlArea,
                   self,
                   'Article API Key',
                   callback=self.api_dlg.exec_,
                   focusPolicy=Qt.NoFocus)

        # Query
        query_box = gui.widgetBox(self.controlArea, 'Query', addSpace=True)
        self.query_box = QueryBox(query_box,
                                  self,
                                  self.recent_queries,
                                  callback=self.new_query_input)

        # Year box
        date_box = gui.hBox(query_box)
        DatePickerInterval(date_box,
                           self,
                           'date_from',
                           'date_to',
                           min_date=MIN_DATE,
                           max_date=date.today(),
                           margin=(0, 3, 0, 0))

        # Text includes features
        self.controlArea.layout().addWidget(
            CheckListLayout('Text includes',
                            self,
                            'text_includes',
                            self.attributes,
                            cols=2,
                            callback=self.set_text_features))

        # Output
        info_box = gui.hBox(self.controlArea, 'Output')
        gui.label(info_box, self, 'Articles: %(output_info)s')

        # Buttons
        self.button_box = gui.hBox(self.controlArea)

        self.search_button = gui.button(self.button_box,
                                        self,
                                        'Search',
                                        self.start_stop,
                                        focusPolicy=Qt.NoFocus)

    def new_query_input(self):
        """Stop the current search and start one for the new query."""
        self.search.stop()
        self.run_search()

    def start_stop(self):
        """Stop the search if it is running; otherwise sync the query and start."""
        if self.search.running:
            self.search.stop()
        else:
            self.query_box.synchronize(silent=True)
            self.run_search()

    # NOTE(review): gui_require presumably raises the named error and skips
    # the call when the given attribute is falsy -- confirm in its definition.
    @gui_require('nyt_api', 'no_api')
    @gui_require('recent_queries', 'no_query')
    def run_search(self):
        """Start the search."""
        self.search()

    @asynchronous
    def search(self):
        """Search the API for the first recent query within the date range.

        Progress is reported through ``progress_with_info`` and the API is
        given ``self.search.should_break`` to poll for cancellation.
        NOTE(review): ``asynchronous`` presumably runs this off the GUI
        thread -- confirm in its definition.
        """
        return self.nyt_api.search(self.recent_queries[0],
                                   self.date_from,
                                   self.date_to,
                                   on_progress=self.progress_with_info,
                                   should_break=self.search.should_break)

    @search.callback(should_raise=False)
    def progress_with_info(self, n_retrieved, n_all):
        """Update the progress bar and the retrieved/all counters."""
        self.progressBarSet(
            100 *
            (n_retrieved / n_all if n_all else 1))  # prevent division by 0
        self.num_all = n_all
        self.num_retrieved = n_retrieved
        self.update_info_label()

    @search.on_start
    def on_start(self):
        """Reset errors, counters and the output before a search starts."""
        self.Error.api_error.clear()
        self.Error.rate_limit.clear()
        self.Error.offline.clear()
        self.num_all, self.num_retrieved = 0, 0
        self.update_info_label()
        self.progressBarInit()
        self.search_button.setText('Stop')
        self.Outputs.corpus.send(None)

    @search.on_result
    def on_result(self, result):
        """Keep *result* as the corpus, apply the text features and finish."""
        self.search_button.setText('Search')
        self.corpus = result
        # Also sends the corpus when it is not None.
        self.set_text_features()
        self.progressBarFinished()

    def update_info_label(self):
        """Refresh the "retrieved/all" text shown in the Output box."""
        self.output_info = '{}/{}'.format(self.num_retrieved, self.num_all)

    def set_text_features(self):
        """Apply the selected text features to the corpus and send it.

        Shows a warning when no text feature is selected.  Nothing is sent
        while there is no corpus.
        """
        self.Warning.no_text_fields.clear()
        if not self.text_includes:
            self.Warning.no_text_fields()

        if self.corpus is not None:
            vars_ = [
                var for var in self.corpus.domain.metas
                if var.name in self.text_includes
            ]
            # An empty selection is passed on as None.
            self.corpus.set_text_features(vars_ or None)
            self.Outputs.corpus.send(self.corpus)

    def update_api(self, api):
        """Use *api* for searches and route its error hooks to this widget."""
        self.nyt_api = api
        self.Error.no_api.clear()
        self.nyt_api.on_error = self.Error.api_error
        self.nyt_api.on_rate_limit = self.Error.rate_limit
        self.nyt_api.on_no_connection = self.Error.offline

    def send_report(self):
        """Report the query, the date range, the text fields and the output."""
        self.report_items([
            ('Query', self.recent_queries[0] if self.recent_queries else ''),
            ('Date from', self.date_from),
            ('Date to', self.date_to),
            ('Text includes', ', '.join(self.text_includes)),
            ('Output', self.output_info or 'Nothing'),
        ])
Esempio n. 12
0
class OWAggregateColumns(widget.OWWidget):
    """Append a column holding a row-wise aggregate of the selected columns.

    The aggregate (sum, product, min, ...) is computed with the NaN-aware
    numpy function listed in ``Operations`` and added to the input table
    under a name derived from ``var_name``.
    """
    name = "Aggregate Columns"
    description = "Compute a sum, max, min ... of selected columns."
    category = "Transform"
    icon = "icons/AggregateColumns.svg"
    priority = 1200
    keywords = ["aggregate", "sum", "product", "max", "min", "mean",
                "median", "variance"]

    class Inputs:
        data = Input("Data", Table, default=True)

    class Outputs:
        data = Output("Data", Table)

    want_main_area = False

    settingsHandler = DomainContextHandler()
    variables: List[Variable] = ContextSetting([])
    operation = Setting("Sum")
    var_name = Setting("agg")
    auto_apply = Setting(True)

    # Operation name (as shown in the combo box) -> row-wise numpy function.
    Operations = {"Sum": np.nansum, "Product": np.nanprod,
                  "Min": np.nanmin, "Max": np.nanmax,
                  "Mean": np.nanmean, "Variance": np.nanvar,
                  "Median": np.nanmedian}
    # Operations whose result is a TimeVariable when all inputs are.
    TimePreserving = ("Min", "Max", "Mean", "Median")

    def __init__(self):
        """Build the variable list, operator combo, name edit and apply box."""
        super().__init__()
        self.data = None

        box = gui.vBox(self.controlArea, box=True)

        self.variable_model = DomainModel(
            order=DomainModel.MIXED, valid_types=(ContinuousVariable, ))
        var_list = gui.listView(
            box, self, "variables", model=self.variable_model,
            callback=self.commit.deferred
        )
        var_list.setSelectionMode(var_list.ExtendedSelection)

        combo = gui.comboBox(
            box, self, "operation",
            label="Operator: ", orientation=Qt.Horizontal,
            items=list(self.Operations), sendSelectedValue=True,
            callback=self.commit.deferred
        )
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)

        gui.lineEdit(
            box, self, "var_name",
            label="Variable name: ", orientation=Qt.Horizontal,
            callback=self.commit.deferred
        )

        gui.auto_apply(self.controlArea, self)

    @Inputs.data
    def set_data(self, data: Table = None):
        """Accept new input data, reset the selection and recompute output."""
        self.closeContext()
        self.variables.clear()
        self.data = data
        if self.data:
            self.variable_model.set_domain(data.domain)
            self.openContext(data)
        else:
            self.variable_model.set_domain(None)
        self.commit.now()

    @gui.deferred
    def commit(self):
        """Compute the augmented table and send it on the output."""
        augmented = self._compute_data()
        self.Outputs.data.send(augmented)

    def _compute_data(self):
        """Return the data with the aggregate column added.

        The input is returned unchanged when there is no data or no
        variable is selected.
        """
        if not self.data or not self.variables:
            return self.data

        new_col = self._compute_column()
        new_var = self._new_var()
        return self.data.add_column(new_var, new_col)

    def _compute_column(self):
        """Return the row-wise aggregate of the selected columns."""
        arr = np.empty((len(self.data), len(self.variables)))
        for i, var in enumerate(self.variables):
            arr[:, i] = self.data.get_column_view(var)[0].astype(float)
        func = self.Operations[self.operation]
        return func(arr, axis=1)

    def _new_var_name(self):
        """Return ``var_name`` made unique within the data's domain."""
        return get_unique_names(self.data.domain, self.var_name)

    def _new_var(self):
        """Create the output variable.

        It is a ``TimeVariable`` when the operation is time-preserving and
        every selected variable is a ``TimeVariable``; otherwise a
        ``ContinuousVariable``.
        """
        name = self._new_var_name()
        if self.operation in self.TimePreserving \
                and all(isinstance(var, TimeVariable) for var in self.variables):
            return TimeVariable(name)
        return ContinuousVariable(name)

    def send_report(self):
        """Report the new variable, the operation and the columns it covers.

        At most 30 column names are listed; the rest are summarized as
        "and N others".
        """
        if not self.data or not self.variables:
            return
        names = [f"'{var.name}'" for var in self.variables]
        if len(names) > 30:
            var_list = ", ".join(names[:30]) \
                + f" and {len(names) - 30} others"
        elif len(names) > 1:
            var_list = ", ".join(names[:-1]) + f" and {names[-1]}"
        else:
            # A single variable used to be rendered as " and 'x'".
            var_list = names[0]
        self.report_items((
            ("Output:",
             f"'{self._new_var_name()}' as {self.operation.lower()} of {var_list}"
            ),
        ))
Esempio n. 13
0
class OWKNNLearner(OWProvidesLearner, widget.OWWidget):
    """Widget that builds a k-nearest-neighbors learner and classifier.

    The learner is always sent; the classifier is sent only when input data
    is present and the learner is adequate for its domain.
    """
    name = "Nearest Neighbors"
    description = "k-nearest neighbors classification algorithm."
    icon = "icons/KNN.svg"
    inputs = [("Data", Table, "set_data")] + OWProvidesLearner.inputs
    outputs = [("Learner", KNNLearner), ("Classifier", SklModel)]

    want_main_area = False
    resizing_enabled = False

    # Option values in the order shown in the combo boxes; the index
    # settings below point into these lists.
    weights = ["uniform", "distance"]
    metrics = ["euclidean", "manhattan", "chebyshev", "mahalanobis"]

    learner_name = Setting("kNN")
    n_neighbors = Setting(5)
    metric_index = Setting(0)
    weight_type = Setting(0)

    LEARNER = KNNLearner

    def __init__(self):
        """Build the GUI and send the initial learner."""
        super().__init__()
        self.data = None
        self.preprocessors = None

        box = gui.widgetBox(self.controlArea, "Learner/Classifier Name")
        gui.lineEdit(box, self, "learner_name")

        box = gui.widgetBox(self.controlArea, "Neighbors")
        gui.spin(box, self, "n_neighbors", 1, 100, label="Number of neighbors",
                 alignment=Qt.AlignRight)
        gui.comboBox(box, self, "metric_index", label="Metric",
                     orientation="horizontal",
                     items=[i.capitalize() for i in self.metrics])
        gui.comboBox(box, self, "weight_type", label='Weight',
                     orientation="horizontal",
                     items=[i.capitalize() for i in self.weights])

        g = QHBoxLayout()
        self.controlArea.layout().addLayout(g)
        apply = gui.button(None, self, "Apply",
                           callback=self.apply, default=True)
        g.layout().addWidget(self.report_button)
        g.layout().addWidget(apply)
        self.apply()

    @check_sql_input
    def set_data(self, data):
        """Store the input data and refresh both outputs.

        ``apply`` is also run when the data is removed (``None``) so that a
        classifier trained on the previous data does not stay on the output.
        """
        self.data = data
        self.apply()

    def apply(self):
        """Build the learner from the settings and send learner and classifier.

        The classifier is ``None`` when there is no data or when the learner
        is not adequate for the data's domain; in the latter case error 0
        carries the learner's message.
        """
        learner = self.LEARNER(
            n_neighbors=self.n_neighbors,
            metric=self.metrics[self.metric_index],
            weights=self.weights[self.weight_type],
            preprocessors=self.preprocessors
        )
        learner.name = self.learner_name
        classifier = None

        # Clear unconditionally so that an error raised for earlier data
        # does not outlive that data.
        self.error(0)
        if self.data is not None:
            if not learner.check_learner_adequacy(self.data.domain):
                self.error(0, learner.learner_adequacy_err_msg)
            else:
                classifier = learner(self.data)
                classifier.name = self.learner_name

        self.send("Learner", learner)
        self.send("Classifier", classifier)

    def send_report(self):
        """Report the learner name, the model parameters and the data."""
        self.report_items((("Name", self.learner_name),))
        self.report_items("Model parameters", (
            ("Number of neighbours", self.n_neighbors),
            ("Metric", self.metrics[self.metric_index].capitalize()),
            ("Weight", self.weights[self.weight_type].capitalize())))
        if self.data:
            self.report_data("Data", self.data)
class OWSelectAttributes(widget.OWWidget):
    name = "Select Columns"
    description = "Select columns from the data table and assign them to " \
                  "data features, classes or meta variables."
    icon = "icons/SelectColumns.svg"
    priority = 100

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)
        features = Output("Features", widget.AttributeList, dynamic=False)

    want_main_area = False
    want_control_area = True

    settingsHandler = SelectAttributesDomainContextHandler()
    domain_role_hints = ContextSetting({})
    auto_commit = Setting(True)

    def __init__(self):
        """Build the four variable lists, the move buttons and the send row.

        The control area is replaced by a plain widget with a grid layout:
        column 0 holds the available variables, column 1 the move buttons
        and column 2 the feature, target and meta lists.
        """
        super().__init__()
        self.controlArea = QWidget(self.controlArea)
        self.layout().addWidget(self.controlArea)
        layout = QGridLayout()
        self.controlArea.setLayout(layout)
        layout.setContentsMargins(4, 4, 4, 4)

        def dropcompleted(action):
            # A completed drag-and-drop move changed the lists: commit.
            if action == Qt.MoveAction:
                self.commit()

        def connect_view(view):
            # Connect each handler exactly once; the available-variables
            # view used to get its selection handler connected twice.
            view.selectionModel().selectionChanged.connect(
                partial(self.update_interface_state, view))
            view.dragDropActionDidComplete.connect(dropcompleted)

        def add_move_buttons(row, view, **move_kwargs):
            # One column of "Up", ">" and "Down" buttons acting on `view`,
            # placed in grid column 1 at `row`.
            bbox = gui.vBox(self.controlArea, addToLayout=False, margin=0)
            layout.addWidget(bbox, row, 1, 1, 1)
            up = gui.button(bbox, self, "Up",
                            callback=partial(self.move_up, view))
            move = gui.button(bbox, self, ">",
                              callback=partial(self.move_selected, view,
                                               **move_kwargs))
            down = gui.button(bbox, self, "Down",
                              callback=partial(self.move_down, view))
            return up, move, down

        box = gui.vBox(self.controlArea,
                       "Available Variables",
                       addToLayout=False)
        self.available_attrs = VariableListModel(enable_dnd=True)
        filter_edit, self.available_attrs_view = variables_filter(
            parent=self, model=self.available_attrs)
        box.layout().addWidget(filter_edit)
        connect_view(self.available_attrs_view)
        box.layout().addWidget(self.available_attrs_view)
        layout.addWidget(box, 0, 0, 3, 1)

        box = gui.vBox(self.controlArea, "Features", addToLayout=False)
        self.used_attrs = VariableListModel(enable_dnd=True)
        self.used_attrs_view = VariablesListItemView(
            acceptedType=(Orange.data.DiscreteVariable,
                          Orange.data.ContinuousVariable))
        self.used_attrs_view.setModel(self.used_attrs)
        connect_view(self.used_attrs_view)
        box.layout().addWidget(self.used_attrs_view)
        layout.addWidget(box, 0, 2, 1, 1)

        box = gui.vBox(self.controlArea, "Target Variable", addToLayout=False)
        self.class_attrs = ClassVarListItemModel(enable_dnd=True)
        self.class_attrs_view = ClassVariableItemView(
            acceptedType=(Orange.data.DiscreteVariable,
                          Orange.data.ContinuousVariable))
        self.class_attrs_view.setModel(self.class_attrs)
        connect_view(self.class_attrs_view)
        self.class_attrs_view.setMaximumHeight(72)
        box.layout().addWidget(self.class_attrs_view)
        layout.addWidget(box, 1, 2, 1, 1)

        box = gui.vBox(self.controlArea, "Meta Attributes", addToLayout=False)
        self.meta_attrs = VariableListModel(enable_dnd=True)
        self.meta_attrs_view = VariablesListItemView(
            acceptedType=Orange.data.Variable)
        self.meta_attrs_view.setModel(self.meta_attrs)
        connect_view(self.meta_attrs_view)
        box.layout().addWidget(self.meta_attrs_view)
        layout.addWidget(box, 2, 2, 1, 1)

        (self.up_attr_button,
         self.move_attr_button,
         self.down_attr_button) = add_move_buttons(0, self.used_attrs_view)

        (self.up_class_button,
         self.move_class_button,
         self.down_class_button) = add_move_buttons(
             1, self.class_attrs_view, exclusive=False)

        (self.up_meta_button,
         self.move_meta_button,
         self.down_meta_button) = add_move_buttons(2, self.meta_attrs_view)

        autobox = gui.auto_commit(None, self, "auto_commit", "Send")
        layout.addWidget(autobox, 3, 0, 1, 3)
        reset = gui.button(None, self, "Reset", callback=self.reset, width=120)
        autobox.layout().insertWidget(0, reset)
        autobox.layout().insertStretch(1, 20)

        layout.setRowStretch(0, 4)
        layout.setRowStretch(1, 0)
        layout.setRowStretch(2, 2)
        layout.setHorizontalSpacing(0)
        # NOTE(review): the layout was already installed above; this second
        # call looks redundant -- confirm before removing.
        self.controlArea.setLayout(layout)

        self.data = None
        self.output_data = None
        self.original_completer_items = []

        self.resize(500, 600)

    @Inputs.data
    def set_data(self, data=None):
        """Accept new input data and distribute its variables to the lists.

        Each variable gets a (role, position) hint: first from its place in
        the data's domain, then overridden by the hints restored from the
        context settings.  The lists are filled according to these hints,
        ordered by position.  Without data all four lists are emptied.
        """
        self.update_domain_role_hints()
        self.closeContext()
        self.data = data
        if data is None:
            self.used_attrs[:] = []
            self.class_attrs[:] = []
            self.meta_attrs[:] = []
            self.available_attrs[:] = []
        else:
            self.openContext(data)
            domain = data.domain
            all_vars = domain.variables + domain.metas

            def signature(var):
                return (var.name, vartype(var))

            # Hints implied by the domain itself; for a repeated signature
            # the later role wins (attribute, then meta, then class).
            hints = {}
            for role, role_vars in (("attribute", domain.attributes),
                                    ("meta", domain.metas),
                                    ("class", domain.class_vars)):
                for position, var in enumerate(role_vars):
                    hints[signature(var)] = (role, position)

            # update the hints from context settings
            hints.update(self.domain_role_hints)

            def ordered(role):
                placed = [(hints[signature(var)][1], var)
                          for var in all_vars
                          if hints[signature(var)][0] == role]
                placed.sort(key=lambda item: item[0])
                return [var for _, var in placed]

            attributes = ordered("attribute")
            classes = ordered("class")
            metas = ordered("meta")
            available = ordered("available")

            self.used_attrs[:] = attributes
            self.class_attrs[:] = classes
            self.meta_attrs[:] = metas
            self.available_attrs[:] = available

        self.unconditional_commit()

    def update_domain_role_hints(self):
        """Rebuild ``self.domain_role_hints`` from the four variable models.

        Every variable is keyed by ``(name, vartype(variable))`` and mapped to
        ``(role, position)``: the role is "available", "attribute", "class" or
        "meta", the position is the variable's index in the respective model.
        Models are processed in that order, so a key occurring in more than
        one model ends up with the role of the last one.
        """
        models_by_role = (
            ("available", self.available_attrs),
            ("attribute", self.used_attrs),
            ("class", self.class_attrs),
            ("meta", self.meta_attrs),
        )
        hints = {}
        for role, model in models_by_role:
            for position, attr in enumerate(model):
                hints[(attr.name, vartype(attr))] = (role, position)
        self.domain_role_hints = hints

    def selected_rows(self, view):
        """Return the row numbers of the rows selected in `view`.

        If the view's model is a `QSortFilterProxyModel`, the selected indexes
        are mapped to the source model first, so the returned numbers refer
        to rows of the source model.
        """
        indexes = view.selectionModel().selectedRows()
        model = view.model()
        if not isinstance(model, QSortFilterProxyModel):
            return [index.row() for index in indexes]
        return [model.mapToSource(index).row() for index in indexes]

    def move_rows(self, view, rows, offset):
        """Shift the given rows of the view's model by `offset` and commit.

        Target positions are clamped to the model's valid index range. Each
        row is swapped with its target, processing the pairs in descending
        order when moving down and in ascending order when moving up. The
        target rows are then selected in the view and the output is updated.
        """
        model = view.model()

        def clamped(position):
            return min(max(0, position), len(model) - 1)

        targets = [clamped(row + offset) for row in rows]

        moving_down = offset > 0
        for source, target in sorted(zip(rows, targets), reverse=moving_down):
            model[source], model[target] = model[target], model[source]

        # Re-select the rows at their new positions
        selection = QItemSelection()
        for target in targets:
            index = model.index(target, 0)
            selection.select(index, index)
        selection_model = view.selectionModel()
        selection_model.select(selection, QItemSelectionModel.ClearAndSelect)

        self.commit()

    def move_up(self, view):
        """Move the rows selected in `view` one position up."""
        self.move_rows(view, self.selected_rows(view), -1)

    def move_down(self, view):
        """Move the rows selected in `view` one position down."""
        self.move_rows(view, self.selected_rows(view), 1)

    def move_selected(self, view, exclusive=False):
        """Move the current selection between `view` and the available list.

        A selection in `view` is moved to the view of available variables.
        Otherwise a selection in the view of available variables is moved
        into `view`, passing `exclusive` along. Nothing happens when neither
        view has selected rows.
        """
        if self.selected_rows(view):
            self.move_selected_from_to(view, self.available_attrs_view)
            return
        available_view = self.available_attrs_view
        if self.selected_rows(available_view):
            self.move_selected_from_to(available_view, view, exclusive)

    def move_selected_from_to(self, src, dst, exclusive=False):
        """Move the rows currently selected in view `src` to view `dst`."""
        selection = self.selected_rows(src)
        self.move_from_to(src, dst, selection, exclusive)

    def move_from_to(self, src, dst, rows, exclusive=False):
        """Move the given rows from the model of `src` to the model of `dst`.

        The variables at `rows` are removed from the source model, appended
        to the end of the destination model in the order given by `rows`,
        and the output is recomputed. `exclusive` is accepted but not used
        by this method.
        """
        origin = source_model(src)
        moved = [origin[row] for row in rows]

        # presumably `slices` yields (start, stop) runs of consecutive rows;
        # they are deleted last-to-first -- verify against the helper
        for start, stop in reversed(list(slices(rows))):
            del origin[start:stop]

        source_model(dst).extend(moved)

        self.commit()

    def update_interface_state(self,
                               focus=None,
                               selected=None,
                               deselected=None):
        """Keep a single active selection and refresh the move buttons.

        The selection is cleared in every view that is neither `focus` nor
        holds the keyboard focus. Each move button is then enabled when there
        is something it could move, and its text is set to ">" when variables
        are selected in the available list and "<" otherwise. Moving from the
        available list to attributes or classes additionally requires all
        selected variables to be of primitive types.

        `selected` and `deselected` are accepted but not used.
        """
        views = (self.available_attrs_view, self.used_attrs_view,
                 self.class_attrs_view, self.meta_attrs_view)
        for view in views:
            if view is focus or view.hasFocus():
                continue
            if self.selected_rows(view):
                view.selectionModel().clear()

        def variables_selected_in(view):
            model = source_model(view)
            return [model[row] for row in self.selected_rows(view)]

        from_available = variables_selected_in(self.available_attrs_view)
        from_attrs = variables_selected_in(self.used_attrs_view)
        from_class = variables_selected_in(self.class_attrs_view)
        from_meta = variables_selected_in(self.meta_attrs_view)

        selected_types = {type(var) for var in from_available}
        all_primitive = all(
            var_type.is_primitive() for var_type in selected_types)

        # The arrow direction depends only on the available list's selection
        arrow = ">" if from_available else "<"
        button_states = (
            (self.move_attr_button,
             (from_available and all_primitive) or from_attrs),
            (self.move_class_button,
             (all_primitive and from_available) or from_class),
            (self.move_meta_button,
             from_available or from_meta),
        )
        for button, enabled in button_states:
            button.setEnabled(bool(enabled))
            if enabled:
                button.setText(arrow)

    def commit(self):
        """Store the role hints and send the data in the chosen domain.

        Without input data, ``None`` is sent on both outputs. Otherwise a new
        domain is built from the attribute, class and meta models, the input
        data is transformed into it, and the result is stored in
        ``self.output_data`` and sent together with the list of attributes.
        """
        self.update_domain_role_hints()
        if self.data is None:
            self.output_data = None
            self.Outputs.data.send(None)
            self.Outputs.features.send(None)
            return

        attributes = list(self.used_attrs)
        domain = Orange.data.Domain(
            attributes, list(self.class_attrs), list(self.meta_attrs))
        transformed = self.data.transform(domain)
        self.output_data = transformed
        self.Outputs.data.send(transformed)
        self.Outputs.features.send(widget.AttributeList(attributes))

    def reset(self):
        """Restore the variable roles of the input domain and commit.

        The available list is emptied and the attribute, class and meta
        models are refilled (in place) from the input data's domain. Does
        nothing when there is no input data.
        """
        if self.data is None:
            return
        domain = self.data.domain
        self.available_attrs[:] = []
        self.used_attrs[:] = domain.attributes
        self.class_attrs[:] = domain.class_vars
        self.meta_attrs[:] = domain.metas
        self.update_domain_role_hints()
        self.commit()

    def send_report(self):
        """Add the input and output domains to the widget's report.

        Nothing is reported unless both the input data and the output data
        are set and truthy. When the output domain has the same attributes,
        class variables and metas as the input domain, the report only says
        "No changes."; otherwise the output domain is reported, followed by
        the variables that were removed.

        Removed variables are listed in the order in which they appear in the
        input domain (variables first, then metas), so the report text is
        reproducible instead of depending on set iteration order.
        """
        if not self.data or not self.output_data:
            return
        in_domain, out_domain = self.data.domain, self.output_data.domain
        self.report_domain("Input data", in_domain)

        in_roles = (in_domain.attributes, in_domain.class_vars,
                    in_domain.metas)
        out_roles = (out_domain.attributes, out_domain.class_vars,
                     out_domain.metas)
        if in_roles == out_roles:
            self.report_paragraph("Output data", "No changes.")
            return

        self.report_domain("Output data", out_domain)
        skip = set(out_domain.variables + out_domain.metas)
        removed = []
        for var in in_domain.variables + in_domain.metas:
            if var not in skip:
                removed.append(var)
                # never list the same variable twice
                skip.add(var)
        if removed:
            text = "%i (%s)" % (len(removed),
                                ", ".join(var.name for var in removed))
            self.report_items((("Removed", text), ))
Esempio n. 15
0
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `set_data` is a signal handler that fills the list boxes and calls
    `grouping_changed`.

    - `grouping_changed` handles changes of grouping attribute: it enables or
    disables the box for ordering, orders attributes and calls `attr_changed`.

    - `attr_changed` handles changes of attribute. It recomputes box data by
    calling `compute_box_data`, shows the appropriate display box
    (discrete/continuous) and then calls `layout_changed`.

    - `layout_changed` constructs all the elements for the scene (as lists of
    QGraphicsItemGroup) and calls `display_changed`. It is called when the
    attribute or grouping is changed (by attr_changed) and on resize event.

    - `display_changed` puts the elements corresponding to the current display
    settings on the scene. It is called when the elements are reconstructed
    (layout is changed due to selection of attributes or resize event), or
    when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by layout_changed). Instead, layout_changed and
    display_changed call display_changed_disc that draws everything.
    """
    # Widget metadata
    name = "Box Plot"
    description = "Visualize the distribution of feature values in a box plot."
    icon = "icons/BoxPlot.svg"
    priority = 100

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data",
                               Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    # Settings: ContextSetting values are stored per data domain through the
    # DomainContextHandler, Setting values are plain widget settings
    settingsHandler = DomainContextHandler()
    conditions = ContextSetting([])

    attribute = ContextSetting(None)
    order_by_importance = Setting(False)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMeans)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)
    show_labels = Setting(True)
    auto_commit = Setting(True)

    # Name of the per-box statistic used for ordering the boxes for each
    # comparison type; an empty string means no reordering
    _sorting_criteria_attrs = {
        CompareNone: "",
        CompareMedians: "median",
        CompareMeans: "mean"
    }

    # Pens shared by all instances; all of them are made cosmetic with round
    # caps and joins below, except that the axis tick pen gets a flat cap
    _pen_axis_tick = QPen(Qt.white, 5)
    _pen_axis = QPen(Qt.darkGray, 3)
    _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(Qt.DotLine)
    _post_line_pen = QPen(Qt.lightGray, 2)
    _post_grp_pen = QPen(Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted, _pen_axis,
                _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
    _pen_axis_tick.setCapStyle(Qt.FlatCap)

    _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0))

    # Fonts and brush for axis and label texts
    _axis_font = QFont()
    _axis_font.setPixelSize(12)
    _label_font = QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QBrush(QColor(0x33, 0x00, 0xff))

    graph_name = "box_scene"

    def __init__(self):
        """Initialise the widget state and build its user interface.

        The control area gets the list of variables (with the "Order by
        relevance" check box), the list of grouping variables, two
        alternative "Display" boxes (one with the annotation and comparison
        options, one with the bar options) and the commit button. The main
        area gets the graphics view with the plot and a label for test
        results.
        """
        super().__init__()
        self.stats = []
        self.dataset = None
        self.posthoc_lines = []

        # NOTE(review): all seven names below are bound to the SAME list
        # object; this is harmless only as long as they are always rebound
        # and never mutated in place.
        self.label_txts = self.mean_labels = self.boxes = self.labels = \
            self.label_txts_all = self.attr_labels = self.order = []
        self.p = -1.0
        self.scale_x = self.scene_min_x = self.scene_width = 0
        self.label_width = 0

        # Both list views show class variables first, then metas, then
        # attributes
        order = (DomainModel.CLASSES, DomainModel.METAS,
                 DomainModel.ATTRIBUTES)
        self.attrs = DomainModel(order=order,
                                 valid_types=DomainModel.PRIMITIVE)
        view = gui.listView(self.controlArea,
                            self,
                            "attribute",
                            box="Variable",
                            model=self.attrs,
                            callback=self.attr_changed)
        view.setMinimumSize(QSize(30, 30))
        # Any other policy than Ignored will let the QListBox's scrollbar
        # set the minimal height (see the penultimate paragraph of
        # http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
        gui.separator(view.box, 6, 6)
        self.cb_order = gui.checkBox(
            view.box,
            self,
            "order_by_importance",
            "Order by relevance",
            tooltip="Order by 𝜒² or ANOVA over the subgroups",
            callback=self.apply_sorting)
        # Only discrete variables can be used for grouping
        self.group_vars = DomainModel(order=order,
                                      placeholder="None",
                                      valid_types=Orange.data.DiscreteVariable)
        self.group_vars.clear()  # Remove 'None' from the list view
        view = gui.listView(self.controlArea,
                            self,
                            "group_var",
                            box="Subgroups",
                            model=self.group_vars,
                            callback=self.grouping_changed)
        view.setMinimumSize(QSize(30, 30))
        # See the comment above
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)

        # TODO: move Compare median/mean to grouping box
        # The vertical size policy is needed to let only the list views expand
        self.display_box = gui.vBox(self.controlArea,
                                    "Display",
                                    sizePolicy=(QSizePolicy.Minimum,
                                                QSizePolicy.Maximum))

        gui.checkBox(self.display_box,
                     self,
                     "show_annotations",
                     "Annotate",
                     callback=self.display_changed)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box,
            self,
            'compare',
            btnLabels=["No comparison", "Compare medians", "Compare means"],
            callback=self.layout_changed)

        # The vertical size policy is needed to let only the list views expand
        self.stretching_box = box = gui.vBox(self.controlArea,
                                             box="Display",
                                             sizePolicy=(QSizePolicy.Minimum,
                                                         QSizePolicy.Maximum))
        gui.checkBox(box,
                     self,
                     'stretched',
                     "Stretch bars",
                     callback=self.display_changed)
        gui.checkBox(box,
                     self,
                     'show_labels',
                     "Show box labels",
                     callback=self.display_changed)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        gui.vBox(self.mainArea, addSpace=True)
        self.box_scene = QGraphicsScene()
        # Changing the selection of boxes in the scene triggers a commit
        self.box_scene.selectionChanged.connect(self.commit)
        self.box_view = QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QPainter.Antialiasing
                                     | QPainter.TextAntialiasing
                                     | QPainter.SmoothPixmapTransform)
        # Viewport events pass through self.eventFilter, which reacts to
        # resize events
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        e = gui.hBox(self.mainArea, addSpace=False)
        self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>")
        self.mainArea.setMinimumWidth(600)

        # NOTE(review): these three names share a single list object as well
        self.stats = self.dist = self.conts = []
        self.is_continuous = False

        self.update_display_box()

    def sizeHint(self):
        """Return the preferred size of the widget."""
        # Vertical size is regulated by mainArea
        width, height = 100, 500
        return QSize(width, height)

    def eventFilter(self, obj, event):
        """Rebuild the layout when the plot's viewport is resized.

        The event is then passed on to the base class filter, whose result
        is returned.
        """
        if obj is self.box_view.viewport():
            if event.type() == QEvent.Resize:
                self.layout_changed()

        return super().eventFilter(obj, event)

    # noinspection PyTypeChecker
    @Inputs.data
    def set_data(self, dataset):
        """Handle a new input table (or its removal).

        A table that is falsy or has an empty domain is treated like
        ``None``. The open context is closed, and the cached statistics and
        the selected variables are cleared. With usable data, both variable
        models are filled from its domain, default variables are chosen, the
        context is opened and the plot is rebuilt through
        `grouping_changed`; without it, everything is reset. The output is
        committed at the end in both cases.
        """
        if dataset is not None:
            is_empty = not bool(dataset) or not len(dataset.domain)
            if is_empty:
                dataset = None
        self.closeContext()
        self.dataset = dataset
        self.dist = self.stats = self.conts = []
        self.group_var = None
        self.attribute = None
        if not dataset:
            self.reset_all_data()
        else:
            domain = dataset.domain
            self.group_vars.set_domain(domain)
            self.attrs.set_domain(domain)
            self.select_default_variables(domain)
            self.openContext(self.dataset)
            self.grouping_changed()
        self.commit()

    def select_default_variables(self, domain):
        """Pick the initially shown variable and the grouping variable.

        The shown variable is the entry of ``self.attrs`` that follows the
        class variables (the first non-class variable), or the first entry
        when there is none; it is left unchanged when the model is empty.
        The grouping variable is the class variable if it exists and is
        discrete, and ``None`` otherwise.
        """
        n_classes = len(domain.class_vars)
        if len(self.attrs) > n_classes:
            self.attribute = self.attrs[n_classes]
        elif self.attrs:
            self.attribute = self.attrs[0]

        group_by_class = domain.class_var and domain.class_var.is_discrete
        # None resets the grouping to trigger selection via callback
        self.group_var = domain.class_var if group_by_class else None

    def apply_sorting(self):
        """Order the variable list by relevance, or refill it from the domain.

        When "order by relevance" is checked and a grouping variable is set,
        the variables in ``self.attrs`` are sorted with `compute_score` as
        the key; otherwise the model is refilled from the data's domain.
        The previously selected variable is re-assigned at the end.
        Does nothing when there is no data.
        """
        def compute_score(attr):
            # Sort key for a variable: the p-value of a test of the variable
            # against the grouping variable. The constants 2 (score cannot be
            # computed) and 3 (the grouping variable itself) are larger than
            # any p-value.
            # NOTE: reads data, group_var, n_groups and group_col from the
            # enclosing scope; they are bound below, before sort() is called.
            if attr is group_var:
                return 3
            if attr.is_continuous:
                # One-way ANOVA
                col = data.get_column_view(attr)[0].astype(float)
                # Split the column by group, drop NaNs and empty groups
                groups = (col[group_col == i] for i in range(n_groups))
                groups = (col[~np.isnan(col)] for col in groups)
                groups = [group for group in groups if len(group)]
                p = f_oneway(*groups)[1] if len(groups) > 1 else 2
            else:
                # Chi-square with the given distribution into groups
                # (see degrees of freedom in computation of the p-value)
                if not attr.values or not group_var.values:
                    return 2
                observed = np.array(
                    contingency.get_contingency(data, group_var, attr))
                # Remove all-zero rows and columns
                observed = observed[observed.sum(axis=1) != 0, :]
                observed = observed[:, observed.sum(axis=0) != 0]
                if min(observed.shape) < 2:
                    return 2
                # Expected counts: outer product of the marginal sums divided
                # by the total
                expected = \
                    np.outer(observed.sum(axis=1), observed.sum(axis=0)) / \
                    np.sum(observed)
                p = chisquare(observed.ravel(),
                              f_exp=expected.ravel(),
                              ddof=n_groups - 1)[1]
            if math.isnan(p):
                return 2
            return p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        # Remember the selection so it can be re-assigned after the model
        # changes
        attribute = self.attribute
        group_var = self.group_var
        if self.order_by_importance and group_var is not None:
            n_groups = len(group_var.values)
            # The group column is used only for scoring continuous variables,
            # so it is fetched only if the domain has any
            group_col = data.get_column_view(group_var)[0] if \
                domain.has_continuous_attributes(
                    include_class=True, include_metas=True) else None
            self.attrs.sort(key=compute_score)
        else:
            self.attrs.set_domain(domain)
        self.attribute = attribute

    def reset_all_data(self):
        """Empty the scene, the test label and both variable models."""
        self.clear_scene()
        self.infot1.setText("")
        for model in (self.attrs, self.group_vars):
            model.set_domain(None)
        # Remove 'None' from the list view
        self.group_vars.clear()
        self.is_continuous = False
        self.update_display_box()

    def grouping_changed(self):
        """React to a change of the grouping variable.

        Ordering by relevance is available only with a grouping variable;
        the variables are then re-sorted and the plot is recomputed.
        """
        has_grouping = self.group_var is not None
        self.cb_order.setEnabled(has_grouping)
        self.apply_sorting()
        self.attr_changed()

    def select_box_items(self):
        """Select the boxes in the scene that match the stored conditions.

        Each `FilterGraphicsRectItem` in the scene is selected when the
        conditions of its filter are among the conditions of the filters
        stored in ``self.conditions``, and deselected otherwise.
        """
        # Build the lookup list once, from a copy of the stored filters,
        # instead of rebuilding it for every item in the scene. Working from
        # a snapshot also keeps the comparison stable should
        # self.conditions be replaced while the selection is being changed.
        stored_conditions = [
            cond.conditions for cond in self.conditions.copy()
        ]
        for box in self.box_scene.items():
            if isinstance(box, FilterGraphicsRectItem):
                box.setSelected(box.filter.conditions in stored_conditions)

    def attr_changed(self):
        """Recompute and redraw the plot for the selected variable.

        After the box data, the display box and the layout are updated, the
        view is centred on the middle of the plot.
        """
        self.compute_box_data()
        self.update_display_box()
        self.layout_changed()

        if self.is_continuous:
            row_height = 90 if self.show_annotations else 60
            center_x = self.scene_min_x + self.scene_width / 2
            center_y = -30 - len(self.stats) * row_height / 2 + 45
        else:
            center_x = self.scene_width / 2
            center_y = -30 - len(self.boxes) * 40 / 2 + 45
        self.box_view.centerOn(center_x, center_y)

    def compute_box_data(self):
        """Compute the distributions and statistics for the plot.

        With a grouping variable, ``self.conts`` holds the contingency of the
        selected variable over the groups and ``self.dist`` is empty; without
        it, ``self.dist`` holds the variable's distribution and
        ``self.conts`` is empty. For continuous variables ``self.stats`` gets
        one `BoxData` per group (or a single one). Finally, statistics with
        no instances (``n == 0``) and their labels are dropped.

        Does nothing if no variable is selected; clears the data if there is
        no dataset, or if the variable or the grouping variable is discrete
        without values.
        """
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        self.is_continuous = attr.is_continuous

        # Checked one after another so that `values` is only looked up when
        # the preceding checks have passed
        if dataset is None:
            unusable = True
        elif not self.is_continuous and not attr.values:
            unusable = True
        else:
            unusable = bool(self.group_var and not self.group_var.values)
        if unusable:
            self.stats = self.dist = self.conts = []
            return

        group_var = self.group_var
        if group_var:
            self.dist = []
            self.conts = contingency.get_contingency(dataset, attr, group_var)
            if self.is_continuous:
                self.stats = [
                    BoxData(cont, attr, index, group_var)
                    for index, cont in enumerate(self.conts)
                ]
            self.label_txts_all = group_var.values
        else:
            self.dist = distribution.get_distribution(dataset, attr)
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist, attr, None)]
            self.label_txts_all = [""]

        # Labels are filtered first, while self.stats still lines up with
        # self.label_txts_all
        kept_labels = []
        for stat, text in zip(self.stats, self.label_txts_all):
            if stat.n > 0:
                kept_labels.append(text)
        self.label_txts = kept_labels
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_display_box(self):
        """Show the display options that fit the type of the variable.

        Discrete variables get the box with the bar options. Continuous
        variables get the box with annotation and comparison options, where
        the comparison buttons are enabled only with a grouping variable.
        """
        if not self.is_continuous:
            self.stretching_box.show()
            self.display_box.hide()
            return

        self.stretching_box.hide()
        self.display_box.show()
        has_grouping = self.group_var is not None
        self.compare_rb.setEnabled(has_grouping)

    def clear_scene(self):
        """Remove everything from the scene and forget the drawn elements.

        The context is closed before the scene is cleared and reopened for
        the current dataset afterwards.
        """
        self.closeContext()
        scene = self.box_scene
        scene.clearSelection()
        scene.clear()
        self.posthoc_lines = []
        self.mean_labels, self.boxes = [], []
        self.labels, self.attr_labels = [], []
        self.openContext(self.dataset)

    def layout_changed(self):
        """Rebuild the graphical elements of the plot.

        Does nothing if no variable is selected. Otherwise the scene is
        cleared; if there is data to show, discrete variables are drawn by
        `display_changed_disc`, while for continuous variables the mean
        labels, the axis, the boxes and the labels are constructed and
        `display_changed` puts them in place.
        """
        attr = self.attribute
        if not attr:
            return
        self.clear_scene()
        if self.dataset is None:
            return
        if len(self.conts) == 0 and len(self.dist) == 0:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        mean_labels = []
        for stat, text in zip(self.stats, self.label_txts):
            mean_labels.append(self.mean_label(stat, attr, text))
        self.mean_labels = mean_labels

        # draw_axis uses self.mean_labels, so it has to come after them
        self.draw_axis()

        self.boxes = [self.box_group(stat) for stat in self.stats]

        label_groups = []
        for stat, mean_lab in zip(self.stats, self.mean_labels):
            label_groups.append(self.label_group(stat, attr, mean_lab))
        self.labels = label_groups

        self.attr_labels = [
            QGraphicsSimpleTextItem(text) for text in self.label_txts
        ]
        for item in self.labels + self.attr_labels:
            self.box_scene.addItem(item)
        self.display_changed()

    def display_changed(self):
        """Place the already constructed elements according to the settings.

        Discrete variables are delegated to `display_changed_disc`. For
        continuous variables the boxes are ordered by the chosen comparison
        statistic (if any), each box and its labels are put into its row,
        the scene rectangle is set, and the tests, post-hoc lines and box
        selection are refreshed. Does nothing without data.
        """
        if self.dataset is None:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        # Order of boxes: as computed, or sorted by the median/mean when a
        # comparison is chosen; statistics that are None are put last
        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            vals = [getattr(stat, criterion) for stat in self.stats]
            overmax = max((val for val in vals if val is not None), default=0) \
                      + 1
            vals = [val if val is not None else overmax for val in vals]
            self.order = sorted(self.order, key=vals.__getitem__)

        # Height of a row; rows are taller when annotations are shown
        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            for item in self.boxes[box_index]:
                self.box_scene.addItem(item)
                item.setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            # The plain group label is shown only when annotations are off
            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                # Horizontal position of the label: next to the compared
                # statistic, or at the first quartile without comparison
                if self.compare == OWBoxPlot.CompareMedians and \
                        stat.median is not None:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans or stat.q25 is None:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                   self.scene_width,
                   len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self.compute_tests()
        self.show_posthoc()
        self.select_box_items()

    def display_changed_disc(self):
        """Draw the plot for a discrete variable.

        The scene is cleared and redrawn from scratch: one row of bars per
        group (or a single row without grouping), with the group label to
        the left of the row. When bars are not stretched, the number of
        instances is shown to the right of each row. When box labels are
        enabled (and the variable is not the grouping variable itself), a
        label is put above each part of the bar. The test label is cleared
        and the box selection is restored at the end.
        """
        self.clear_scene()
        self.attr_labels = [
            QGraphicsSimpleTextItem(lab) for lab in self.label_txts_all
        ]

        # Labels with counts are needed only for bars that are not stretched
        if not self.stretched:
            if self.group_var:
                self.labels = [
                    QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts
                ]
            else:
                self.labels = [QGraphicsTextItem(str(int(sum(self.dist))))]

        self.draw_axis_disc()
        if self.group_var:
            self.boxes = [
                self.strudel(cont, i) for i, cont in enumerate(self.conts)
            ]
        else:
            self.boxes = [self.strudel(self.dist)]

        for row, box in enumerate(self.boxes):
            y = (-len(self.boxes) + row) * 40 + 10

            # Group label, to the left of the row
            label = self.attr_labels[row]
            b = label.boundingRect()
            label.setPos(-b.width() - 10, y - b.height() / 2)
            self.box_scene.addItem(label)
            if not self.stretched:
                # Count label, to the right of the row
                label = self.labels[row]
                b = label.boundingRect()
                if self.group_var:
                    right = self.scale_x * sum(self.conts[row])
                else:
                    right = self.scale_x * sum(self.dist)
                label.setPos(right + 10, y - b.height() / 2)
                self.box_scene.addItem(label)

            if self.show_labels and self.attribute is not self.group_var:
                # The box alternates bar parts (even positions) with items
                # holding their texts (odd positions)
                for text_item, bar_part in zip(box[1::2], box[::2]):
                    label = QGraphicsSimpleTextItem(text_item.toPlainText())
                    label.setPos(bar_part.boundingRect().x(),
                                 y - label.boundingRect().height() - 8)
                    self.box_scene.addItem(label)
            for item in box:
                if isinstance(item, QGraphicsTextItem):
                    continue
                self.box_scene.addItem(item)
                item.setPos(0, y)

        # Each row takes 40 units of height. (Multiplying the list by 40
        # inside len() gave the same number, but built a 40-fold copy of the
        # list only to measure its length.)
        rows_height = len(self.boxes) * 40
        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - rows_height,
                                    self.scene_width,
                                    rows_height + 90)
        self.infot1.setText("")
        self.select_box_items()

    # noinspection PyPep8Naming
    def compute_tests(self):
        """Compute the significance test for the groups and show its result.

        No test is run without a comparison, with fewer than two groups, or
        when medians are compared; the label is then emptied. If any group
        has a single instance, a message is shown instead. Otherwise two
        groups are compared with a t-test and more groups with one-way ANOVA,
        both computed from the statistics in ``self.stats``. The p-value is
        stored in ``self.p``.

        Degenerate inputs (zero variance, no degrees of freedom) yield
        ``nan`` for both the statistic and the p-value instead of dividing
        by zero.
        """
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests
        def stat_ttest():
            # Two-sample t-test in the unequal-variance (Welch) form
            d1, d2 = self.stats
            if d1.n < 2 or d2.n < 2:
                return np.nan, np.nan
            pooled_var = d1.var / d1.n + d2.var / d2.n
            # This check must precede the computation of df, whose
            # denominator is zero when both variances are zero
            if pooled_var == 0 or np.isnan(pooled_var):
                return np.nan, np.nan
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            if any(stat.n == 0 for stat in self.stats):
                return np.nan, np.nan
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg)**2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            # F is undefined if any of the divisors below is zero
            if var_within == 0 or df_within == 0 or df_between == 0:
                return np.nan, np.nan
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance, " \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # z, self.p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, self.p)
            else:
                t, self.p = stat_ttest()
                t = "Student's t: %.3f (p=%.3f)" % (t, self.p)
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # U, self.p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, self.p)
            else:
                F, self.p = stat_ANOVA()
                t = "ANOVA: %.3f (p=%.3f)" % (F, self.p)
        self.infot1.setText("<center>%s</center>" % t)

    def mean_label(self, stat, attr, val_name):
        """Construct the "mean ± deviation" label for a box.

        The label is a `QGraphicsItemGroup` with the mean horizontally
        centred at 0, followed by the deviation and, if `val_name` is given,
        preceded by the name of the value. Numbers are shown with one more
        decimal than the variable has. The attribute ``min_x`` of the
        returned group holds the x coordinate of its left edge.
        """
        decimals = attr.number_of_decimals + 1
        group = QGraphicsItemGroup()

        mean_item = QGraphicsSimpleTextItem(
            "%.*f" % (decimals, stat.mean), group)
        mean_item.setFont(self._label_font)
        mean_rect = mean_item.boundingRect()
        half_width = mean_rect.width() / 2
        height = mean_rect.height()
        mean_item.setPos(-half_width, -height)

        dev_text = " \u00b1 " + "%.*f" % (decimals, stat.dev)
        dev_item = QGraphicsSimpleTextItem(dev_text, group)
        dev_item.setFont(self._label_font)
        dev_item.setPos(half_width, -height)

        if not val_name:
            group.min_x = -half_width
            return group

        name_item = QGraphicsSimpleTextItem(val_name + ": ", group)
        name_item.setFont(self._label_font)
        name_item.setBrush(self._attr_brush)
        name_rect = name_item.boundingRect()
        group.min_x = -half_width - name_rect.width()
        name_item.setPos(group.min_x, -height)
        return group

    def draw_axis(self):
        """Draw the horizontal axis and sets self.scale_x

        Also sets ``self.scene_min_x`` and ``self.scene_width``. Without any
        statistics, a placeholder `BoxData` is used for the range and the
        ticks are labelled with "?".
        """
        no_stats = not self.stats
        stats = self.stats or [BoxData(np.array([[0.], [1.]]), self.attribute)]
        mean_labels = self.mean_labels or [
            self.mean_label(stats[0], self.attribute, "")
        ]
        lowest = min(stat.a_min for stat in stats)
        highest = max(stat.a_max for stat in stats)

        # Put the first tick strictly below the data and extend the upper
        # end to the last tick
        first_tick, step = compute_scale(lowest, highest)
        while lowest <= first_tick:
            first_tick -= step
        lowest = first_tick
        n_ticks = math.ceil((highest - first_tick) / step) + 1
        highest = max(highest, first_tick + n_ticks * step)

        # The shown range must also cover mean +- deviation of every box
        range_min = min(lowest, min(stat.mean - stat.dev for stat in stats))
        range_max = max(highest, max(stat.mean + stat.dev for stat in stats))

        viewport_rect = self.box_view.viewport().rect()
        view_rect = viewport_rect.adjusted(15, 15, -15, -30)
        view_width = view_rect.width()
        self.scale_x = scale_x = view_width / (range_max - range_min)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        labels_left = min(stat.mean + mean_lab.min_x / scale_x
                          for stat, mean_lab in zip(stats, mean_labels))
        if labels_left < range_min:
            range_min = labels_left
            self.scale_x = scale_x = view_width / (range_max - range_min)

        self.scene_min_x = range_min * scale_x
        self.scene_width = (range_max - range_min) * scale_x

        tick_value = first_tick
        while True:
            tick_x = tick_value * scale_x
            tick = self.box_scene.addLine(tick_x, -1, tick_x, 1,
                                          self._pen_axis_tick)
            tick.setZValue(100)

            tick_label = "?" if no_stats \
                else self.attribute.repr_val(tick_value)
            tick_text = self.box_scene.addSimpleText(tick_label,
                                                     self._axis_font)
            tick_text.setFlags(
                tick_text.flags() | QGraphicsItem.ItemIgnoresTransformations)
            text_rect = tick_text.boundingRect()
            tick_text.setPos(tick_value * scale_x - text_rect.width() / 2, 8)
            if tick_value >= highest:
                break
            tick_value += step
        self.box_scene.addLine(lowest * scale_x - 4, 0,
                               highest * scale_x + 4, 0,
                               self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis for a discrete attribute and set self.scale_x.

        The method also stores `self.scene_width` and `self.label_width`.
        When the plot is not stretched and the largest box is empty,
        `self.scale_x` is set to 1 and nothing is drawn.
        """
        if self.stretched:
            # stretched plot: ticks labelled 0, 10, ..., 100
            tick_step = tick_count = 10
        else:
            if self.group_var:
                largest = max(float(np.sum(dist)) for dist in self.conts)
            else:
                largest = float(np.sum(self.dist))
            if largest == 0:
                self.scale_x = 1
                return
            _, tick_step = compute_scale(0, largest)
            tick_step = int(tick_step) if tick_step > 1 else 1
            tick_count = int(math.ceil(largest / tick_step))
        axis_max = tick_step * tick_count

        viewport_rect = self.box_view.viewport().rect()
        self.scene_width = viewport_rect.adjusted(15, 15, -15, -30).width()

        widest_label = max(
            lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = min(max(widest_label, 40), self.scene_width / 3)
        self.label_width = lab_width

        # available space left of the 'group labels'
        available = self.scene_width - lab_width - 10

        right_offset = 0  # offset for the right label
        if not self.stretched and self.labels:
            if self.group_var:
                rows = list(zip(self.conts, self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            # first guess of the scale, assuming no room is needed on the right
            trial_scale = available / axis_max
            furthest_right = max(
                sum(dist) * trial_scale + 10 + lbl.boundingRect().width()
                for dist, lbl in rows)
            right_offset = max(0, furthest_right - axis_max * trial_scale)

        self.scale_x = scale_x = (available - right_offset) / axis_max

        scene = self.box_scene
        scene.addLine(0, 0, axis_max * scale_x, 0, self._pen_axis)
        for val in range(0, axis_max + 1, tick_step):
            x = val * scale_x
            tick = scene.addLine(x, -1, x, 1, self._pen_axis_tick)
            tick.setZValue(100)
            tick_label = scene.addSimpleText(str(val), self._axis_font)
            tick_label.setPos(x - tick_label.boundingRect().width() / 2, 8)
        if self.stretched:
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        """
        Build the group of labels that annotate one box.

        `mean_lab` is added to the group and placed at the scaled mean. The
        median, q25 and q75 values (when not None) get a text label formatted
        with one decimal more than `attr.number_of_decimals`. A quartile label
        that would come closer than 5 units to the median label is moved aside
        and connected to its value with a bent path; otherwise it gets a short
        vertical line.

        Args:
            stat: statistics of the box (mean, median, q25, q75 are read)
            attr: variable providing `number_of_decimals`
            mean_lab: graphics item with the label for the mean

        Returns:
            QGraphicsItemGroup with all labels and connectors
        """
        group = QGraphicsItemGroup()

        def value_label(value, center):
            item = QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, value), group)
            item.setFont(self._label_font)
            item.setPos(center - item.boundingRect().width() / 2, 22)
            return item

        def tick(x, down=1):
            QGraphicsLineItem(x, 12 * down, x, 20 * down, group)

        def displace(item, width, frm, to):
            item.setX(to)
            reach = to + width / 2 - frm
            path = QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(reach, 4)
            path.lineTo(reach, 8)
            connector = QGraphicsPathItem(path)
            connector.setPos(frm, 12)
            group.addToGroup(connector)

        group.addToGroup(mean_lab)
        mean_x = stat.mean * self.scale_x
        mean_lab.setPos(mean_x, -22)
        tick(mean_x, -1)

        # NOTE: median_x and median_width are bound only when the median is
        # not None; the quartile branches below rely on them.
        if stat.median is not None:
            median_x = stat.median * self.scale_x
            median_label = value_label(stat.median, median_x)
            median_width = median_label.boundingRect().width()
            tick(median_x)

        if stat.q25 is not None:
            q_x = stat.q25 * self.scale_x
            q_label = value_label(stat.q25, q_x)
            q_width = q_label.boundingRect().width()
            left_limit = median_x - median_width
            if q_x + q_width / 2 >= left_limit - 5:
                displace(q_label, q_width, q_x, left_limit - q_width - 5)
            else:
                tick(q_x)

        if stat.q75 is not None:
            q_x = stat.q75 * self.scale_x
            q_label = value_label(stat.q75, q_x)
            q_width = q_label.boundingRect().width()
            right_limit = median_x + median_width
            if q_x - q_width / 2 <= right_limit + 5:
                displace(q_label, q_width, q_x, right_limit + 5)
            else:
                tick(q_x)

        return group

    def box_group(self, stat, height=20):
        """
        Create the graphics items that make up one box.

        The returned list holds, in this order: the two whiskers at `a_min`
        and `a_max`, the dotted line between them, the mean line, the line
        spanning mean +- dev, then (if both quartiles are known) the
        selectable quartile rectangle and (if the median is known) the
        median line.

        Args:
            stat: statistics of the box
            height (int): height of the quartile rectangle

        Returns:
            list of graphics items; they are not added to any scene here
        """
        scale_x = self.scale_x
        half, third = height / 2, height / 3

        def scaled_line(x0, y0, x1, y1, pen):
            item = QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1)
            item.setPen(pen)
            return item

        solid, dotted = self._pen_paramet, self._pen_dotted
        items = [
            scaled_line(stat.a_min, -1.5, stat.a_min, 1.5, solid),
            scaled_line(stat.a_max, -1.5, stat.a_max, 1.5, solid),
            scaled_line(stat.a_min, 0, stat.a_max, 0, dotted),
            scaled_line(stat.mean, -third, stat.mean, third, solid),
            scaled_line(stat.mean - stat.dev, 0,
                        stat.mean + stat.dev, 0, solid),
        ]

        if stat.q25 is not None and stat.q75 is not None:
            quartile_box = FilterGraphicsRectItem(
                stat.conditions, stat.q25 * scale_x, -half,
                (stat.q75 - stat.q25) * scale_x, height)
            quartile_box.setBrush(self._box_brush)
            quartile_box.setPen(QPen(Qt.NoPen))
            quartile_box.setZValue(-200)
            items.append(quartile_box)

        if stat.median is not None:
            median_line = scaled_line(
                stat.median, -half, stat.median, half, self._pen_median)
            median_line.setZValue(-150)
            items.append(median_line)

        return items

    def strudel(self, dist, group_val_index=None):
        """
        Create the items of a stacked bar for a discrete attribute.

        Every value whose count is at least 1e-6 contributes a coloured,
        selectable rectangle followed by a text item with the value's name.
        If the whole distribution sums to less than 1e-6, a rectangle whose
        filter condition is built with `None` as the value list is added
        first.

        Args:
            dist: counts for the values of `self.attribute`
            group_val_index (int or None): index of the group value; when
                given, the filter of each rectangle also matches that value
                of `self.group_var`

        Returns:
            list of graphics items
        """
        attr = self.attribute
        total = np.sum(dist)

        def conditions(values):
            conds = [FilterDiscrete(attr, values)]
            if group_val_index is not None:
                conds.append(
                    FilterDiscrete(self.group_var, [group_val_index]))
            return conds

        items = []
        if total < 1e-6:
            items.append(
                FilterGraphicsRectItem(conditions(None), 0, -10, 1, 10))

        left = 0
        for i, count in enumerate(dist):
            if count < 1e-6:
                continue
            share = count / total if self.stretched else count
            width = share * self.scale_x
            rect = FilterGraphicsRectItem(
                conditions([i]), left + 1, -6, width - 2, 12)
            rect.setBrush(QBrush(QColor(*attr.colors[i])))
            rect.setPen(QPen(Qt.NoPen))
            if self.stretched:
                tooltip = "{}: {:.2f}%".format(attr.values[i],
                                               100 * dist[i] / sum(dist))
            else:
                tooltip = "{}: {}".format(attr.values[i], int(dist[i]))
            rect.setToolTip(tooltip)
            items.extend([rect, QGraphicsTextItem(attr.values[i])])
            left += width
        return items

    def commit(self):
        """
        Send the selected and the annotated data.

        The filters of the selected scene items are stored in
        `self.conditions`. If there are any, rows matching at least one of
        them form the selected data; otherwise `None` is sent and the
        annotation is built from an empty selection.
        """
        self.conditions = [
            item.filter for item in self.box_scene.selectedItems()
            if item.filter
        ]
        if self.conditions:
            matcher = Values(self.conditions, conjunction=False)
            selected = matcher(self.dataset)
            in_selection = np.in1d(
                self.dataset.ids, selected.ids, assume_unique=True)
            selection = in_selection.nonzero()[0]
        else:
            selected, selection = None, []
        self.Outputs.selected_data.send(selected)
        annotated = create_annotated_table(self.dataset, selection)
        self.Outputs.annotated_data.send(annotated)

    def show_posthoc(self):
        """
        Redraw the lines used for comparing the boxes.

        All items stored in `self.posthoc_lines` are removed from the scene
        first. Nothing else is drawn when `self.compare` is `CompareNone` or
        there are fewer than two boxes. Otherwise a vertical line is drawn at
        the median or the mean of every box and horizontal lines connect
        boxes whose stored positions differ by at most 1.5.
        """
        def line(y0, y1):
            # uses the value `x` has in the enclosing scope at call time
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        xs = []

        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line)
            if x is None:
                continue
            x *= self.scale_x
            # NOTE(review): `x` is already scaled above, so the value stored
            # in `xs` is multiplied by scale_x twice, while the vertical
            # lines drawn by `line` use the singly scaled `x` -- confirm
            # whether this is intended.
            xs.append(x * self.scale_x)
            by = y_up + pos * height
            line(by + 12, 3)
            line(by - 12, by - 25)

        # used_to[row] is the index of the last box covered by the line most
        # recently drawn in that row
        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            # find the last box whose position is within 1.5 of box `frm`;
            # without a break `to` stays at the last index
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            # skip if the group ends where the previous one did or if it
            # contains only the box itself
            if last_to == to or frm == to:
                continue
            # reuse the first row whose line ends before `frm`, else add one
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = -6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        """Return the name of the current attribute, or None if it is unset."""
        attribute = self.attribute
        return attribute.name if attribute else None

    def send_report(self):
        """
        Report the plot and, if any variable is set, a caption naming it.

        The caption mentions the attribute and the grouping variable,
        whichever of them are set; no caption is reported when neither is.
        """
        self.report_plot()
        parts = []
        if self.attribute:
            parts.append(
                "Box plot for attribute '{}' ".format(self.attribute.name))
        if self.group_var:
            parts.append("grouped by '{}'".format(self.group_var.name))
        caption = "".join(parts)
        if caption:
            self.report_caption(caption)
Esempio n. 16
0
class OWSaveBase(widget.OWWidget, openclass=True):
    """
    Base class for Save widgets

    A derived class must provide, at minimum:

    - class `Inputs` and the corresponding handler that:

      - saves the input to an attribute `data`, and
      - calls `self.on_new_input`.

    - a class attribute `filters` with a list of filters or a dictionary whose
      keys are filters OR a class method `get_filters` that returns such a
      list or dictionary
    - method `do_save` that saves `self.data` into `self.filename`

    Alternatively, instead of defining `do_save` a derived class can make
    `filters` a dictionary whose values are classes that define a method
    `write` (like e.g. `TabReader`). Method `do_save` defined in the base
    class calls the writer corresponding to the currently chosen filter.

    A minimum example of derived class is
    `Orange.widgets.model.owsavemodel.OWSaveModel`.
    A more advanced widget that overrides a lot of base class behaviour is
    `Orange.widgets.data.owsave.OWSave`.
    """
    class Information(widget.OWWidget.Information):
        empty_input = widget.Msg("Empty input; nothing was saved.")

    class Error(widget.OWWidget.Error):
        no_file_name = widget.Msg("File name is not set.")
        general_error = widget.Msg("{}")

    want_main_area = False
    resizing_enabled = False

    last_dir = Setting("")
    filter = Setting("")  # default will be provided in __init__
    filename = Setting("", schema_only=True)
    auto_save = Setting(False)

    # Overridden by derived classes; see the class docstring
    filters = []

    def __init__(self, start_row=0):
        """
        Set up the gui.

        The gui consists of a checkbox for auto save and two buttons put on a
        grid layout. Derived widgets that want to place controls above the auto
        save widget can set the `start_row` argument to the first free row,
        and this constructor will start filling the grid there.

        Args:
            start_row (int): the row at which to start filling the gui
        """
        super().__init__()
        self.data = None
        # This cannot be done outside because `filters` is defined by subclass
        if not self.filter:
            self.filter = next(iter(self.get_filters()))

        self.grid = grid = QGridLayout()
        gui.widgetBox(self.controlArea, orientation=grid)
        grid.addWidget(
            gui.checkBox(None,
                         self,
                         "auto_save",
                         "Autosave when receiving new data",
                         callback=self.update_messages), start_row, 0, 1, 2)
        # the row between the checkbox and the buttons serves as a spacer
        grid.setRowMinimumHeight(start_row + 1, 8)
        self.bt_save = gui.button(None, self, "Save", callback=self.save_file)
        grid.addWidget(self.bt_save, start_row + 2, 0)
        grid.addWidget(
            gui.button(None, self, "Save as ...", callback=self.save_file_as),
            start_row + 2, 1)

        self.adjustSize()
        self.update_messages()

    @classmethod
    def get_filters(cls):
        """
        Return the filters; the default implementation returns `cls.filters`.
        """
        return cls.filters

    @property
    def writer(self):
        """
        Return the active writer

        The base class uses this property only in `do_save` to find the writer
        corresponding to the filter. Derived classes (e.g. OWSave) may also use
        it elsewhere.
        """
        return self.get_filters()[self.filter]

    def on_new_input(self):
        """
        This method must be called from input signal handler.

        - It clears errors, warnings and information and calls
          `self.update_messages` to set them as needed.
        - It also calls `update_status` that can be overridden in derived
          classes to set the status (e.g. the number of input rows)
        - Calls `self.save_file` if `self.auto_save` is enabled and
          `self.filename` is provided.
        """
        self.Error.clear()
        self.Warning.clear()
        self.Information.clear()
        self.update_messages()
        self.update_status()
        if self.auto_save and self.filename:
            self.save_file()

    def save_file_as(self):
        """
        Ask the user for the filename and try saving the file

        If the dialog returns an empty file name, nothing is changed.
        Otherwise the file name, filter and last directory are stored and
        the text of the save button is updated before saving.
        """
        filename, selected_filter = self.get_save_filename()
        if not filename:
            return
        self.filename = filename
        self.filter = selected_filter
        self.last_dir = os.path.split(self.filename)[0]
        self.bt_save.setText(f"Save as {os.path.split(filename)[1]}")
        self.update_messages()
        self._try_save()

    def save_file(self):
        """
        If file name is provided, try saving, else call save_file_as
        """
        if not self.filename:
            self.save_file_as()
        else:
            self._try_save()

    def _try_save(self):
        """
        Private method that calls do_save within try-except that catches and
        shows IOError. Do nothing if there is no data or no file name.
        """
        self.Error.general_error.clear()
        if self.data is None or not self.filename:
            return
        try:
            self.do_save()
        except IOError as err_value:
            self.Error.general_error(str(err_value))

    def do_save(self):
        """
        Do the saving.

        Default implementation calls the write method of the writer
        corresponding to the current filter. This requires that get_filters()
        returns a dictionary whose values are classes with a method `write`.

        Derived classes may simplify this by providing a list of filters and
        override do_save. This is particularly handy if the widget supports only
        a single format.
        """
        # This method is separated out because it will usually be overridden
        self.writer.write(self.filename, self.data)

    def update_messages(self):
        """
        Update errors, warnings and information.

        Default method sets no_file_name if auto_save is enabled but file name
        is not provided; and empty_input if file name is given but there is no
        data.

        Derived classes that define further messages will typically set them in
        this method.
        """
        self.Error.no_file_name(shown=not self.filename and self.auto_save)
        self.Information.empty_input(shown=self.filename and self.data is None)

    def update_status(self):
        """
        Update the input/output indicator. Default method does nothing.
        """

    def initial_start_dir(self):
        """
        Provide initial start directory

        Return the current file's path (including the file name) if its
        directory exists, otherwise the last directory or home.
        """
        if self.filename and os.path.exists(os.path.split(self.filename)[0]):
            return self.filename
        else:
            return self.last_dir or _userhome

    @staticmethod
    def suggested_name():
        """
        Suggest the name for the output file or return an empty string.
        """
        return ""

    @classmethod
    def _replace_extension(cls, filename, extension):
        """
        Remove all extensions that appear in any filter and add `extension`.

        Double extensions are broken in different weird ways across all systems,
        including omitting some, like turning iris.tab.gz to iris.gz. This
        function removes anything that can appear anywhere.

        Args:
            filename (str): file name, possibly with known extensions
            extension (str): extension to append; it is added as given

        Returns:
            str: file name stripped of known extensions, with `extension`
        """
        # collect the parts of all extensions, e.g. {"tab", "gz"}
        known_extensions = set()
        for filt in cls.get_filters():
            known_extensions |= set(
                cls._extension_from_filter(filt).split("."))
        if "" in known_extensions:
            known_extensions.remove("")
        # strip trailing extensions for as long as they are known
        while True:
            base, ext = os.path.splitext(filename)
            if ext[1:] not in known_extensions:
                break
            filename = base
        return filename + extension

    @staticmethod
    def _extension_from_filter(selected_filter):
        """
        Return the extension, with the leading dot, from the filter string.

        The filter must end with the extension in parentheses, optionally
        preceded by `*`, e.g. `(*.tab)` or `(.tab.gz)`. If it does not,
        `re.search` returns None and the call of `group` fails.
        """
        return re.search(r".*\(\*?(\..*)\)$", selected_filter).group(1)

    def valid_filters(self):
        """
        Return the filters to offer in the save dialog; by default all filters.
        """
        return self.get_filters()

    def default_valid_filter(self):
        """
        Return the filter to preselect in the dialog; by default `self.filter`.
        """
        return self.filter

    # As of Qt 5.9, QFileDialog.setDefaultSuffix does not support double
    # suffixes, not even in non-native dialogs. We handle each OS separately.
    if sys.platform in ("darwin", "win32"):
        # macOS and Windows native dialogs do not correctly handle double
        # extensions. We thus don't pass any suffixes to the dialog and add
        # the correct suffix after closing the dialog and only then check
        # if the file exists and ask whether to override.
        # It is a bit confusing that the user does not see the final name in the
        # dialog, but I see no better solution.
        def get_save_filename(self):  # pragma: no cover
            """
            Show the save dialog until the user picks a usable name or cancels.

            Returns:
                tuple: file name with the extension of the chosen filter, and
                the chosen filter; a pair of empty strings if cancelled
            """
            if sys.platform == "darwin":

                def remove_star(filt):
                    return filt.replace(" (*.", " (.")
            else:

                def remove_star(filt):
                    return filt

            # maps filters as shown in the dialog to the original filters
            no_ext_filters = {remove_star(f): f for f in self.valid_filters()}
            filename = self.initial_start_dir()
            while True:
                dlg = QFileDialog(None, "Save File", filename,
                                  ";;".join(no_ext_filters))
                dlg.setAcceptMode(dlg.AcceptSave)
                dlg.selectNameFilter(remove_star(self.default_valid_filter()))
                dlg.setOption(QFileDialog.DontConfirmOverwrite)
                if dlg.exec() == QFileDialog.Rejected:
                    return "", ""
                filename = dlg.selectedFiles()[0]
                selected_filter = no_ext_filters[dlg.selectedNameFilter()]
                filename = self._replace_extension(
                    filename, self._extension_from_filter(selected_filter))
                # ask about overwriting here because the final name is known
                # only after the extension is replaced
                if not os.path.exists(filename) or QMessageBox.question(
                        self, "Overwrite file?",
                        f"File {os.path.split(filename)[1]} already exists.\n"
                        "Overwrite?") == QMessageBox.Yes:
                    return filename, selected_filter

    else:  # Linux and any unknown platforms
        # Qt does not use a native dialog on Linux, so we can connect to
        # filterSelected and to overload selectFile to change the extension
        # while the dialog is open.
        # For unknown platforms (which?), we also use the non-native dialog to
        # be sure we know what happens.
        class SaveFileDialog(QFileDialog):
            """
            Non-native save dialog that keeps the extension of the selected
            file in sync with the selected filter.
            """
            # pylint: disable=protected-access
            def __init__(self, save_cls, *args, **kwargs):
                """
                Args:
                    save_cls: class providing `_extension_from_filter` and
                        `_replace_extension`
                    *args, **kwargs: passed to `QFileDialog`
                """
                super().__init__(*args, **kwargs)
                self.save_cls = save_cls
                self.suffix = ""
                self.setAcceptMode(QFileDialog.AcceptSave)
                self.setOption(QFileDialog.DontUseNativeDialog)
                self.filterSelected.connect(self.updateDefaultExtension)

            def selectNameFilter(self, selected_filter):
                """Select the filter and update the extension accordingly."""
                super().selectNameFilter(selected_filter)
                self.updateDefaultExtension(selected_filter)

            def updateDefaultExtension(self, selected_filter):
                """
                Store the extension of the filter and re-select the current
                file (unless it is a directory) so it gets that extension.
                """
                self.suffix = \
                    self.save_cls._extension_from_filter(selected_filter)
                files = self.selectedFiles()
                if files and not os.path.isdir(files[0]):
                    self.selectFile(files[0])

            def selectFile(self, filename):
                """Select the file after replacing its extension."""
                filename = \
                    self.save_cls._replace_extension(filename, self.suffix)
                super().selectFile(filename)

        def get_save_filename(self):
            """
            Show the save dialog.

            Returns:
                tuple: the selected file name and filter; a pair of empty
                strings if the dialog is rejected
            """
            dlg = self.SaveFileDialog(type(self), None, "Save File",
                                      self.initial_start_dir(),
                                      ";;".join(self.valid_filters()))
            dlg.selectNameFilter(self.default_valid_filter())
            if dlg.exec() == QFileDialog.Rejected:
                return "", ""
            else:
                return dlg.selectedFiles()[0], dlg.selectedNameFilter()
Esempio n. 17
0
class OWLouvainClustering(widget.OWWidget):
    # Widget metadata
    name = "Louvain Clustering"
    description = "Detects communities in a network of nearest neighbors."
    icon = "icons/LouvainClustering.svg"
    priority = 2110

    want_main_area = False

    settingsHandler = DomainContextHandler()

    class Inputs:
        data = Input("Data", Table, default=True)

    class Outputs:
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME,
                                Table,
                                default=True)
        # the network output is declared only when `Network` is not None
        if Network is not None:
            graph = Output("Network", Network)

    # Context settings for preprocessing, graph construction and clustering
    apply_pca = ContextSetting(True)
    pca_components = ContextSetting(_DEFAULT_PCA_COMPONENTS)
    normalize = ContextSetting(True)
    # index into METRICS
    metric_idx = ContextSetting(0)
    k_neighbors = ContextSetting(_DEFAULT_K_NEIGHBORS)
    resolution = ContextSetting(1.)
    auto_commit = Setting(False)

    class Information(widget.OWWidget.Information):
        modified = Msg("Press commit to recompute clusters and send new data")

    class Error(widget.OWWidget.Error):
        empty_dataset = Msg("No features in data")

    def __init__(self):
        """
        Initialize the cached intermediate results and the task machinery,
        and build the controls in the control area.
        """
        super().__init__()

        # Input data and intermediate results; None means "not computed"
        self.data = None  # type: Optional[Table]
        self.preprocessed_data = None  # type: Optional[Table]
        self.pca_projection = None  # type: Optional[Table]
        self.graph = None  # type: Optional[nx.Graph]
        self.partition = None  # type: Optional[np.array]
        # Use a executor with a single worker, to limit CPU overcommitment for
        # cancelled tasks. The method does not have a fine cancellation
        # granularity so we assure that there are not N - 1 jobs executing
        # for no reason only to be thrown away. It would be better to use the
        # global pool but implement a limit on jobs from this source.
        self.__executor = futures.ThreadPoolExecutor(max_workers=1)
        # The currently running task, if any
        self.__task = None  # type: Optional[TaskState]
        # True when the output no longer matches the settings
        self.__invalidated = False
        # coalescing commit timer
        self.__commit_timer = QTimer(self, singleShot=True)
        self.__commit_timer.timeout.connect(self.commit)

        # Set up UI
        info_box = gui.vBox(self.controlArea, "Info")
        self.info_label = gui.widgetLabel(info_box,
                                          "No data on input.")  # type: QLabel

        preprocessing_box = gui.vBox(self.controlArea, "Preprocessing")
        self.normalize_cbx = gui.checkBox(
            preprocessing_box,
            self,
            "normalize",
            label="Normalize data",
            callback=self._invalidate_preprocessed_data,
        )  # type: QCheckBox
        self.apply_pca_cbx = gui.checkBox(
            preprocessing_box,
            self,
            "apply_pca",
            label="Apply PCA preprocessing",
            callback=self._apply_pca_changed,
        )  # type: QCheckBox
        self.pca_components_slider = gui.hSlider(
            preprocessing_box,
            self,
            "pca_components",
            label="PCA Components: ",
            minValue=2,
            maxValue=_MAX_PCA_COMPONENTS,
            callback=self._invalidate_pca_projection,
            tracking=False)  # type: QSlider

        graph_box = gui.vBox(self.controlArea, "Graph parameters")
        self.metric_combo = gui.comboBox(
            graph_box,
            self,
            "metric_idx",
            label="Distance metric",
            items=[m[0] for m in METRICS],
            callback=self._invalidate_graph,
            orientation=Qt.Horizontal,
        )  # type: gui.OrangeComboBox
        self.k_neighbors_spin = gui.spin(
            graph_box,
            self,
            "k_neighbors",
            minv=1,
            maxv=_MAX_K_NEIGBOURS,
            label="k neighbors",
            controlWidth=80,
            alignment=Qt.AlignRight,
            callback=self._invalidate_graph,
        )  # type: gui.SpinBoxWFocusOut
        self.resolution_spin = gui.hSlider(
            graph_box,
            self,
            "resolution",
            minValue=0,
            maxValue=5.,
            step=1e-1,
            label="Resolution",
            intOnly=False,
            labelFormat="%.1f",
            callback=self._invalidate_partition,
            tracking=False,
        )  # type: QSlider
        # the tooltip is set on the slider's parent widget
        self.resolution_spin.parent().setToolTip(
            "The resolution parameter affects the number of clusters to find. "
            "Smaller values tend to produce more clusters and larger values "
            "retrieve less clusters.")
        # the lambdas look the methods up on `self` at call time
        self.apply_button = gui.auto_apply(
            self.controlArea,
            self,
            "auto_commit",
            box=None,
            commit=lambda: self.commit(),
            callback=lambda: self._on_auto_commit_changed())  # type: QWidget

    def _preprocess_data(self):
        """
        Fill `self.preprocessed_data` unless it is already available.

        If `normalize` is set, the data is passed through
        `preprocess.Normalize(center=False)`; otherwise the input data is
        used as it is.
        """
        if self.preprocessed_data is not None:
            return
        if not self.normalize:
            self.preprocessed_data = self.data
            return
        normalizer = preprocess.Normalize(center=False)
        self.preprocessed_data = normalizer(self.data)

    def _apply_pca_changed(self):
        """
        Enable the control for the number of components only when PCA is
        applied, and invalidate the graph.
        """
        components_control = self.controls.pca_components
        components_control.setEnabled(self.apply_pca)
        self._invalidate_graph()

    def _invalidate_preprocessed_data(self):
        """Discard the preprocessed data and everything computed from it."""
        self.preprocessed_data = None
        self._invalidate_pca_projection()
        if self.apply_pca:
            return
        # Without PCA, invalidating the projection stops before reaching the
        # graph, so the graph has to be invalidated here
        self._invalidate_graph()

    def _invalidate_pca_projection(self):
        """
        Discard the PCA projection.

        The graph is invalidated and the widget marked as modified only if
        PCA is applied.
        """
        self.pca_projection = None
        if self.apply_pca:
            self._invalidate_graph()
            self._set_modified(True)

    def _invalidate_graph(self):
        """
        Discard the graph, invalidate the partition and mark the widget as
        modified.
        """
        self.graph = None
        self._invalidate_partition()
        self._set_modified(True)

    def _invalidate_partition(self):
        """
        Discard the partition, invalidate the output and mark the widget as
        modified.
        """
        self.partition = None
        self._invalidate_output()
        # The message is shown here unconditionally; the call of
        # `_set_modified` that follows sets it again through `shown=`
        self.Information.modified()
        self._set_modified(True)

    def _invalidate_output(self):
        """
        Flag the output as invalid and cancel the running task, if any.

        With auto commit on, the commit timer is started; otherwise the
        widget is put into the ready state.
        """
        self.__invalidated = True
        running = self.__task
        if running is not None:
            self.__cancel_task(wait=False)

        if not self.auto_commit:
            self.__set_state_ready()
            return
        self.__commit_timer.start()

    def _set_modified(self, state):
        """
        Mark the widget (GUI) as containing modified state.

        The `modified` message is never shown when there is no data or when
        auto commit is on; otherwise `state` decides.
        """
        if self.data is None or self.auto_commit:
            # does not apply without data or when auto commit is on
            state = False
        self.Information.modified(shown=state)

    def _on_auto_commit_changed(self):
        """Commit if auto commit is on and the output is invalidated."""
        if not self.auto_commit:
            return
        if self.__invalidated:
            self.commit()

    def cancel(self):
        """
        Cancel any running jobs.

        The task is cancelled without waiting for it to finish, and the
        widget is put into the ready state.
        """
        self.__cancel_task(wait=False)
        self.__set_state_ready()

    def commit(self):
        """
        Send the result or start the task that computes it.

        The commit timer is stopped, the invalidated and modified flags are
        reset and any running task is cancelled. Without data the widget
        just becomes ready. If the partition is already known, the data is
        sent immediately. Otherwise a task is started: on the cached graph
        if there is one, else on the data, with the cached PCA projection
        used in place of the preprocessed data when available.
        """
        self.__commit_timer.stop()
        self.__invalidated = False
        self._set_modified(False)

        # Cancel current running task
        self.__cancel_task(wait=False)

        if self.data is None:
            self.__set_state_ready()
            return

        self.Error.clear()

        if self.partition is not None:
            self.__set_state_ready()
            self._send_data()
            return

        self._preprocess_data()

        state = TaskState(self)

        # Prepare/assemble the task to run; reuse partial results
        if self.graph is not None:
            # run on graph only; no need to do PCA and k-nn search ...
            task = partial(run_on_graph,
                           self.graph,
                           resolution=self.resolution,
                           state=state)
        else:
            source, n_components = self.preprocessed_data, None
            if self.apply_pca:
                if self.pca_projection is not None:
                    # the projection is cached; PCA need not be repeated
                    source = self.pca_projection
                else:
                    n_components = self.pca_components
            task = partial(
                run_on_data,
                source,
                pca_components=n_components,
                normalize=self.normalize,
                k_neighbors=self.k_neighbors,
                metric=METRICS[self.metric_idx][1],
                resolution=self.resolution,
                state=state,
            )

        self.info_label.setText("Running...")
        self.__set_state_busy()
        self.__start_task(task, state)

    @Slot(object)
    def __set_partial_results(self, result):
        # type: (Tuple[str, Any]) -> None
        """
        Store a partial result of the running task.

        `result` is a pair of the result's name ("pca_projection", "graph"
        or "partition") and its value, which is assigned to the attribute
        with that name.
        """
        kind, value = result
        if kind == "pca_projection":
            assert isinstance(value, Table) and len(value) == len(self.data)
            self.pca_projection = value
        elif kind == "graph":
            assert isinstance(value, nx.Graph)
            self.graph = value
        elif kind == "partition":
            assert isinstance(value, np.ndarray)
            self.partition = value
        else:
            assert False, kind

    @Slot(object)
    def __on_done(self, future):
        # type: (Future["Results"]) -> None
        """
        Handle the completion of the current task.

        The task is released and scheduled for deletion, the widget is put
        into the ready state and the result of `future` is passed on to
        `__set_results`.
        """
        assert future.done()
        finished = self.__task
        assert finished is not None
        assert finished.future is future
        assert finished.watcher.future() is future
        self.__task = None
        finished.deleteLater()

        self.__set_state_ready()
        self.__set_results(future.result())

    @Slot(str)
    def setStatusMessage(self, text):
        """
        Set the status message; forwards to the base implementation.

        The method is redeclared here with the `Slot(str)` decorator.
        """
        super().setStatusMessage(text)

    @Slot(float)
    def progressBarSet(self, value, *a, **kw):
        """Forward `value` (and any extra arguments) to the inherited
        `progressBarSet`.

        Re-declared with `@Slot(float)`; `__start_task` connects the task's
        `progress_changed` signal to this method.
        """
        super().progressBarSet(value, *a, **kw)

    def __set_state_ready(self):
        """Put the widget back into its idle state.

        Finishes the progress bar, clears the invalidated flag and resets the
        status message to an empty string.
        """
        self.progressBarFinished()
        self.setInvalidated(False)
        self.setStatusMessage("")

    def __set_state_busy(self):
        """Mark the widget as busy: start the progress bar and flag the
        widget as invalidated (cleared again by `__set_state_ready`)."""
        self.progressBarInit()
        self.setInvalidated(True)

    def __start_task(self, task, state):
        # type: (Callable[[], Any], TaskState) -> None
        """Wire `state` to the widget and start `task` on the executor.

        No other task may be active when this is called. The state's status,
        progress, partial-result and completion signals are connected to the
        matching widget slots before the task is started; the state is then
        parented to the widget and remembered as the current task.
        """
        assert self.__task is None
        wiring = (
            (state.status_changed, self.setStatusMessage),
            (state.progress_changed, self.progressBarSet),
            (state.partial_result_ready, self.__set_partial_results),
            (state.watcher.done, self.__on_done),
        )
        for signal, slot in wiring:
            signal.connect(slot)
        state.start(self.__executor, task)
        state.setParent(self)
        self.__task = state

    def __cancel_task(self, wait=True):
        """Cancel and dispose of the current task, if there is one.

        The task is cancelled and all of its signals are disconnected from
        the widget. With `wait=True` the call blocks until the task's future
        has finished and then schedules the task state for deletion; with
        `wait=False` deletion is deferred until the future reports done.
        """
        if self.__task is None:
            return

        state = self.__task
        self.__task = None
        state.cancel()
        state.partial_result_ready.disconnect(self.__set_partial_results)
        state.status_changed.disconnect(self.setStatusMessage)
        state.progress_changed.disconnect(self.progressBarSet)
        state.watcher.done.disconnect(self.__on_done)

        if not wait:
            # Let a watcher owned by the state delete it once the future ends
            watcher = FutureWatcher(state.future, parent=state)
            watcher.done.connect(state.deleteLater)
            return

        futures.wait([state.future])
        state.deleteLater()

    def __set_results(self, results):
        # type: ("Results") -> None
        """Accept the final results of a finished task and publish them.

        Each part of `results` that is present is cross-checked against the
        widget's current settings and against the value already stored by
        `__set_partial_results`. The cluster count is then shown in the info
        label and the output is sent.
        """
        # NOTE: All of these have already been set by __set_partial_results,
        # we double check that they are aliases
        projection = results.pca_projection
        if projection is not None:
            assert self.pca_components == results.pca_components
            assert self.pca_projection is projection
            self.pca_projection = projection

        graph = results.graph
        if graph is not None:
            assert results.metric == METRICS[self.metric_idx][1]
            assert results.k_neighbors == self.k_neighbors
            assert self.graph is graph
            self.graph = graph

        partition = results.partition
        if partition is not None:
            assert results.resolution == self.resolution
            assert self.partition is partition
            self.partition = partition

        # Display the number of found clusters in the UI
        cluster_ids = np.unique(self.partition)
        self.info_label.setText("%d clusters found." % len(cluster_ids))

        self._send_data()

    def _send_data(self):
        """Send the clustered data (and, when available, the network).

        Does nothing while either the partition or the input data is missing.
        Cluster indices are relabelled by decreasing cluster size, so the
        most populous cluster becomes "C1", the next one "C2", and so on. The
        labels are written into a new meta column whose name is derived from
        "Cluster" and made unique within the input domain.

        When the optional `Network` class is available, the neighbour graph
        is also sent as a `Network` built on the annotated table.
        """
        if self.partition is None or self.data is None:
            return
        domain = self.data.domain
        # Compute the frequency of each cluster index
        counts = np.bincount(self.partition)
        # Original cluster indices, largest cluster first
        indices = np.argsort(counts)[::-1]
        # original cluster index -> rank by size
        index_map = {old: new for new, old in enumerate(indices)}
        new_partition = list(map(index_map.get, self.partition))

        cluster_var = DiscreteVariable(
            get_unique_names(domain, "Cluster"),
            values=[
                "C%d" % (i + 1) for i, _ in enumerate(np.unique(new_partition))
            ])

        new_domain = add_columns(domain, metas=[cluster_var])
        new_table = self.data.transform(new_domain)
        new_table.get_column_view(cluster_var)[0][:] = new_partition
        self.Outputs.annotated_data.send(new_table)

        if Network is not None:
            # The adjacency matrix is indexed by node on both axes, so it has
            # to be n_nodes x n_nodes (one node per table row). Sizing it by
            # the number of edges produced a matrix that did not match the
            # node table handed to `Network`.
            n_nodes = len(new_table)
            n_edges = self.graph.number_of_edges()
            # reshape keeps the (2, n_edges) layout even for an empty graph
            edge_index = np.array(list(self.graph.edges()),
                                  dtype=int).reshape(-1, 2).T
            edges = sp.coo_matrix(
                (np.ones(n_edges), edge_index),
                shape=(n_nodes, n_nodes))
            graph = Network(new_table, edges)
            self.Outputs.graph.send(graph)

    @Inputs.data
    def set_data(self, data):
        """Handle a new input table.

        If the new table has the same `X` as the previous one, the existing
        clustering is kept and (with auto commit on and the widget not
        invalidated) simply re-sent. Otherwise any running work is cancelled,
        outputs and cached state are cleared, the control limits are adapted
        to the new data and a new run is committed.
        """
        self.closeContext()
        self.Error.clear()

        prev_data, self.data = self.data, data
        self.openContext(self.data)
        # Make sure to properly enable/disable slider based on `apply_pca` setting
        self.controls.pca_components.setEnabled(self.apply_pca)

        # Same feature matrix as before: nothing to recompute
        if prev_data and self.data and array_equal(prev_data.X, self.data.X):
            if self.auto_commit and not self.isInvalidated():
                self._send_data()
            return

        self.cancel()
        # Clear the outputs
        self.Outputs.annotated_data.send(None)
        if Network is not None:
            self.Outputs.graph.send(None)

        # Clear internal state
        self.clear()
        self._invalidate_pca_projection()

        # Make sure the dataset is ok
        if self.data is not None and len(self.data.domain.attributes) < 1:
            self.Error.empty_dataset()
            self.data = None

        if self.data is None:
            return

        # Can't have more PCA components than the number of attributes
        n_attrs = len(data.domain.attributes)
        self.pca_components_slider.setMaximum(min(_MAX_PCA_COMPONENTS,
                                                  n_attrs))
        # Can't have more k neighbors than there are data points
        self.k_neighbors_spin.setMaximum(min(_MAX_K_NEIGBOURS, len(data) - 1))

        self.info_label.setText("Clustering not yet run.")

        self.commit()

    def clear(self):
        """Drop all cached computation state and reset the messages.

        Cancels the current task without waiting for it, forgets the
        preprocessed data, PCA projection, graph and partition, clears the
        error and "modified" messages and resets the info label.
        """
        self.__cancel_task(wait=False)
        self.preprocessed_data = self.pca_projection = None
        self.graph = self.partition = None
        self.Error.clear()
        self.Information.modified.clear()
        self.info_label.setText("No data on input.")

    def onDeleteWidget(self):
        """Release the widget's resources before it is deleted.

        Cancels the current task and waits for it, shuts the executor down
        (`shutdown(True)`), clears cached state and drops the input data
        before delegating to the inherited handler.
        """
        self.__cancel_task(wait=True)
        self.__executor.shutdown(True)
        self.clear()
        self.data = None
        super().onDeleteWidget()

    def send_report(self):
        """Add the current clustering settings to the report.

        The PCA entry shows whether PCA is applied and, if it is, the number
        of components.
        """
        pca_text = report.bool_str(self.apply_pca)
        if self.apply_pca:
            components_text = report.plural(", {number} component{s}",
                                            self.pca_components)
            pca_text = pca_text + components_text

        rows = (
            ("Normalize data", report.bool_str(self.normalize)),
            ("PCA preprocessing", pca_text),
            ("Metric", METRICS[self.metric_idx][0]),
            ("k neighbors", self.k_neighbors),
            ("Resolution", self.resolution),
        )
        self.report_items(rows)
Esempio n. 18
0
class OWScatterPlotGraph(gui.OWComponent, ScaleScatterPlotData):
    """Scatter plot component that draws into a pyqtgraph `PlotWidget`.

    The component is owned by a master widget (passed to `__init__`), on
    which it registers and raises its warnings and information messages.
    """
    # Variables that drive point color, label, shape and size (None = unused)
    attr_color = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_label = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_shape = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_size = ContextSetting(None, required=ContextSetting.OPTIONAL)
    # When set, `update_labels` shows text only for selected points
    label_only_selected = Setting(False)

    # Point size used when no size variable is set; also scales sized points
    point_width = Setting(10)
    # Alpha (0-255) of point brushes when no subset is highlighted
    alpha_value = Setting(128)
    show_grid = Setting(False)
    show_legend = Setting(True)
    # NOTE(review): not referenced in the part of the class visible here
    tooltip_shows_all = Setting(False)
    # Draw the class density image behind the points (see should_draw_density)
    class_density = Setting(False)
    # Draw a regression line (see draw_regression_line)
    show_reg_line = Setting(False)
    # Passed to `classdensity.class_density_image` as the image resolution
    resolution = 256

    # Symbol names indexed by shape value; the last one ("?") is used for
    # points whose shape value is missing (see compute_symbols)
    CurveSymbols = np.array("o x t + d s t2 t3 p h star ?".split())
    # Base size of points sized by a variable (see compute_sizes)
    MinShapeSize = 6
    # Factor used to darken fill colors into pen (outline) colors
    DarkerValue = 120
    # NOTE(review): presumably the RGB used for unknown values; not referenced
    # in the part of the class visible here
    UnknownColor = (168, 50, 168)

    def __init__(self,
                 scatter_widget,
                 parent=None,
                 _="None",
                 view_box=InteractiveViewBox):
        """Create the plot widget and initialise all plotting state.

        Args:
            scatter_widget: the owning widget; stored as `self.master` and
                used to register and show messages.
            parent: Qt parent passed to the `pg.PlotWidget`.
            _: unused placeholder argument.
            view_box: callable invoked as `view_box(self)` to build the view
                box given to the plot widget.
        """
        gui.OWComponent.__init__(self, scatter_widget)
        self.view_box = view_box(self)
        self.plot_widget = pg.PlotWidget(viewBox=self.view_box,
                                         parent=parent,
                                         background="w")
        self.plot_widget.getPlotItem().buttonsHidden = True
        self.plot_widget.setAntialiasing(True)
        # Override the size hint on this instance only
        self.plot_widget.sizeHint = lambda: QSize(500, 500)
        scene = self.plot_widget.scene()
        self._create_drag_tooltip(scene)
        # Original Table as passed from the widget to new_data, before any
        # transformation
        self._data = None

        self.replot = self.plot_widget.replot
        ScaleScatterPlotData.__init__(self)
        # Items currently shown in the plot (None while absent)
        self.density_img = None
        self.scatterplot_item = None
        self.scatterplot_item_sel = None
        self.reg_line_item = None

        # Text items created by create_labels, one per plotted point
        self.labels = []

        self.master = scatter_widget
        # Register this component's messages on the master widget
        self.master.Warning.add_message(
            "missing_coords",
            "Plot cannot be displayed because '{}' or '{}' is missing for "
            "all data points")
        self.master.Information.add_message(
            "missing_coords",
            "Points with missing '{}' or '{}' are not displayed")
        self.master.Information.add_message(
            "missing_size",
            "Points with undefined '{}' are shown in smaller size")
        self.master.Information.add_message(
            "missing_shape",
            "Points with undefined '{}' are shown as crossed circles")
        self.shown_attribute_indices = []
        self.shown_x = self.shown_y = None
        # Caches filled by compute_colors
        self.pen_colors = self.brush_colors = None

        self.valid_data = None  # np.ndarray
        self.selection = None  # np.ndarray
        self.n_points = 0

        self.gui = OWPlotGUI(self)
        self.continuous_palette = ContinuousPaletteGenerator(
            QColor(255, 255, 0), QColor(0, 0, 255), True)
        self.discrete_palette = ColorPaletteGenerator()

        self.selection_behavior = 0

        self.legend = self.color_legend = None
        # Last known legend anchors; restored when a legend is re-created
        self.__legend_anchor = (1, 0), (1, 0)
        self.__color_legend_anchor = (1, 1), (1, 1)

        self.scale = None  # DiscretizedScale

        # Ids of the rows in the subset passed to new_data (None = no subset)
        self.subset_indices = None

        # self.setMouseTracking(True)
        # self.grabGesture(QPinchGesture)
        # self.grabGesture(QPanGesture)

        self.update_grid()

        # Keep a reference to the delegate installed on the scene
        self._tooltip_delegate = HelpEventDelegate(self.help_event)
        self.plot_widget.scene().installEventFilter(self._tooltip_delegate)

    def _create_drag_tooltip(self, scene):
        """Build the hint shown while dragging and attach it to `scene`.

        `self.tiptexts` maps an integer modifier value to the hint text with
        the matching part in bold; key 0 holds the text with nothing
        emphasised. The text item and its background rectangle are grouped
        into `scene.drag_tooltip`, which starts out hidden.
        """
        ctrl_name = "Cmd" if sys.platform == "darwin" else "Ctrl"
        shift_ctrl = Qt.ShiftModifier + Qt.ControlModifier
        hints = [
            (Qt.ShiftModifier, "Shift: Add group"),
            (shift_ctrl, "Shift-{}: Append to group".format(ctrl_name)),
            (Qt.AltModifier, "Alt: Remove"),
        ]
        plain = ", ".join(hint for _, hint in hints)
        tiptexts = {}
        for modifier, hint in hints:
            tiptexts[int(modifier)] = plain.replace(
                hint, "<b>{}</b>".format(hint))
        tiptexts[0] = plain
        self.tiptexts = tiptexts

        text_item = QGraphicsTextItem()
        self.tip_textitem = text_item
        # Set to the longest text
        text_item.setHtml(tiptexts[shift_ctrl])
        text_item.setPos(4, 2)
        bounds = text_item.boundingRect()
        background = QGraphicsRectItem(
            0, 0, bounds.width() + 8, bounds.height() + 4)
        background.setBrush(QColor(224, 224, 224, 212))
        background.setPen(QPen(Qt.NoPen))
        self.update_tooltip(Qt.NoModifier)

        scene.drag_tooltip = scene.createItemGroup([background, text_item])
        scene.drag_tooltip.hide()

    def update_tooltip(self, modifiers):
        """Show the drag hint that matches the pressed `modifiers`.

        Only Shift, Control and Alt are considered; combinations without an
        entry in `self.tiptexts` fall back to the un-emphasised text.
        """
        # NOTE(review): `&=` may modify the passed flags object in place,
        # depending on the Qt binding - confirm before changing to `&`
        modifiers &= Qt.ShiftModifier + Qt.ControlModifier + Qt.AltModifier
        text = self.tiptexts.get(int(modifiers), self.tiptexts[0])
        self.tip_textitem.setHtml(text)

    def new_data(self, data, subset_data=None, new=True, **args):
        """Install `data` as the table to plot.

        With `new` set, the plot widget and the legend are cleared and all
        cached plot items, labels, selection and validity mask are reset.
        The ids of the rows in `subset_data` are remembered (or None when no
        subset is given). The table is kept in `self._data` and its dense
        form is passed to `set_data` together with `args`.
        """
        if new:
            self.plot_widget.clear()
            self.remove_legend()

            self.density_img = self.scatterplot_item = None
            self.scatterplot_item_sel = self.reg_line_item = None
            self.labels = []
            self.selection = self.valid_data = None

        if subset_data:
            self.subset_indices = {row.id for row in subset_data}
        else:
            self.subset_indices = None

        self._data = data
        self.set_data(self.sparse_to_dense(), **args)

    def sparse_to_dense(self):
        """Return `self._data` in dense form.

        Missing or already dense data is returned unchanged. For sparse data
        only the columns currently used for the axes, color, shape, size and
        label are selected before converting to dense.
        """
        data = self._data
        if data is None or not data.is_sparse():
            return data

        used = {
            self.shown_x, self.shown_y, self.attr_color, self.attr_shape,
            self.attr_size, self.attr_label
        }
        domain = data.domain
        columns = list(set(domain.variables + domain.metas) & used)
        return data[:, columns].to_dense()

    def _clear_plot_widget(self):
        self.remove_legend()
        if self.density_img:
            self.plot_widget.removeItem(self.density_img)
            self.density_img = None
        if self.scatterplot_item:
            self.plot_widget.removeItem(self.scatterplot_item)
            self.scatterplot_item = None
        if self.scatterplot_item_sel:
            self.plot_widget.removeItem(self.scatterplot_item_sel)
            self.scatterplot_item_sel = None
        if self.reg_line_item:
            self.plot_widget.removeItem(self.reg_line_item)
            self.reg_line_item = None
        for label in self.labels:
            self.plot_widget.removeItem(label)
        self.labels = []
        self.set_axis_title("bottom", "")
        self.set_axis_title("left", "")

    def update_data(self, attr_x, attr_y, reset_view=True):
        """Redraw the whole plot for the variables `attr_x` and `attr_y`.

        Clears the previous plot, determines which rows have both
        coordinates, and rebuilds the point items, optional density image,
        regression line, labels and legend. If no row is valid, a warning is
        shown on the master widget and nothing is drawn.

        Args:
            attr_x: variable shown on the bottom axis.
            attr_y: variable shown on the left axis.
            reset_view: when true, the view range is reset to the extent of
                the data and the view box history is restarted.
        """
        self.master.Warning.missing_coords.clear()
        self.master.Information.missing_coords.clear()
        self._clear_plot_widget()

        if self.shown_y != attr_y:
            # 'reset' the axis text width estimation. Without this the left
            # axis tick labels space only ever expands
            yaxis = self.plot_widget.getAxis("left")
            yaxis.textWidth = 30

        self.shown_x, self.shown_y = attr_x, attr_y
        # Re-derive the working data when an axis variable is not part of it
        if attr_x not in self.data.domain or attr_y not in self.data.domain:
            data = self.sparse_to_dense()
            self.set_data(data)

        if self.jittered_data is None or not len(self.jittered_data):
            self.valid_data = None
        else:
            self.valid_data = self.get_valid_list([attr_x, attr_y])
            if not np.any(self.valid_data):
                self.valid_data = None
        if self.valid_data is None:
            # Nothing can be plotted
            self.selection = None
            self.n_points = 0
            self.master.Warning.missing_coords(self.shown_x.name,
                                               self.shown_y.name)
            return

        x_data, y_data = self.get_xy_data_positions(attr_x, attr_y,
                                                    self.valid_data)
        self.n_points = len(x_data)

        if reset_view:
            min_x, max_x = np.nanmin(x_data), np.nanmax(x_data)
            min_y, max_y = np.nanmin(y_data), np.nanmax(y_data)
            self.view_box.setRange(QRectF(min_x, min_y, max_x - min_x,
                                          max_y - min_y),
                                   padding=0.025)
            self.view_box.init_history()
            self.view_box.tag_history()
        # From here on the extents are those of the current view
        [min_x, max_x], [min_y, max_y] = self.view_box.viewRange()

        for axis, var in (("bottom", attr_x), ("left", attr_y)):
            self.set_axis_title(axis, var)
            if var.is_discrete:
                self.set_labels(axis, get_variable_values_sorted(var))
            else:
                self.set_labels(axis, None)

        color_data, brush_data = self.compute_colors()
        color_data_sel, brush_data_sel = self.compute_colors_sel()
        size_data = self.compute_sizes()
        shape_data = self.compute_symbols()

        if self.should_draw_density():
            rgb_data = [pen.color().getRgb()[:3] for pen in color_data]
            self.density_img = classdensity.class_density_image(
                min_x, max_x, min_y, max_y, self.resolution, x_data, y_data,
                rgb_data)
            self.plot_widget.addItem(self.density_img)

        # Row indices of the plotted points within the data
        self.data_indices = np.flatnonzero(self.valid_data)
        if len(self.data_indices) != len(self.data):
            self.master.Information.missing_coords(self.shown_x.name,
                                                   self.shown_y.name)

        self.scatterplot_item = ScatterPlotItem(x=x_data,
                                                y=y_data,
                                                data=self.data_indices,
                                                symbol=shape_data,
                                                size=size_data,
                                                pen=color_data,
                                                brush=brush_data)
        # Larger copy of the points that carries the selection pens
        self.scatterplot_item_sel = ScatterPlotItem(x=x_data,
                                                    y=y_data,
                                                    data=self.data_indices,
                                                    symbol=shape_data,
                                                    size=size_data +
                                                    SELECTION_WIDTH,
                                                    pen=color_data_sel,
                                                    brush=brush_data_sel)
        # The selection item is added first, the points themselves second
        self.plot_widget.addItem(self.scatterplot_item_sel)
        self.plot_widget.addItem(self.scatterplot_item)

        self.scatterplot_item.selected_points = []
        self.scatterplot_item.sigClicked.connect(self.select_by_click)

        self.draw_regression_line(x_data, y_data, min_x, max_x)
        self.update_labels()
        self.make_legend()
        self.plot_widget.replot()

    def draw_regression_line(self, x_data, y_data, min_x, max_x):
        """Add a least-squares regression line for the plotted points.

        Nothing is drawn unless `show_reg_line` is set and
        `can_draw_regresssion_line()` allows it. The line is labelled with
        the correlation coefficient formatted as "r = 0.00".

        Args:
            x_data, y_data: coordinates of the plotted points.
            min_x: x coordinate at which the line is anchored.
            max_x: kept for interface compatibility; no longer needed to
                derive the line's angle.
        """
        if not (self.show_reg_line and self.can_draw_regresssion_line()):
            return

        slope, intercept, rvalue, _, _ = linregress(x_data, y_data)
        start_y = min_x * slope + intercept
        # The inclination follows from the slope alone. Deriving it from two
        # points on the line, (end_y - start_y) / (max_x - min_x), yields the
        # same value but is undefined when the x range has zero width.
        angle = np.degrees(np.arctan(slope))
        # Steeply descending lines get their label turned around
        rotate = ((angle + 45) % 180) - 45 > 90
        color = QColor("#505050")
        l_opts = dict(color=color,
                      position=abs(int(rotate) - 0.85),
                      rotateAxis=(1, 0),
                      movable=True)
        self.reg_line_item = InfiniteLine(
            pos=QPointF(min_x, start_y),
            pen=pg.mkPen(color=color, width=1),
            angle=angle,
            label="r = {:.2f}".format(rvalue),
            labelOpts=l_opts)
        if rotate:
            self.reg_line_item.label.angle = 180
            self.reg_line_item.label.updateTransform()
        self.plot_widget.addItem(self.reg_line_item)

    def can_draw_density(self):
        """Tell whether a class density image is possible: there is a
        domain, the color variable is discrete and both axis variables are
        continuous."""
        if self.domain is None or self.attr_color is None:
            return False
        return (self.attr_color.is_discrete
                and self.shown_x.is_continuous
                and self.shown_y.is_continuous)

    def should_draw_density(self):
        """Tell whether the density image should be drawn now: the setting
        is on, more than one point is plotted and drawing is possible."""
        wanted = self.class_density and self.n_points > 1
        return wanted and self.can_draw_density()

    def can_draw_regresssion_line(self):
        """Tell whether a regression line is possible: there is a domain and
        both axis variables are continuous."""
        if self.domain is None:
            return False
        return self.shown_x.is_continuous and self.shown_y.is_continuous

    def set_labels(self, axis, labels):
        """Set the tick labels of the named axis.

        With a non-empty `labels` sequence, tick ``i`` is labelled
        ``labels[i]``; otherwise the ticks are reset by passing None.
        """
        axis_item = self.plot_widget.getAxis(axis)
        if not labels:
            axis_item.setTicks(None)
            return
        axis_item.setTicks([list(enumerate(labels))])

    def set_axis_title(self, axis, title):
        """Set the title of the named plot axis (e.g. "bottom" or "left")."""
        self.plot_widget.setLabel(axis=axis, text=title)

    def compute_sizes(self):
        """Return the size of every plotted point as a float array.

        Without a size variable all points get `point_width`. Otherwise the
        size is `MinShapeSize` plus the scaled value of the size variable
        times `point_width`; points with a missing value get
        `MinShapeSize - 2` and the "missing size" information is shown.
        """
        self.master.Information.missing_size.clear()
        if self.attr_size is not None:
            scaled = self.scaled_data.get_column_view(self.attr_size)[0]
            size_data = \
                self.MinShapeSize + scaled[self.valid_data] * self.point_width
        else:
            size_data = np.full((self.n_points, ),
                                self.point_width,
                                dtype=float)
        missing = np.isnan(size_data)
        if np.any(missing):
            size_data[missing] = self.MinShapeSize - 2
            self.master.Information.missing_size(self.attr_size)
        return size_data

    def update_sizes(self):
        """Refresh the working data, then re-apply the point sizes."""
        dense = self.sparse_to_dense()
        self.set_data(dense)
        self.update_point_size()

    def update_point_size(self):
        """Apply freshly computed sizes to the plotted points; the selection
        item is kept `SELECTION_WIDTH` larger. No-op when nothing is
        plotted."""
        if not self.scatterplot_item:
            return
        sizes = self.compute_sizes()
        self.scatterplot_item.setSize(sizes)
        self.scatterplot_item_sel.setSize(sizes + SELECTION_WIDTH)

    def get_color(self):
        """Refresh the palette for the color variable and return that
        variable (None when no color variable is set).

        A discrete variable rebuilds `discrete_palette` from its own colors,
        or from `DefaultRGBColors` capped at `MAX` colors when it has more
        than `MAX`. Any other variable rebuilds `continuous_palette`.
        """
        attr = self.attr_color
        if attr is None:
            return None
        colors = attr.colors
        if attr.is_discrete:
            n_colors = len(colors)
            too_many = n_colors > MAX
            self.discrete_palette = ColorPaletteGenerator(
                number_of_colors=MAX if too_many else n_colors,
                rgb_colors=DefaultRGBColors if too_many else colors)
        else:
            self.continuous_palette = ContinuousPaletteGenerator(*colors)
        return attr

    def compute_colors_sel(self, keep_colors=False):
        """Return ``(pens, brushes)`` for the selection item.

        Unselected points (group 0) get no pen. With a single selection
        group its points get one fixed highlight color; with several groups
        each group gets its own palette color. All brushes are fully
        transparent. Unless `keep_colors` is set, the cached selection
        colors are reset.
        """
        if not keep_colors:
            self.pen_colors_sel = None
            self.brush_colors_sel = None

        no_pen = QPen(Qt.NoPen)
        pen_width = SELECTION_WIDTH + 1.
        if self.selection is None:
            pen = [no_pen] * self.n_points
        else:
            n_groups = np.max(self.selection)
            if n_groups == 1:
                group_pens = [
                    no_pen,
                    _make_pen(QColor(255, 190, 0, 255), pen_width)
                ]
            else:
                # Start with the first color so that the colors of the
                # additional attribute in annotation (which start with 0,
                # unselected) will match these colors
                palette = ColorPaletteGenerator(number_of_colors=n_groups + 1)
                group_pens = [no_pen]
                group_pens.extend(
                    _make_pen(palette[idx], pen_width)
                    for idx in range(1, n_groups + 1))
            pen = [group_pens[group]
                   for group in self.selection[self.valid_data]]
        transparent = QBrush(QColor(255, 255, 255, 0))
        brush = [transparent] * self.n_points
        return pen, brush

    def _reduce_values(self, attr):
        """
        Return ``(ranking, column)`` for the valid rows of `attr`.

        `ranking` is None for continuous variables and for discrete
        variables with at most `MAX` values. Otherwise it lists the distinct
        column values from the most to the least frequent, so that callers
        can keep the most used ones and join the rest as "Other".
        """
        column = self.data.get_column_view(attr)[0][self.valid_data]
        needs_reduction = not attr.is_continuous and len(attr.values) > MAX
        if not needs_reduction:
            return None, column
        frequencies = Counter(column)
        ranking = sorted(frequencies, key=frequencies.get, reverse=True)
        return ranking, column

    def _get_values(self, attr):
        """Return the value names to show for discrete `attr`.

        With at most `MAX` values these are the variable's own values.
        Otherwise the `MAX - 1` most frequent values are kept (missing values
        skipped) and "Other" is appended.
        """
        names = attr.values
        if len(names) <= MAX:
            return names
        ranking, _ = self._reduce_values(attr)
        kept = [names[int(idx)] for idx in ranking if not np.isnan(idx)]
        kept = kept[:MAX - 1]
        kept.append("Other")
        return kept

    def _get_data(self, attr):
        """Return the column of `attr` for the valid rows.

        When the variable has too many values (see `_reduce_values`), each
        value is replaced by its frequency rank, with every rank from
        `MAX - 1` upwards collapsed into `MAX - 1`.
        """
        ranking, column = self._reduce_values(attr)
        if ranking is None:
            return column
        # Compare against a snapshot so already rewritten cells are not
        # matched again
        snapshot = column.copy()
        for rank, value in enumerate(ranking):
            column[snapshot == value] = min(rank, MAX - 1)
        return column

    def compute_colors(self, keep_colors=False):
        """Return ``(pens, brushes)`` for the plotted points.

        Without a color variable all points share one pen and a gray brush.
        With a continuous color variable, values are binned by a
        `DiscretizedScale` and colored from the continuous palette. With a
        discrete one, colors come from the discrete palette, with gray for
        missing values. When a subset is set, points in the subset are
        filled opaquely and the others transparently; otherwise fills use
        `alpha_value`.

        Args:
            keep_colors: when true, the cached `pen_colors`/`brush_colors`
                are reused instead of being recomputed.
        """
        if not keep_colors:
            self.pen_colors = self.brush_colors = None
        self.get_color()

        # Boolean per plotted point: is the row part of the subset?
        subset = None
        if self.subset_indices:
            subset = np.array([
                ex.id in self.subset_indices
                for ex in self.data[self.valid_data]
            ])

        if self.attr_color is None:  # same color
            color = self.plot_widget.palette().color(OWPalette.Data)
            pen = [_make_pen(color, 1.5)] * self.n_points
            if subset is not None:
                # Index the (transparent, opaque) pair by subset membership
                brush = [(QBrush(QColor(128, 128, 128,
                                        0)), QBrush(QColor(128, 128, 128,
                                                           255)))[s]
                         for s in subset]
            else:
                brush = [QBrush(QColor(128, 128, 128, self.alpha_value))] \
                        * self.n_points
            return pen, brush

        c_data = self._get_data(self.attr_color)
        if self.attr_color.is_continuous:
            if self.pen_colors is None:
                self.scale = DiscretizedScale(np.nanmin(c_data),
                                              np.nanmax(c_data))
                # Map values to the centers of the scale's bins, scaled to
                # [0, 1]; the operations below modify c_data in place
                c_data -= self.scale.offset
                c_data /= self.scale.width
                c_data = np.floor(c_data) + 0.5
                c_data /= self.scale.bins
                c_data = np.clip(c_data, 0, 1)
                palette = self.continuous_palette
                self.pen_colors = palette.getRGB(c_data)
                # Brush colors are the RGB values plus an alpha column
                self.brush_colors = np.hstack([
                    self.pen_colors,
                    np.full((self.n_points, 1), self.alpha_value, dtype=int)
                ])
                # Darken the pen colors: scale by 100 / DarkerValue
                self.pen_colors *= 100
                self.pen_colors //= self.DarkerValue
                self.pen_colors = [
                    _make_pen(QColor(*col), 1.5)
                    for col in self.pen_colors.tolist()
                ]
            if subset is not None:
                self.brush_colors[:, 3] = 0
                self.brush_colors[subset, 3] = 255
            else:
                self.brush_colors[:, 3] = self.alpha_value
            pen = self.pen_colors
            brush = np.array(
                [QBrush(QColor(*col)) for col in self.brush_colors.tolist()])
        else:
            if self.pen_colors is None:
                palette = self.discrete_palette
                n_colors = palette.number_of_colors
                c_data = c_data.copy()
                # Missing values index the extra gray color appended below
                c_data[np.isnan(c_data)] = n_colors
                c_data = c_data.astype(int)
                colors = np.r_[palette.getRGB(np.arange(n_colors)),
                               [[128, 128, 128]]]
                pens = np.array([
                    _make_pen(QColor(*col).darker(self.DarkerValue), 1.5)
                    for col in colors
                ])
                self.pen_colors = pens[c_data]
                alpha = self.alpha_value if subset is None else 255
                # Per color: column 0 is a transparent brush, column 1 the
                # colored one
                self.brush_colors = np.array([[
                    QBrush(QColor(0, 0, 0, 0)),
                    QBrush(QColor(col[0], col[1], col[2], alpha))
                ] for col in colors])
                self.brush_colors = self.brush_colors[c_data]
            if subset is not None:
                brush = np.where(subset, self.brush_colors[:, 1],
                                 self.brush_colors[:, 0])
            else:
                brush = self.brush_colors[:, 1]
            pen = self.pen_colors
        return pen, brush

    def update_colors(self, keep_colors=False):
        """Notify the master widget, refresh the working data and re-apply
        the point colors (see `update_alpha_value` for `keep_colors`)."""
        self.master.update_colors()
        dense = self.sparse_to_dense()
        self.set_data(dense)
        self.update_alpha_value(keep_colors)

    def update_alpha_value(self, keep_colors=False):
        """Re-apply pens and brushes to the plotted points.

        No-op when nothing is plotted. Unless `keep_colors` is set, the
        legend is rebuilt as well and the density image is brought in line
        with the current settings: the plot is redrawn when the image should
        be shown, and a leftover image is removed when it should not.

        Args:
            keep_colors: reuse the cached colors and leave the legend and
                density image untouched.
        """
        if not self.scatterplot_item:
            return

        pen_data, brush_data = self.compute_colors(keep_colors)
        pen_data_sel, brush_data_sel = self.compute_colors_sel(keep_colors)
        self.scatterplot_item.setPen(pen_data, update=False, mask=None)
        self.scatterplot_item.setBrush(brush_data, mask=None)
        self.scatterplot_item_sel.setPen(pen_data_sel,
                                         update=False,
                                         mask=None)
        self.scatterplot_item_sel.setBrush(brush_data_sel, mask=None)
        if keep_colors:
            return

        self.make_legend()

        if self.should_draw_density():
            self.update_data(self.shown_x, self.shown_y)
        elif self.density_img:
            self.plot_widget.removeItem(self.density_img)
            # Forget the removed image, as _clear_plot_widget does, so later
            # calls do not treat it as still being displayed
            self.density_img = None

    def create_labels(self):
        """Create one empty text item at the position of every plotted point
        and remember the items in `self.labels`."""
        positions = zip(*self.scatterplot_item.getData())
        for pos_x, pos_y in positions:
            text_item = TextItem()
            self.plot_widget.addItem(text_item)
            text_item.setPos(pos_x, pos_y)
            self.labels.append(text_item)

    def _create_label_column(self):
        if self.attr_label in self.data.domain:
            label_column = self.data.get_column_view(self.attr_label)[0]
        else:
            label_column = self.master.data.get_column_view(self.attr_label)[0]
        return label_column[self.data_indices]

    def update_labels(self):
        """Refresh the text shown next to the plotted points.

        All labels are blanked when there is no label variable, or when only
        selected points are to be labelled and nothing is selected.
        Otherwise each label shows the formatted value of the label
        variable; with `label_only_selected`, unselected points get an empty
        text.
        """
        nothing_to_show = self.attr_label is None or (
            self.label_only_selected and self.selection is None)
        if nothing_to_show:
            for text_item in self.labels:
                text_item.setText("")
            return

        self.assure_attribute_present(self.attr_label)
        if not self.labels:
            self.create_labels()
        label_column = self._create_label_column()
        texts = map(self.attr_label.str_val, label_column)
        black = pg.mkColor(0, 0, 0)
        if self.selection is not None:
            selection = self.selection[self.valid_data]
        else:
            selection = []

        if self.label_only_selected:
            for text_item, text, selected \
                    in zip(self.labels, texts, selection):
                text_item.setText(text if selected else "", black)
        else:
            for text_item, text in zip(self.labels, texts):
                text_item.setText(text, black)

    def compute_symbols(self):
        """Return the symbol name of every plotted point.

        Without a shape variable every point uses the first symbol.
        Otherwise the shape value indexes `CurveSymbols`; points with a
        missing value get the last symbol and the "missing shape"
        information is shown.
        """
        self.master.Information.missing_shape.clear()
        if self.attr_shape is None:
            first_symbol = np.zeros(self.n_points, dtype=int)
            return self.CurveSymbols[first_symbol]

        shape_data = self._get_data(self.attr_shape)
        missing = np.isnan(shape_data)
        if np.any(missing):
            shape_data[missing] = len(self.CurveSymbols) - 1
            self.master.Information.missing_shape(self.attr_shape)
        return self.CurveSymbols[shape_data.astype(int)]

    def update_shapes(self):
        """Re-apply the point symbols and rebuild the legend.

        The shape variable is first made available in the working data; the
        symbols are only applied when points are currently plotted.
        """
        self.assure_attribute_present(self.attr_shape)
        if self.scatterplot_item:
            symbols = self.compute_symbols()
            self.scatterplot_item.setSymbol(symbols)
        self.make_legend()

    def assure_attribute_present(self, attr):
        """Rebuild the working data from the dense source table when `attr`
        is not in the current data's domain; no-op without data."""
        if self.data is None or attr in self.data.domain:
            return
        self.set_data(self.sparse_to_dense())

    def update_grid(self):
        """Show or hide the grid on both axes according to `show_grid`."""
        visible = self.show_grid
        self.plot_widget.showGrid(x=visible, y=visible)

    def update_legend(self):
        """Apply the `show_legend` setting to the legend, if one exists."""
        if not self.legend:
            return
        self.legend.setVisible(self.show_legend)

    def create_legend(self):
        """Create the legend inside the plot's view box and restore its
        remembered anchor."""
        self.legend = legend = LegendItem()
        legend.setParentItem(self.plot_widget.getViewBox())
        legend.restoreAnchor(self.__legend_anchor)

    def remove_legend(self):
        """Detach the legend and the color legend, if present.

        The current anchor of each legend is remembered first (when it can
        be determined) so that it can be restored on re-creation.
        """
        legend = self.legend
        if legend:
            anchor = legend_anchor_pos(legend)
            if anchor is not None:
                self.__legend_anchor = anchor
            legend.setParent(None)
            self.legend = None

        color_legend = self.color_legend
        if color_legend:
            anchor = legend_anchor_pos(color_legend)
            if anchor is not None:
                self.__color_legend_anchor = anchor
            color_legend.setParent(None)
            self.color_legend = None

    def make_legend(self):
        """Rebuild the legend from scratch: remove the old one, add the
        color and shape entries, then apply the visibility setting."""
        steps = (self.remove_legend, self.make_color_legend,
                 self.make_shape_legend, self.update_legend)
        for step in steps:
            step()

    def make_color_legend(self):
        """Add the legend entries for the color variable.

        A discrete variable gets one entry per shown value in the shared
        legend; when the same variable also defines the shape, each entry
        uses the matching symbol. Any other variable gets a separate color
        legend with a palette sample. No-op without a color variable.
        """
        if self.attr_color is None:
            return
        use_shape = self.attr_shape == self.get_color()

        if not self.attr_color.is_discrete:
            self.color_legend = legend = LegendItem()
            legend.setParentItem(self.plot_widget.getViewBox())
            legend.restoreAnchor(self.__color_legend_anchor)

            sample = PaletteItemSample(self.continuous_palette, self.scale)
            legend.addItem(sample, "")
            legend.setGeometry(sample.boundingRect())
            return

        if not self.legend:
            self.create_legend()
        palette = self.discrete_palette
        # Entries are opaque while a subset is highlighted
        alpha = self.alpha_value if self.subset_indices is None else 255
        for i, value in enumerate(self._get_values(self.attr_color)):
            color = QColor(*palette.getRGB(i))
            pen = _make_pen(color.darker(self.DarkerValue), 1.5)
            color.setAlpha(alpha)
            symbol = self.CurveSymbols[i] if use_shape else "o"
            entry = ScatterPlotItem(
                pen=pen, brush=QBrush(color), size=10, symbol=symbol)
            self.legend.addItem(entry, escape(value))

    def make_shape_legend(self):
        """Add one legend sample per value of `attr_shape`.

        Skipped when no shape attribute is set or when it equals the
        result of `get_color()`.
        """
        shape_var = self.attr_shape
        if shape_var is None:
            return
        if shape_var == self.get_color():
            return
        if not self.legend:
            self.create_legend()

        ink = QColor(0, 0, 0)
        ink.setAlpha(self.alpha_value)
        for index, value in enumerate(self._get_values(shape_var)):
            sample = ScatterPlotItem(
                pen=ink, brush=ink, size=10, symbol=self.CurveSymbols[index])
            self.legend.addItem(sample, escape(value))

    def zoom_button_clicked(self):
        """Put the plot's view box into its `RectMode` mouse mode."""
        view_box = self.plot_widget.getViewBox()
        view_box.setMouseMode(view_box.RectMode)

    def pan_button_clicked(self):
        """Put the plot's view box into its `PanMode` mouse mode."""
        view_box = self.plot_widget.getViewBox()
        view_box.setMouseMode(view_box.PanMode)

    def select_button_clicked(self):
        """Put the plot's view box into its `RectMode` mouse mode."""
        view_box = self.plot_widget.getViewBox()
        view_box.setMouseMode(view_box.RectMode)

    def reset_button_clicked(self):
        """Redraw the currently shown attributes and reset the view."""
        x_attr, y_attr = self.shown_x, self.shown_y
        # reset_view=True also redraws the density image
        self.update_data(x_attr, y_attr, reset_view=True)

    def select_by_click(self, _, points):
        """Select the clicked `points`; ignored without a scatterplot item."""
        if self.scatterplot_item is None:
            return
        self.select(points)

    def select_by_rectangle(self, value_rect):
        """Select every plotted point whose position lies in `value_rect`.

        Ignored when there is no scatterplot item.
        """
        item = self.scatterplot_item
        if item is None:
            return
        inside = []
        for point in item.points():
            if value_rect.contains(QPointF(point.pos())):
                inside.append(point)
        self.select(inside)

    def unselect_all(self):
        """Drop the selection, refresh the plot and notify the master."""
        self.selection = None
        self.update_colors(keep_colors=True)
        # Labels depend on the selection only in "label only selected" mode.
        refresh_labels = self.label_only_selected
        if refresh_labels:
            self.update_labels()
        self.master.selection_changed()

    def select(self, points):
        """Update the selection groups with `points`.

        The keyboard modifiers pressed at call time decide the action:
        Alt removes the points from the selection, Shift+Ctrl appends
        them to the highest-numbered group, Shift alone starts a new
        group, and no modifier replaces the selection with these points
        as group 1. Does nothing when there is no data.
        """
        if self.data is None:
            return
        if self.selection is None:
            self.selection = np.zeros(len(self.data), dtype=np.uint8)
        rows = [point.data() for point in points]
        # noinspection PyArgumentList
        modifiers = QApplication.keyboardModifiers()
        if modifiers & Qt.AltModifier:
            # Remove from selection
            self.selection[rows] = 0
        elif modifiers & Qt.ShiftModifier and modifiers & Qt.ControlModifier:
            # Append to the last group
            self.selection[rows] = np.max(self.selection)
        elif modifiers & Qt.ShiftModifier:
            # Create a new group
            self.selection[rows] = np.max(self.selection) + 1
        else:
            # No modifiers: new selection
            fresh = np.zeros(len(self.data), dtype=np.uint8)
            self.selection = fresh
            self.selection[rows] = 1

        self.update_colors(keep_colors=True)
        if self.label_only_selected:
            self.update_labels()
        self.master.selection_changed()

    def get_selection(self):
        """Return the indices of selected rows as a numpy array.

        The array is empty when nothing has been selected yet.
        """
        mask = self.selection
        if mask is not None:
            return np.flatnonzero(mask)
        return np.array([], dtype=np.uint8)

    def set_palette(self, p):
        """Forward palette `p` to the plot widget's `setPalette`."""
        self.plot_widget.setPalette(p)

    def save_to_file(self, size):
        """Do nothing; `size` is ignored (empty stub)."""
        pass

    def help_event(self, event):
        """Show a tooltip for the data points under the event position.

        Returns True when a tooltip was shown and False when there is no
        scatterplot item or no point at that position. The tooltip lists
        the two shown attributes for every hit point; with
        `tooltip_shows_all` set, class, meta and feature values are
        appended as well.
        """
        if self.scatterplot_item is None:
            return False

        domain = self.data.domain
        # (singular title, plural title, max. shown variables, variables)
        sections = (
            ("Class", "Classes", 4, domain.class_vars),
            ("Meta", "Metas", 4, domain.metas),
            ("Feature", "Features", 10, domain.attributes),
        )

        def describe(var, row, bold=False):
            line = escape('{} = {}'.format(var.name, row[var]))
            if not bold:
                return line
            return "<b>{}</b>".format(line)

        def describe_section(row, singular, plural, limit, variables):
            lines = []
            for var in variables[:limit + 2]:
                # The shown axes are already listed at the top of the
                # tooltip, except in the class section where they are
                # repeated.
                if variables == domain.class_vars \
                        or var not in (self.shown_x, self.shown_y):
                    lines.append(describe(var, row))
            lines = lines[:limit]
            if not lines:
                return ""
            total = len(variables)
            if total > limit:
                lines[-1] = "... and {} others".format(total - limit + 1)
            title = singular if total < 2 else plural
            header = "<br/><b>{}</b>:<br/>".format(title)
            return header + "<br/>".join(lines)

        def describe_point(point):
            row = self.data[point.data()]
            text = "<br/>".join(
                describe(var, row, bold=self.tooltip_shows_all)
                for var in (self.shown_x, self.shown_y))
            if self.tooltip_shows_all:
                details = "".join(
                    describe_section(row, *section) for section in sections)
                text += "<br/>" + details
            return text

        cursor = self.scatterplot_item.mapFromScene(event.scenePos())
        hits = self.scatterplot_item.pointsAt(cursor)
        if not len(hits):
            return False
        text = "<hr/>".join(describe_point(hit) for hit in hits)
        QToolTip.showText(event.screenPos(), text, widget=self.plot_widget)
        return True

    def box_zoom_select(self, parent):
        """Build the "Zoom/Select" box with its toolbar inside `parent`.

        The zoom, pan, select and reset buttons are connected to the
        matching `*_button_clicked` handlers. Returns the created box.
        """
        plot_gui = self.gui
        container = gui.vBox(parent, "Zoom/Select")
        toolbar = plot_gui.zoom_select_toolbar(
            container,
            nomargin=True,
            buttons=[
                plot_gui.StateButtonsBegin,
                plot_gui.SimpleSelect,
                plot_gui.Pan,
                plot_gui.Zoom,
                plot_gui.StateButtonsEnd,
                plot_gui.ZoomReset,
            ])
        toolbar_buttons = toolbar.buttons
        handlers = (
            (plot_gui.Zoom, self.zoom_button_clicked),
            (plot_gui.Pan, self.pan_button_clicked),
            (plot_gui.SimpleSelect, self.select_button_clicked),
            (plot_gui.ZoomReset, self.reset_button_clicked),
        )
        for button_id, handler in handlers:
            toolbar_buttons[button_id].clicked.connect(handler)
        return container

    def zoom_actions(self, parent):
        """Add "Zoom in", "Zoom out" and "Fit in view" actions to `parent`.

        The actions operate on `self.view_box` and are registered on
        `parent` with their keyboard shortcuts.
        """
        def zoom(s):
            """
            Zoom in/out by factor `s`.
            scaleBy scales the view's bounds (the axis range)
            """
            self.view_box.scaleBy((1 / s, 1 / s))

        def fit_to_view():
            # Use the same `view_box` attribute as `zoom`; the previous
            # spelling `viewbox` is not used anywhere else in this class.
            self.view_box.autoRange()

        zoom_in = QAction("Zoom in", parent, triggered=lambda: zoom(1.25))
        zoom_in.setShortcuts([
            QKeySequence(QKeySequence.ZoomIn),
            QKeySequence(parent.tr("Ctrl+="))
        ])
        zoom_out = QAction("Zoom out",
                           parent,
                           shortcut=QKeySequence.ZoomOut,
                           triggered=lambda: zoom(1 / 1.25))
        zoom_fit = QAction("Fit in view",
                           parent,
                           shortcut=QKeySequence(Qt.ControlModifier
                                                 | Qt.Key_0),
                           triggered=fit_to_view)
        parent.addActions([zoom_in, zoom_out, zoom_fit])
Esempio n. 19
0
class OWPivot(OWWidget):
    """Widget that builds a `Pivot` from the input table and displays it.

    On every commit the grouped table, the pivot table and the rows
    matching the selected cells are sent to the outputs.
    """
    name = "Pivot Table"
    description = "Reshape data table based on column values."
    icon = "icons/Pivot.svg"
    priority = 1000
    keywords = ["pivot", "group", "aggregate"]

    class Inputs:
        data = Input("Data", Table, default=True)

    class Outputs:
        pivot_table = Output("Pivot Table", Table, default=True)
        filtered_data = Output("Filtered Data", Table)
        grouped_data = Output("Grouped Data", Table)

    class Warning(OWWidget.Warning):
        # TODO - inconsistent for different variable types
        no_col_feature = Msg("Column feature should be selected.")
        cannot_aggregate = Msg("Some aggregations ({}) cannot be performed.")

    settingsHandler = DomainContextHandler()
    row_feature = ContextSetting(None)
    col_feature = ContextSetting(None)
    val_feature = ContextSetting(None)
    sel_agg_functions = Setting({Pivot.Count})
    selection = ContextSetting(set())
    auto_commit = Setting(True)

    # Order of the aggregation check boxes; None marks a separator line.
    AGGREGATIONS = (Pivot.Count, Pivot.Count_defined, None, Pivot.Sum,
                    Pivot.Mean, Pivot.Mode, Pivot.Min, Pivot.Max, Pivot.Median,
                    Pivot.Var, None, Pivot.Majority)

    def __init__(self):
        super().__init__()
        self.data = None  # type: Table
        self.pivot = None  # type: Pivot
        self._add_control_area_controls()
        self._add_main_area_controls()

    def _add_control_area_controls(self):
        """Create the row/column/value combos, aggregations and apply box."""
        box = gui.vBox(self.controlArea, "Rows")
        gui.comboBox(box,
                     self,
                     "row_feature",
                     contentsLength=12,
                     model=DomainModel(valid_types=DomainModel.PRIMITIVE),
                     callback=self.__feature_changed)
        box = gui.vBox(self.controlArea, "Columns")
        gui.comboBox(box,
                     self,
                     "col_feature",
                     contentsLength=12,
                     model=DomainModel(placeholder="(Same as rows)",
                                       valid_types=DiscreteVariable),
                     callback=self.__feature_changed)
        box = gui.vBox(self.controlArea, "Values")
        gui.comboBox(box,
                     self,
                     "val_feature",
                     contentsLength=12,
                     model=DomainModel(placeholder="(None)"),
                     orientation=Qt.Horizontal,
                     callback=self.__val_feature_changed)
        self.__add_aggregation_controls()
        gui.rubber(self.controlArea)
        gui.auto_apply(self.controlArea, self, "auto_commit")

    def __add_aggregation_controls(self):
        """Add one check box per aggregation, with lines between groups."""
        box = gui.vBox(self.controlArea, "Aggregations")
        for agg in self.AGGREGATIONS:
            if agg is None:
                gui.separator(box, height=1)
                line = QFrame()
                line.setFrameShape(QFrame.HLine)
                line.setLineWidth(1)
                line.setFrameShadow(QFrame.Sunken)
                box.layout().addWidget(line)
                continue
            check_box = QCheckBox(str(agg), box)
            check_box.setChecked(agg in self.sel_agg_functions)
            # `a=agg` binds the current aggregation at definition time.
            check_box.clicked.connect(
                lambda *args, a=agg: self.__aggregation_cb_clicked(a, args[0]))
            box.layout().addWidget(check_box)

    def _add_main_area_controls(self):
        """Create the pivot table view and listen for selection changes."""
        self.table_view = PivotTableView()
        self.table_view.selection_changed.connect(self.__invalidate_filtered)
        self.mainArea.layout().addWidget(self.table_view)

    @property
    def no_col_feature(self):
        """True when no column feature is set and the row one is continuous."""
        return self.col_feature is None and self.row_feature is not None \
            and self.row_feature.is_continuous

    @property
    def skipped_aggs(self):
        """Comma-separated, sorted names of selected aggregations to skip."""
        def add(fun):
            data, var = self.data, self.val_feature
            return data and not var and fun not in Pivot.AutonomousFunctions \
                or var and var.is_discrete and fun in Pivot.ContVarFunctions \
                or var and var.is_continuous and fun in Pivot.DiscVarFunctions

        skipped = [str(fun) for fun in self.sel_agg_functions if add(fun)]
        return ", ".join(sorted(skipped))

    def __feature_changed(self):
        """Discard the selection and the pivot, then commit."""
        self.selection = set()
        self.pivot = None
        self.commit()

    def __val_feature_changed(self):
        """Update the pivot table for the newly chosen value feature."""
        self.selection = set()
        if self.no_col_feature:
            return
        # `pivot` is reset to None by `set_data` and `__feature_changed`
        # and is only rebuilt inside `commit`; when it is missing, let
        # `commit` build it instead of dereferencing None here.
        if self.pivot is not None:
            self.pivot.update_pivot_table(self.val_feature)
        self.commit()

    def __aggregation_cb_clicked(self, agg_fun: Pivot.Functions,
                                 checked: bool):
        """Add or remove `agg_fun` from the selected aggregations."""
        self.selection = set()
        if checked:
            self.sel_agg_functions.add(agg_fun)
        else:
            # discard: unchecking a function that is not stored is harmless
            self.sel_agg_functions.discard(agg_fun)
        if self.no_col_feature or not self.pivot or not self.data:
            return
        self.pivot.update_group_table(self.sel_agg_functions, self.val_feature)
        self.commit()

    def __invalidate_filtered(self):
        """Store the table view's selection and commit."""
        self.selection = self.table_view.get_selection()
        self.commit()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Accept new input data, reset the controls and commit."""
        self.closeContext()
        self.data = data
        self.pivot = None
        self.check_data()
        self.init_attr_values()
        self.openContext(self.data)
        self.unconditional_commit()

    def check_data(self):
        """Clear all messages and, without data, the table view."""
        self.clear_messages()
        if not self.data:
            self.table_view.clear()

    def init_attr_values(self):
        """Reset the combo models to the data's domain and pick defaults."""
        domain = self.data.domain if self.data and len(self.data) else None
        for attr in ("row_feature", "col_feature", "val_feature"):
            getattr(self.controls, attr).model().set_domain(domain)
            setattr(self, attr, None)
        model = self.controls.row_feature.model()
        if model:
            self.row_feature = model[0]
        model = self.controls.val_feature.model()
        if model and len(model) > 2:
            self.val_feature = domain.variables[0] \
                if domain.variables[0] in model else model[2]

    def commit(self):
        """Build the pivot if needed, refresh the view and send outputs."""
        if self.pivot is None:
            self.Warning.no_col_feature.clear()
            if self.no_col_feature:
                self.Warning.no_col_feature()
                return
            self.pivot = Pivot(self.data, self.sel_agg_functions,
                               self.row_feature, self.col_feature,
                               self.val_feature)
        self.Warning.cannot_aggregate.clear()
        if self.skipped_aggs:
            self.Warning.cannot_aggregate(self.skipped_aggs)
        self._update_graph()
        self.Outputs.grouped_data.send(self.pivot.group_table)
        self.Outputs.pivot_table.send(self.pivot.pivot_table)
        self.Outputs.filtered_data.send(self.get_filtered_data())

    def _update_graph(self):
        """Redraw the table view from the current pivot tables."""
        self.table_view.clear()
        if self.pivot.pivot_table:
            col_feature = self.col_feature or self.row_feature
            self.table_view.update_table(col_feature.name,
                                         self.row_feature.name,
                                         *self.pivot.pivot_tables)
            self.table_view.set_selection(self.selection)

    def get_filtered_data(self):
        """Return the input rows matching any selected pivot cell.

        Returns None when there is no data, no selection or no pivot
        table.
        """
        if not self.data or not self.selection or not self.pivot.pivot_table:
            return None

        cond = []
        for i, j in self.selection:
            f = []
            for at, val in [(self.row_feature, self.pivot.pivot_table.X[i, 0]),
                            (self.col_feature, j)]:
                if isinstance(at, DiscreteVariable):
                    f.append(FilterDiscrete(at, [val]))
                elif isinstance(at, ContinuousVariable):
                    f.append(FilterContinuous(at, FilterContinuous.Equal, val))
            cond.append(Values(f))
        # A row is kept when it matches at least one selected cell.
        return Values(cond, conjunction=False)(self.data)

    def sizeHint(self):
        return QSize(640, 525)

    def send_report(self):
        """Report the chosen features and the table view."""
        self.report_items((("Row feature", self.row_feature),
                           ("Column feature", self.col_feature),
                           ("Value feature", self.val_feature)))
        if self.data and self.val_feature is not None:
            self.report_table("", self.table_view)
        # NOTE(review): this branch reports a table when there is NO
        # data, which looks unintended - confirm before changing.
        if not self.data:
            self.report_items((("Group by", self.row_feature), ))
            self.report_table(self.table_view)
Esempio n. 20
0
class OWTranspose(OWWidget):
    """Widget that sends the transpose of its input table.

    Names of the new features are either generated from a prefix or
    taken from the values of a chosen variable.
    """
    name = "Transpose"
    description = "Transpose data table."
    icon = "icons/Transpose.svg"
    priority = 2000
    keywords = []

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table, dynamic=False)

    # Values of `feature_type`
    GENERIC, FROM_VAR = range(2)

    resizing_enabled = False
    want_main_area = False

    DEFAULT_PREFIX = "Feature"

    settingsHandler = DomainContextHandler()
    feature_type = ContextSetting(GENERIC)
    feature_name = ContextSetting("")
    feature_names_column = ContextSetting(None)
    auto_apply = Setting(True)

    class Warning(OWWidget.Warning):
        duplicate_names = Msg("Values are not unique.\nTo avoid multiple "
                              "features with the same name, values \nof "
                              "'{}' have been augmented with indices.")

    class Error(OWWidget.Error):
        value_error = Msg("{}")

    def __init__(self):
        super().__init__()
        self.data = None

        # self.apply is changed later, pylint: disable=unnecessary-lambda
        names_box = gui.radioButtons(
            self.controlArea, self, "feature_type",
            box="Feature names", callback=lambda: self.apply())

        generic_button = gui.appendRadioButton(names_box, "Generic")
        prefix_box = gui.indentedBox(
            names_box, gui.checkButtonOffsetHint(generic_button))
        prefix_edit = gui.lineEdit(
            prefix_box, self, "feature_name",
            placeholderText="Type a prefix ...",
            toolTip="Custom feature name")
        prefix_edit.editingFinished.connect(self._apply_editing)

        self.meta_button = gui.appendRadioButton(names_box, "From variable:")
        self.feature_model = DomainModel(
            valid_types=(ContinuousVariable, StringVariable),
            alphabetical=False)
        combo_box = gui.indentedBox(
            names_box, gui.checkButtonOffsetHint(generic_button))
        self.feature_combo = gui.comboBox(
            combo_box, self, "feature_names_column",
            contentsLength=12,
            callback=self._feature_combo_changed,
            model=self.feature_model)

        self.apply_button = gui.auto_commit(
            self.controlArea, self, "auto_apply", "&Apply",
            box=False, commit=self.apply)
        self.apply_button.button.setAutoDefault(False)

        self.set_controls()

    def _apply_editing(self):
        """Switch to generic names, strip the prefix and apply."""
        self.feature_type = self.GENERIC
        stripped = self.feature_name.strip()
        self.feature_name = stripped
        self.apply()

    def _feature_combo_changed(self):
        """Switch to names taken from a variable and apply."""
        self.feature_type = self.FROM_VAR
        self.apply()

    @Inputs.data
    def set_data(self, data):
        """Accept new input data, refresh the controls and apply."""
        # Skip the context if the combo is empty: a context with
        # feature_model == None would then match all domains
        if self.feature_model:
            self.closeContext()
        self.data = data
        self.set_controls()
        if self.feature_model:
            self.openContext(data)
        self.apply()

    def set_controls(self):
        """Refill the variable combo from the data and pick a default."""
        data = self.data
        self.feature_model.set_domain(data.domain if data else data)
        has_candidates = bool(self.feature_model)
        self.meta_button.setEnabled(has_candidates)
        if not self.feature_model:
            self.feature_names_column = None
            return
        self.feature_names_column = self.feature_model[0]
        self.feature_type = self.FROM_VAR

    def apply(self):
        """Transpose the data and send the result (None without data)."""
        self.clear_messages()
        result = None
        if self.data:
            try:
                # Falsy unless names are to be taken from a variable.
                variable = self.feature_type == self.FROM_VAR and \
                    self.feature_names_column
                result = Table.transpose(
                    self.data, variable,
                    feature_name=self.feature_name or self.DEFAULT_PREFIX)
                if variable:
                    column = self.data.get_column_view(variable)[0]
                    n_distinct = len(set(column))
                    if len(column) != n_distinct:
                        self.Warning.duplicate_names(variable)
            except ValueError as err:
                self.Error.value_error(err)
        self.Outputs.data.send(result)

    def send_report(self):
        """Report how feature names were chosen and the input data."""
        if self.feature_type != self.GENERIC:
            names = "from variable"
            column = self.feature_names_column
            if column:
                names += "  '{}'".format(column.name)
        else:
            names = self.feature_name or self.DEFAULT_PREFIX
        self.report_items("", [("Feature names", names)])
        if self.data:
            self.report_data("Data", self.data)
Esempio n. 21
0
class OWManifoldLearning(OWWidget):
    """Widget that embeds the input data with a manifold learning method.

    The method is chosen from `MANIFOLD_METHODS`; each method has its own
    parameter editor. The embedded table is sent to the output.
    """
    name = "Manifold Learning"
    description = "Nonlinear dimensionality reduction."
    icon = "icons/Manifold.svg"
    priority = 2200
    keywords = []
    settings_version = 2

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        transformed_data = Output("Transformed Data",
                                  Table,
                                  dynamic=False,
                                  replaces=["Transformed data"])

    # Order must match `parameter_editors` built in `__init__`.
    MANIFOLD_METHODS = (TSNE, MDS, Isomap, LocallyLinearEmbedding,
                        SpectralEmbedding)

    tsne_editor = SettingProvider(TSNEParametersEditor)
    mds_editor = SettingProvider(MDSParametersEditor)
    isomap_editor = SettingProvider(IsomapParametersEditor)
    lle_editor = SettingProvider(LocallyLinearEmbeddingParametersEditor)
    spectral_editor = SettingProvider(SpectralEmbeddingParametersEditor)

    resizing_enabled = False
    want_main_area = False

    manifold_method_index = Setting(0)
    n_components = Setting(2)
    auto_apply = Setting(True)

    class Error(OWWidget.Error):
        n_neighbors_too_small = Msg("For chosen method and components, "
                                    "neighbors must be greater than {}")
        manifold_error = Msg("{}")
        sparse_not_supported = Msg("Sparse data is not supported.")
        out_of_memory = Msg("Out of memory")

    class Warning(OWWidget.Warning):
        graph_not_connected = Msg("Disconnected graph, embedding may not work")

    @classmethod
    def migrate_settings(cls, settings, version):
        """Migrate stored t-SNE editor settings from versions before 2.

        `init_index` is renamed to `initialization_index`, and both it and
        `metric_index` are clamped to the last valid index of the
        corresponding value list.
        """
        if version < 2:
            tsne_settings = settings.get('tsne_editor', {})
            # Fixup initialization index
            if 'init_index' in tsne_settings:
                idx = tsne_settings.pop('init_index')
                # The last valid index is len - 1, not len.
                last = len(TSNEParametersEditor.initialization_values) - 1
                tsne_settings['initialization_index'] = min(idx, last)
            # We removed several metrics here
            if 'metric_index' in tsne_settings:
                idx = tsne_settings['metric_index']
                last = len(TSNEParametersEditor.metric_values) - 1
                tsne_settings['metric_index'] = min(idx, last)

    def __init__(self):
        # Called for consistency with the other widgets in this file.
        super().__init__()
        self.data = None

        # GUI
        method_box = gui.vBox(self.controlArea, "Method")
        self.manifold_methods_combo = gui.comboBox(
            method_box,
            self,
            "manifold_method_index",
            items=[m.name for m in self.MANIFOLD_METHODS],
            callback=self.manifold_method_changed)

        self._set_input_summary()
        self._set_output_summary(None)

        self.params_box = gui.vBox(self.controlArea, "Parameters")

        self.tsne_editor = TSNEParametersEditor(self)
        self.mds_editor = MDSParametersEditor(self)
        self.isomap_editor = IsomapParametersEditor(self)
        self.lle_editor = LocallyLinearEmbeddingParametersEditor(self)
        self.spectral_editor = SpectralEmbeddingParametersEditor(self)
        self.parameter_editors = [
            self.tsne_editor, self.mds_editor, self.isomap_editor,
            self.lle_editor, self.spectral_editor
        ]

        # All editors live in the box; only the active one is visible.
        for editor in self.parameter_editors:
            self.params_box.layout().addWidget(editor)
            editor.hide()
        self.params_widget = self.parameter_editors[self.manifold_method_index]
        self.params_widget.show()

        output_box = gui.vBox(self.controlArea, "Output")
        self.n_components_spin = gui.spin(output_box,
                                          self,
                                          "n_components",
                                          1,
                                          10,
                                          label="Components:",
                                          alignment=Qt.AlignRight,
                                          callbackOnReturn=True,
                                          callback=self.settings_changed)
        self.apply_button = gui.auto_apply(self.controlArea,
                                           self,
                                           box=False,
                                           commit=self.apply)

    def manifold_method_changed(self):
        """Show the editor of the newly selected method and apply."""
        self.params_widget.hide()
        self.params_widget = self.parameter_editors[self.manifold_method_index]
        self.params_widget.show()
        self.apply()

    def settings_changed(self):
        """Apply after a setting has changed."""
        self.apply()

    @Inputs.data
    def set_data(self, data):
        """Accept new input data and recompute the embedding."""
        self.data = data
        self._set_input_summary()
        # No more components than attributes; 10 when there is no data.
        self.n_components_spin.setMaximum(
            len(self.data.domain.attributes) if self.data else 10)
        self.unconditional_apply()

    def apply(self):
        """Compute the embedding with the selected method and send it.

        None is sent when there is no data, the data is sparse, or the
        method fails; failures are shown as widget errors.
        """
        builtin_warn = warnings.warn

        def _handle_disconnected_graph_warning(msg, *args, **kwargs):
            # `warnings.warn` also accepts Warning instances, hence str().
            if str(msg).startswith("Graph is not fully connected"):
                self.Warning.graph_not_connected()
            else:
                builtin_warn(msg, *args, **kwargs)

        out = None
        data = self.data
        method = self.MANIFOLD_METHODS[self.manifold_method_index]
        have_data = data is not None and len(data)
        self.Error.clear()
        self.Warning.clear()

        if have_data and data.is_sparse():
            self.Error.sparse_not_supported()
        elif have_data:
            # New component names must not clash with class/meta names.
            names = [
                var.name
                for var in chain(data.domain.class_vars, data.domain.metas)
                if var
            ]
            proposed = ["C{}".format(i) for i in range(self.n_components)]
            unique = get_unique_names(names, proposed)
            domain = Domain([ContinuousVariable(name) for name in unique],
                            data.domain.class_vars, data.domain.metas)
            try:
                # Temporarily intercept warnings raised while fitting.
                warnings.warn = _handle_disconnected_graph_warning
                projector = method(**self.get_method_parameters(data, method))
                model = projector(data)
                if isinstance(model, TSNEModel):
                    out = model.embedding
                else:
                    X = model.embedding_
                    out = Table(domain, X, data.Y, data.metas)
            except ValueError as e:
                # str(e) equals args[0] for a single string argument and
                # does not fail on an exception raised without arguments.
                message = str(e)
                if message == "for method='hessian', n_neighbors " \
                              "must be greater than [n_components" \
                              " * (n_components + 3) / 2]":
                    # n * (n + 3) is always even, so this is exact.
                    n = self.n_components * (self.n_components + 3) // 2
                    self.Error.n_neighbors_too_small("{}".format(n))
                else:
                    self.Error.manifold_error(message)
            except MemoryError:
                self.Error.out_of_memory()
            except np.linalg.LinAlgError as e:
                self.Error.manifold_error(str(e))
            finally:
                warnings.warn = builtin_warn

        self._set_output_summary(out)
        self.Outputs.transformed_data.send(out)

    def _set_input_summary(self):
        """Show the number of input rows, or "no input", in the status."""
        summary = len(self.data) if self.data else self.info.NoInput
        details = format_summary_details(self.data) if self.data else ""
        self.info.set_input_summary(summary, details)

    def _set_output_summary(self, output):
        """Show the number of output rows, or "no output", in the status."""
        summary = len(output) if output else self.info.NoOutput
        details = format_summary_details(output) if output else ""
        self.info.set_output_summary(summary, details)

    def get_method_parameters(self, data, method):
        """Return `n_components` merged with the active editor's parameters.

        `data` and `method` are not used by this implementation.
        """
        parameters = dict(n_components=self.n_components)
        parameters.update(self.params_widget.get_parameters())
        return parameters

    def send_report(self):
        """Report the method, its parameters and the input data."""
        method = self.MANIFOLD_METHODS[self.manifold_method_index]
        self.report_items((("Method", method.name), ))
        parameters = self.get_method_parameters(self.data, method)
        self.report_items("Method parameters", tuple(parameters.items()))
        if self.data:
            self.report_data("Data", self.data)
Esempio n. 22
0
class OWExplainFeatureBase(OWWidget, ConcurrentWidgetMixin, openclass=True):
    """Shared scaffolding for widgets that explain a model feature-wise.

    The class builds the common GUI (display controls, a graphics
    scene/view that hosts a ``PLOT_CLASS`` item, an auto-send button),
    handles the ``Data`` and ``Model`` inputs and runs ``run`` as a
    background task through ``ConcurrentWidgetMixin``.

    ``run``, ``update_selection``, ``get_selected_data`` and
    ``get_scores_table`` raise ``NotImplementedError`` here and must be
    supplied by subclasses; ``_check_data`` and ``setup_controls`` are
    no-op hooks.
    """

    class Inputs:
        data = Input("Data", Table, default=True)
        model = Input("Model", Model)

    class Outputs:
        selected_data = Output("Selected Data", Table)
        scores = Output("Scores", Table)

    class Error(OWWidget.Error):
        domain_transform_err = Msg("{}")
        unknown_err = Msg("An error occurred.\n{}")

    class Information(OWWidget.Information):
        data_sampled = Msg("Data has been sampled.")

    # Placeholder value; set_model calls closeContext/openContext, so
    # concrete subclasses presumably install a real handler -- TODO confirm.
    settingsHandler = NotImplemented
    n_attributes = Setting(10)
    zoom_level = Setting(0)
    selection = Setting((), schema_only=True)
    auto_send = Setting(True)
    visual_settings = Setting({}, schema_only=True)

    graph_name = "scene"
    PLOT_CLASS = FeaturesPlot

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self.results: Optional[BaseResults] = None
        self.data: Optional[Table] = None
        self.model: Optional[Model] = None
        self.plot: Optional[FeaturesPlot] = None
        self.scene: Optional[GraphicsScene] = None
        self.view: Optional[GraphicsView] = None
        self.setup_gui()
        # Keep the selection restored from settings; on_done hands it to
        # select_pending once a task has finished.
        self.__pending_selection = self.selection

        # A plot instance is created here only to read its initial visual
        # settings for the settings dialog.
        initial = self.PLOT_CLASS().parameter_setter.initial_settings
        VisualSettingsDialog(self, initial)

    # GUI setup
    def setup_gui(self):
        """Build controls, plot area and buttons; reset both summaries."""
        self._add_controls()
        self._add_plot()
        self._add_buttons()
        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    def _add_plot(self):
        """Create the scene and view and place the view in the main area."""
        self.scene = GraphicsScene()
        self.view = GraphicsView(self.scene)
        self.view.resized.connect(self._update_plot)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def _add_controls(self):
        """Add the "Display" box with the feature-count spin and zoom slider."""
        self.display_box = box = gui.vBox(self.controlArea, "Display")
        gui.spin(box,
                 self,
                 "n_attributes",
                 1,
                 MAX_N_ITEMS,
                 label="Top features:",
                 controlWidth=50,
                 callback=self.__n_spin_changed)
        gui.hSlider(box,
                    self,
                    "zoom_level",
                    minValue=0,
                    maxValue=200,
                    width=150,
                    label="Zoom:",
                    createLabel=False,
                    callback=self.__zoom_slider_changed)

    def _add_buttons(self):
        """Add a stretch to the control area and the auto-send button."""
        gui.rubber(self.controlArea)
        gui.auto_send(self.buttonsArea, self, "auto_send")

    def __zoom_slider_changed(self):
        """Forward the zoom level to the plot, if there is one."""
        if self.plot is not None:
            self.plot.set_height(self.zoom_level)

    def __n_spin_changed(self):
        """Forward the number of visible features to the plot, if any."""
        if self.plot is not None:
            self.plot.set_n_visible(self.n_attributes)

    # Inputs
    @Inputs.data
    @check_sql_input
    def set_data(self, data: Optional[Table]):
        """Store the input data, update the input summary, run the hook."""
        self.data = data
        summary = len(data) if data else self.info.NoInput
        details = format_summary_details(data) if data else ""
        self.info.set_input_summary(summary, details)
        self._check_data()

    def _check_data(self):
        """Hook called after new data is stored; does nothing here."""
        pass

    @Inputs.model
    def set_model(self, model: Optional[Model]):
        """Store the input model and reopen the settings context for it."""
        self.closeContext()
        self.model = model
        self.setup_controls()
        self.openContext(self.model)

    def setup_controls(self):
        """Hook called after a new model is stored; does nothing here."""
        pass

    def handleNewSignals(self):
        """Reset the widget state and start a new background task."""
        self.clear()
        self.start(self.run, *self.get_runner_parameters())

    def get_runner_parameters(self) -> Tuple:
        """Return the positional arguments passed to ``run``."""
        return self.data, self.model

    def clear(self):
        """Drop results, cancel the task and reset selection, scene, messages."""
        self.results = None
        self.cancel()
        self._clear_selection()
        self._clear_scene()
        self.Error.domain_transform_err.clear()
        self.Error.unknown_err.clear()
        self.Information.data_sampled.clear()

    # Plot setup
    def _clear_scene(self):
        """Empty the scene, reset all scene rectangles and forget the plot."""
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self.view.setSceneRect(QRectF())
        self.view.setHeaderSceneRect(QRectF())
        self.view.setFooterSceneRect(QRectF())
        self.plot = None

    def update_scene(self):
        """Clear the scene; this base implementation draws nothing."""
        self._clear_scene()

    def setup_plot(self, values: np.ndarray, names: List[str], *plot_args):
        """Create a ``PLOT_CLASS`` item, feed it data and add it to the scene.

        ``values``, ``names``, the current ``n_attributes``, the viewport
        width and any extra ``plot_args`` are passed on to the plot's
        ``set_data``.
        """
        width = int(self.view.viewport().rect().width())
        self.plot = self.PLOT_CLASS()
        self.plot.set_data(values, names, self.n_attributes, width, *plot_args)
        self.plot.apply_visual_settings(self.visual_settings)
        self.plot.selection_cleared.connect(self._clear_selection)
        self.plot.selection_changed.connect(self.update_selection)
        self.plot.layout().activate()
        self.plot.geometryChanged.connect(self._update_scene_rect)
        self.plot.resized.connect(self._update_plot)
        self.scene.addItem(self.plot)
        self.scene.mouse_clicked.connect(self.plot.deselect)
        self._update_scene_rect()
        self._update_plot()

    def _update_scene_rect(self):
        """Fit the scene and view rectangles to the plot geometry.

        The footer rectangle is the bottom axis geometry, padded vertically
        and stretched to the full plot width.
        """
        # Same guard as _update_plot: nothing to measure without a plot.
        if self.plot is None:
            return

        def extend_horizontal(rect):
            rect = QRectF(rect)
            rect.setLeft(geom.left())
            rect.setRight(geom.right())
            return rect

        geom = self.plot.geometry()
        self.scene.setSceneRect(geom)
        self.view.setSceneRect(geom)

        footer_geom = self.plot.bottom_axis.geometry()
        footer = extend_horizontal(footer_geom.adjusted(0, -3, 0, 10))
        self.view.setFooterSceneRect(footer)

    def _update_plot(self):
        """Rescale the plot to the current viewport width, if there is one."""
        if self.plot is not None:
            width = int(self.view.viewport().rect().width())
            self.plot.rescale(width)

    # Selection
    def _clear_selection(self):
        """Empty a non-empty selection and commit the change."""
        if self.selection:
            self.selection = ()
            self.commit()

    def update_selection(self, *_):
        """Handle the plot's ``selection_changed`` signal (subclass duty)."""
        raise NotImplementedError

    def select_pending(self, pending_selection: Tuple):
        """Store ``pending_selection`` and commit unconditionally."""
        self.__pending_selection = pending_selection
        self.unconditional_commit()

    # Outputs
    def commit(self):
        """Send the selected data and update the output summary."""
        selected_data = self.get_selected_data()
        if not selected_data:
            self.info.set_output_summary(self.info.NoOutput)
        else:
            detail = format_summary_details(selected_data)
            self.info.set_output_summary(len(selected_data), detail)
        self.Outputs.selected_data.send(selected_data)

    def get_selected_data(self) -> Optional[Table]:
        """Return the data for the current selection (subclass duty)."""
        raise NotImplementedError

    def update_scores(self):
        """Send the scores table, or ``None`` when there are no results."""
        scores = None
        if self.results is not None:
            scores = self.get_scores_table()
        self.Outputs.scores.send(scores)

    def get_scores_table(self) -> Table:
        """Build the table sent on the ``Scores`` output (subclass duty)."""
        raise NotImplementedError

    # Concurrent
    def on_partial_result(self, _):
        """Partial results are ignored."""
        pass

    def on_done(self, results: Optional[BaseResults]):
        """Store the task results and refresh scene, scores and selection.

        The ``data_sampled`` message is shown when data is present and
        ``results.mask`` is not all true.
        """
        self.results = results
        if self.data and results is not None and not all(results.mask):
            self.Information.data_sampled()
        self.update_scene()
        self.update_scores()
        self.select_pending(self.__pending_selection)

    def on_exception(self, ex: Exception):
        """Show ``ex`` as a domain-transformation or a generic error."""
        if isinstance(ex, DomainTransformationError):
            self.Error.domain_transform_err(ex)
        else:
            self.Error.unknown_err(ex)

    def onDeleteWidget(self):
        """Shut down the background machinery before the widget is deleted."""
        self.shutdown()
        super().onDeleteWidget()

    # Misc
    def sizeHint(self) -> QSize:
        """Return the control area's size hint, expanded to at least 800x520."""
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(800, 520))

    def send_report(self):
        """Report the plot, but only when both data and model are present."""
        if not self.data or not self.model:
            return
        self.report_plot()

    def set_visual_settings(self, key, value):
        """Remember a visual setting and apply it to the plot, if any."""
        self.visual_settings[key] = value
        if self.plot is not None:
            self.plot.parameter_setter.set_parameter(key, value)

    @staticmethod
    def run(data: Table, model: Model, *, state: TaskState) -> BaseResults:
        """Compute the results in the background task (subclass duty)."""
        raise NotImplementedError
Esempio n. 23
0
class OWTreeGraph(OWTreeViewer2D):
    """Graphical visualization of tree models"""

    name = "树查看器"
    icon = "icons/TreeViewer.svg"
    priority = 35
    keywords = []

    class Inputs:
        # Had different input names before merging from
        # Classification/Regression tree variants
        tree = Input("树",
                     TreeModel,
                     replaces=["Classification Tree", "Regression Tree"])

    class Outputs:
        selected_data = Output("被选数据", Table, default=True, id="selected-data")
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME,
                                Table,
                                id="annotated-data")

    settingsHandler = ClassValuesContextHandler()
    # For discrete targets: 0 means no target class, i > 0 selects class i - 1
    # (see the ``- 1`` offsets where the index is used).
    target_class_index = ContextSetting(0)
    # For continuous targets: one of the COL_* indices below.
    regression_colors = Setting(0)

    replaces = [
        "Orange.widgets.classify.owclassificationtreegraph.OWClassificationTreeGraph",
        "Orange.widgets.classify.owregressiontreegraph.OWRegressionTreeGraph"
    ]

    COL_OPTIONS = ["Default", "Number of instances", "Mean value", "Variance"]
    COL_DEFAULT, COL_INSTANCE, COL_MEAN, COL_VARIANCE = range(4)

    def __init__(self):
        super().__init__()
        self.domain = None
        self.dataset = None
        self.clf_dataset = None
        self.tree_adapter = None

        self.color_label = QLabel("目标类: ")
        combo = self.color_combo = gui.OrangeComboBox()
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)
        combo.setSizeAdjustPolicy(
            QComboBox.AdjustToMinimumContentsLengthWithIcon)
        combo.setMinimumContentsLength(8)
        combo.activated[int].connect(self.color_changed)
        self.display_box.layout().addRow(self.color_label, combo)

    def set_node_info(self):
        """Set the content of the node"""
        for node in self.scene.nodes():
            node.set_rect(QRectF())
            self.update_node_info(node)
        # All nodes get the width of the widest one, capped at max_node_width.
        w = max([n.rect().width() for n in self.scene.nodes()] + [0])
        if w > self.max_node_width:
            w = self.max_node_width
        for node in self.scene.nodes():
            rect = node.rect()
            node.set_rect(QRectF(rect.x(), rect.y(), w, rect.height()))
        self.scene.fix_pos(self.root_node, 10, 10)

    def _update_node_info_attr_name(self, node, text):
        """Append a rule and the node's attribute name to ``text``, if any."""
        attr = self.tree_adapter.attribute(node.node_inst)
        if attr is not None:
            text += "<hr/>{}".format(attr.name)
        return text

    def activate_loaded_settings(self):
        """Apply restored settings to the combo, node colors and node text."""
        if not self.model:
            return
        super().activate_loaded_settings()
        if self.domain.class_var.is_discrete:
            self.color_combo.setCurrentIndex(self.target_class_index)
            self.toggle_node_color_cls()
        else:
            self.color_combo.setCurrentIndex(self.regression_colors)
            self.toggle_node_color_reg()
        self.set_node_info()

    def color_changed(self, i):
        """Store the combo index ``i`` in the matching setting and recolor."""
        if self.domain.class_var.is_discrete:
            self.target_class_index = i
            self.toggle_node_color_cls()
            self.set_node_info()
        else:
            self.regression_colors = i
            self.toggle_node_color_reg()

    def toggle_node_size(self):
        """Refresh node contents and repaint the scene view."""
        self.set_node_info()
        self.scene.update()
        self.scene_view.repaint()

    def toggle_color_cls(self):
        """Recolor and relabel nodes for a classification tree."""
        self.toggle_node_color_cls()
        self.set_node_info()
        self.scene.update()

    def toggle_color_reg(self):
        """Recolor and relabel nodes for a regression tree."""
        self.toggle_node_color_reg()
        self.set_node_info()
        self.scene.update()

    @Inputs.tree
    def ctree(self, model=None):
        """Input signal handler"""
        self.clear_scene()
        self.color_combo.clear()
        self.closeContext()
        self.model = model
        self.target_class_index = 0
        if model is None:
            self.info.setText('没有树。')
            self.root_node = None
            self.dataset = None
            self.tree_adapter = None
        else:
            self.tree_adapter = self._get_tree_adapter(model)
            self.domain = model.domain
            self.dataset = model.instances
            # Keep a copy of the data in the model's domain when they differ.
            if self.dataset is not None and self.dataset.domain != self.domain:
                self.clf_dataset = self.dataset.transform(model.domain)
            else:
                self.clf_dataset = self.dataset
            class_var = self.domain.class_var
            if class_var.is_discrete:
                self.scene.colors = [QColor(*col) for col in class_var.colors]
                self.color_label.setText("目标类: ")
                self.color_combo.addItem("无")
                self.color_combo.addItems(self.domain.class_vars[0].values)
                self.color_combo.setCurrentIndex(self.target_class_index)
            else:
                self.scene.colors = \
                    ContinuousPaletteGenerator(*model.domain.class_var.colors)
                self.color_label.setText("调色: ")
                self.color_combo.addItems(self.COL_OPTIONS)
                self.color_combo.setCurrentIndex(self.regression_colors)
            self.openContext(self.domain.class_var)
            # self.root_node = self.walkcreate(model.root, None)
            self.root_node = self.walkcreate(self.tree_adapter.root)
            self.info.setText('{} nodes, {} leaves'.format(
                self.tree_adapter.num_nodes,
                len(self.tree_adapter.leaves(self.tree_adapter.root))))
        self.setup_scene()
        self.Outputs.selected_data.send(None)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset, []))

    def walkcreate(self, node, parent=None):
        """Create a structure of tree nodes from the given model"""
        node_obj = TreeNode(self.tree_adapter, node, parent)
        self.scene.addItem(node_obj)
        if parent:
            edge = GraphicsEdge(node1=parent, node2=node_obj)
            self.scene.addItem(edge)
            parent.graph_add_edge(edge)
        for child_inst in self.tree_adapter.children(node):
            if child_inst is not None:
                self.walkcreate(child_inst, node_obj)
        return node_obj

    def node_tooltip(self, node):
        """Return the rules for ``node`` as HTML, one rule per line."""
        return "<br>".join(
            to_html(str(rule))
            for rule in self.tree_adapter.rules(node.node_inst))

    def update_selection(self):
        """Send the instances of the selected nodes and the annotated data."""
        if self.model is None:
            return
        nodes = [
            item.node_inst for item in self.scene.selectedItems()
            if isinstance(item, TreeNode)
        ]
        data = self.tree_adapter.get_instances_in_nodes(nodes)
        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset,
                                   self.tree_adapter.get_indices(nodes)))

    def send_report(self):
        """Report tree size, edge width mode, color choice and the plot."""
        if not self.model:
            return
        items = [
            ("Tree size", self.info.text()),
            (
                "Edge widths",
                ("固定", "相对于根", "相对于父")[
                    # pylint: disable=invalid-sequence-index
                    self.line_width_method])
        ]
        if self.domain.class_var.is_discrete:
            items.append(("Target class", self.color_combo.currentText()))
        elif self.regression_colors != self.COL_DEFAULT:
            items.append(
                ("Color by", self.COL_OPTIONS[self.regression_colors]))
        self.report_items(items)
        self.report_plot(self.scene)

    def update_node_info(self, node):
        """Dispatch to the classification or regression node formatter."""
        if self.domain.class_var.is_discrete:
            self.update_node_info_cls(node)
        else:
            self.update_node_info_reg(node)

    def update_node_info_cls(self, node):
        """Update the printed contents of the node for classification trees"""
        node_inst = node.node_inst
        distr = self.tree_adapter.get_distribution(node_inst)[0]
        total = self.tree_adapter.num_samples(node_inst)
        # An all-zero distribution would otherwise divide by zero and yield
        # NaNs that cannot be converted to a count below.
        distr = distr / (np.sum(distr) or 1)
        if self.target_class_index:
            tabs = distr[self.target_class_index - 1]
            text = ""
        else:
            modus = np.argmax(distr)
            tabs = distr[modus]
            text = f"{self.domain.class_vars[0].values[int(modus)]}<br/>"
        if tabs > 0.999:
            text += f"100%, {total}/{total}"
        else:
            # Round instead of truncating: total * tabs can land just below
            # the true count (e.g. 100 * 0.29 == 28.999999999999996).
            count = int(round(total * tabs))
            text += f"{100 * tabs:2.1f}%, {count}/{total}"

        text = self._update_node_info_attr_name(node, text)
        node.setHtml(
            f'<p style="line-height: 120%; margin-bottom: 0">{text}</p>')

    def update_node_info_reg(self, node):
        """Update the printed contents of the node for regression trees"""
        node_inst = node.node_inst
        mean, var = self.tree_adapter.get_distribution(node_inst)[0]
        insts = self.tree_adapter.num_samples(node_inst)
        text = f"{mean:.1f} ± {var:.1f}<br/>"
        text += f"{insts} instances"
        text = self._update_node_info_attr_name(node, text)
        node.setHtml(
            f'<p style="line-height: 120%; margin-bottom: 0">{text}</p>')

    def toggle_node_color_cls(self):
        """Update the node color for classification trees"""
        colors = self.scene.colors
        for node in self.scene.nodes():
            distr = node.tree_adapter.get_distribution(node.node_inst)[0]
            # Use 1 for empty nodes in both branches so the proportion is 0
            # instead of the result of a division by zero.
            total = sum(distr) or 1
            if self.target_class_index:
                p = distr[self.target_class_index - 1] / total
                # QColor.lighter takes an integer factor.
                color = colors[self.target_class_index - 1].lighter(
                    int(200 - 100 * p))
            else:
                modus = np.argmax(distr)
                p = distr[modus] / total
                color = colors[int(modus)].lighter(int(300 - 200 * p))
            node.backgroundBrush = QBrush(color)
        self.scene.update()

    def toggle_node_color_reg(self):
        """Update the node color for regression trees"""
        def_color = QColor(192, 192, 255)
        if self.regression_colors == self.COL_DEFAULT:
            brush = QBrush(def_color.lighter(100))
            for node in self.scene.nodes():
                node.backgroundBrush = brush
        elif self.regression_colors == self.COL_INSTANCE:
            # ``or 1`` keeps the ratio defined when the root has no instances.
            max_insts = len(
                self.tree_adapter.get_instances_in_nodes(
                    [self.tree_adapter.root])) or 1
            for node in self.scene.nodes():
                node_insts = len(
                    self.tree_adapter.get_instances_in_nodes([node.node_inst]))
                node.backgroundBrush = QBrush(
                    def_color.lighter(
                        int(120 - 20 * node_insts / max_insts)))
        elif self.regression_colors == self.COL_MEAN:
            minv = np.nanmin(self.dataset.Y)
            maxv = np.nanmax(self.dataset.Y)
            fact = 1 / (maxv - minv) if minv != maxv else 1
            colors = self.scene.colors
            for node in self.scene.nodes():
                node_mean = self.tree_adapter.get_distribution(
                    node.node_inst)[0][0]
                node.backgroundBrush = QBrush(colors[fact *
                                                     (node_mean - minv)])
        else:
            nodes = list(self.scene.nodes())
            variances = [
                self.tree_adapter.get_distribution(node.node_inst)[0][1]
                for node in nodes
            ]
            # All-zero variances (or no nodes) must not divide by zero.
            max_var = max(variances, default=0) or 1
            for node, var in zip(nodes, variances):
                node.backgroundBrush = QBrush(
                    def_color.lighter(int(120 - 20 * var / max_var)))
        self.scene.update()

    def _get_tree_adapter(self, model):
        """Return the adapter matching the model type."""
        if isinstance(model, SklModel):
            return SklTreeAdapter(model)
        return TreeAdapter(model)
Esempio n. 24
0
class RadvizVizRank(VizRankDialog, OWComponent):
    """VizRank dialog that scores Radviz projections of variable subsets.

    Candidate variables are ordered with ReliefF/RReliefF, then ordered
    subsets of at least three of them are scored by the mean 3-fold
    cross-validation score of a 3-nearest-neighbour model fitted on the
    projected points.
    """
    captionTitle = "Score Plots"
    n_attrs = Setting(3)
    minK = 10

    attrsSelected = Signal([])
    _AttrRole = next(gui.OrangeUserRole)

    percent_data_used = Setting(100)

    def __init__(self, master):
        """Add the spin box for maximal number of attributes"""
        VizRankDialog.__init__(self, master)
        OWComponent.__init__(self, master)

        self.master = master
        self.n_neighbors = 10
        max_n_attrs = len(master.model_selected) + len(master.model_other) - 1

        box = gui.hBox(self)
        self.n_attrs_spin = gui.spin(
            box, self, "n_attrs", 3, max_n_attrs, label="Maximum number of variables: ",
            controlWidth=50, alignment=Qt.AlignRight, callback=self._n_attrs_changed)
        gui.rubber(box)
        self.last_run_n_attrs = None
        self.attr_color = master.attr_color
        self.attr_ordering = None
        self.data = None
        self.valid_data = None

    def initialize(self):
        """Run the base initialization and re-read the master's color variable."""
        super().initialize()
        self.attr_color = self.master.attr_color

    def _compute_attr_order(self):
        """
        used by VizRank to evaluate attributes

        Builds a table with all master variables except the color one as
        attributes and the color variable as class, scores the attributes
        with ReliefF (discrete color) or RReliefF, and returns them sorted
        by descending weight, ties broken by name.  The table and a
        not-NaN mask are stored in ``self.data`` and ``self.valid_data``.
        """
        master = self.master
        attrs = [v for v in chain(master.model_selected[:], master.model_other[:])
                 if v is not self.attr_color]
        data = self.master.data.transform(Domain(attributes=attrs, class_vars=self.attr_color))
        self.data = data
        self.valid_data = np.hstack((~np.isnan(data.X), ~np.isnan(data.Y.reshape(len(data.Y), 1))))
        relief = ReliefF if self.attr_color.is_discrete else RReliefF
        weights = relief(n_iterations=100, k_nearest=self.minK)(data)
        attrs = sorted(zip(weights, attrs), key=lambda x: (-x[0], x[1].name))
        self.attr_ordering = attr_ordering = [a for _, a in attrs]
        return attr_ordering

    def _evaluate_projection(self, x, y):
        """
        kNNEvaluate - evaluate class separation in the given projection using a k-NN method

        When ``percent_data_used`` is not 100, a random sample of rows
        (without replacement) is evaluated instead of all of them.

        Parameters
        ----------
        x - variables to evaluate
        y - class

        Returns
        -------
        mean of the 3-fold cross-validation scores
        """
        if self.percent_data_used != 100:
            rand = np.random.choice(len(x), int(len(x) * self.percent_data_used / 100),
                                    replace=False)
            x = x[rand]
            y = y[rand]
        neigh = KNeighborsClassifier(n_neighbors=3) if self.attr_color.is_discrete else \
            KNeighborsRegressor(n_neighbors=3)
        # Neither the projected points nor the targets may contain NaNs.
        assert not (np.isnan(x).any(axis=None) or np.isnan(y).any(axis=None))
        neigh.fit(x, y)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            scores = cross_val_score(neigh, x, y, cv=3)
        return scores.mean()

    def _n_attrs_changed(self):
        """
        Change the button label when the number of attributes changes. The method does not reset
        anything so the user can still see the results until actually restarting the search.
        """
        if self.n_attrs != self.last_run_n_attrs or self.saved_state is None:
            self.button.setText("Start")
        else:
            self.button.setText("Continue")
        self.button.setEnabled(self.check_preconditions())

    def progressBarSet(self, value, processEvents=None):
        """Show the number of evaluated permutations in the window title.

        Qt events are processed only when ``processEvents`` is neither
        ``None`` nor ``False``.
        """
        self.setWindowTitle(self.captionTitle + " Evaluated {} permutations".format(value))
        if processEvents is not None and processEvents is not False:
            qApp.processEvents(processEvents)

    def check_preconditions(self):
        """Return True when the base checks pass and the master's VizRank
        button is enabled; on success the spin box maximum is set to 20."""
        master = self.master
        if not super().check_preconditions():
            return False
        elif not master.btn_vizrank.isEnabled():
            return False
        # Hard upper bound of 20 variables per projection.
        self.n_attrs_spin.setMaximum(20)
        return True

    def on_selection_changed(self, selected, deselected):
        """Emit ``selectionChanged`` with the attributes of the selected row."""
        indexes = selected.indexes()
        if not indexes:
            # Nothing newly selected (e.g. the selection was cleared).
            return
        attrs = indexes[0].data(self._AttrRole)
        self.selectionChanged.emit([attrs])

    def iterate_states(self, state):
        """Yield tuples of attribute indices to be scored.

        ``state`` is ``None`` on the first call (the attribute order is then
        computed and iteration starts at indices 0, 1, 2) or a previously
        yielded state to continue from.
        """
        if state is None:  # on the first call, compute order
            self.attrs = self._compute_attr_order()
            state = list(range(3))
        else:
            state = list(state)

        def combinations(n, s):
            # Enumerates ascending index lists in place, starting from s;
            # once a size is exhausted (last index reaches n) the size grows
            # by one until n_attrs is reached.
            while True:
                yield s
                for up, _ in enumerate(s):
                    s[up] += 1
                    if up + 1 == len(s) or s[up] < s[up + 1]:
                        break
                    s[up] = up
                if s[-1] == n:
                    if len(s) < self.n_attrs:
                        s = list(range(len(s) + 1))
                    else:
                        break

        for c in combinations(len(self.attrs), state):
            # The first index is fixed and only the first half of the
            # permutations of the rest is used -- presumably because the
            # remaining orderings give equivalent layouts; TODO confirm.
            for p in islice(permutations(c[1:]), factorial(len(c) - 1) // 2):
                yield (c[0],) + p

    def compute_score(self, state):
        """Return the negated projection score for the attributes in ``state``."""
        attrs = [self.attrs[i] for i in state]
        domain = Domain(attributes=attrs, class_vars=[self.attr_color])
        data = self.data.transform(domain)
        radviz_xy, _, mask = radviz(data, attrs)
        y = data.Y[mask]
        return -self._evaluate_projection(radviz_xy, y)

    def bar_length(self, score):
        """Undo the negation applied in ``compute_score``."""
        return -score

    def row_for_state(self, score, state):
        """Return a one-item row showing the score and the attribute names."""
        attrs = [self.attrs[s] for s in state]
        item = QStandardItem("[{:0.6f}] ".format(-score) + ", ".join(a.name for a in attrs))
        item.setData(attrs, self._AttrRole)
        return [item]

    def _update_progress(self):
        """Push the saved progress counter to the window title."""
        self.progressBarSet(int(self.saved_progress))

    def before_running(self):
        """
        Disable the spin for number of attributes before running and
        enable afterwards. Also, if the number of attributes is different than
        in the last run, reset the saved state (if it was paused).
        """
        if self.n_attrs != self.last_run_n_attrs:
            self.saved_state = None
            self.saved_progress = 0
        if self.saved_state is None:
            self.scores = []
            self.rank_model.clear()
        self.last_run_n_attrs = self.n_attrs
        self.n_attrs_spin.setDisabled(True)

    def stopped(self):
        """Re-enable the spin box for the number of attributes."""
        self.n_attrs_spin.setDisabled(False)
Esempio n. 25
0
class OWCorpus(OWWidget):
    """Widget that loads a corpus from a file or builds one from input data.

    The user chooses which string meta features are used as text features;
    the resulting corpus is sent on the ``Corpus`` output.
    """
    name = "Corpus"
    description = "Load a corpus of text documents."
    icon = "icons/TextFile.svg"
    priority = 100
    replaces = ["orangecontrib.text.widgets.owloadcorpus.OWLoadCorpus"]

    class Inputs:
        data = Input('Data', Table)

    class Outputs:
        corpus = Output('Corpus', Corpus)

    want_main_area = False
    resizing_enabled = True

    # File dialog filter: one "all readable files" entry followed by one
    # entry per distinct reader, in order of first appearance.
    dlgFormats = ("All readable files ({});;".format(
        '*' + ' *'.join(FileFormat.readers.keys())) + ";;".join(
            "{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS))
            for f in sorted(set(FileFormat.readers.values()),
                            key=list(FileFormat.readers.values()).index)))

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    recent_files = Setting([
        "book-excerpts.tab",
        "grimm-tales-selected.tab",
        "election-tweets-2016.tab",
        "friends-transcripts.tab",
        "andersen.tab",
    ])
    used_attrs = ContextSetting([])

    class Error(OWWidget.Error):
        read_file = Msg("Can't read file {} ({})")
        no_text_features_used = Msg("At least one text feature must be used.")
        corpus_without_text_features = Msg(
            "Corpus doesn't have any textual features.")

    def __init__(self):
        super().__init__()

        self.corpus = None

        # Browse file box
        fbox = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)
        self.file_widget = widgets.FileWidget(
            recent_files=self.recent_files,
            icon_size=(16, 16),
            on_open=self.open_file,
            dialog_format=self.dlgFormats,
            dialog_title='Open Orange Document Corpus',
            reload_label='Reload',
            browse_label='Browse',
            allow_empty=False,
            minimal_width=250,
        )
        fbox.layout().addWidget(self.file_widget)

        # Corpus info
        ibox = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
        self.info_label = gui.label(ibox, self, "")
        self.update_info()

        # Used Text Features
        fbox = gui.widgetBox(self.controlArea, orientation=0)
        ubox = gui.widgetBox(fbox, "Used text features", addSpace=False)
        self.used_attrs_model = VariableListModel(enable_dnd=True)
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs_model)
        ubox.layout().addWidget(self.used_attrs_view)

        # Any change of the used-features list re-sends the corpus.
        aa = self.used_attrs_model
        aa.dataChanged.connect(self.update_feature_selection)
        aa.rowsInserted.connect(self.update_feature_selection)
        aa.rowsRemoved.connect(self.update_feature_selection)

        # Ignored Text Features
        ibox = gui.widgetBox(fbox, "Ignored text features", addSpace=False)
        self.unused_attrs_model = VariableListModel(enable_dnd=True)
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs_model)
        ibox.layout().addWidget(self.unused_attrs_view)

        # Documentation Data Sets & Report
        box = gui.hBox(self.controlArea)
        self.browse_documentation = gui.button(
            box,
            self,
            "Browse documentation corpora",
            callback=lambda: self.file_widget.browse(get_sample_corpora_dir()),
            autoDefault=False,
        )
        box.layout().addWidget(self.report_button)

        # load first file
        self.file_widget.select(0)

    @Inputs.data
    def set_data(self, data):
        """Use the input table as the corpus, or fall back to the file.

        The file controls are disabled while input data is present.
        """
        have_data = data is not None

        # Enable/Disable command when data from input
        self.file_widget.setEnabled(not have_data)
        self.browse_documentation.setEnabled(not have_data)

        if have_data:
            self.open_file(data=data)
        else:
            self.file_widget.reload()

    def open_file(self, path=None, data=None):
        """Load a corpus from ``data`` (preferred) or from the file ``path``.

        Does nothing beyond resetting the feature lists when neither is
        given.  If reading the file fails, the ``read_file`` error is shown,
        the current corpus is dropped and ``None`` is sent to the output.
        """
        self.closeContext()
        self.Error.clear()
        self.unused_attrs_model[:] = []
        self.used_attrs_model[:] = []
        if data:
            self.corpus = Corpus.from_table(data.domain, data)
        elif path:
            try:
                corpus = Corpus.from_file(path)
                corpus.name = os.path.splitext(os.path.basename(path))[0]
            except Exception as err:
                # Do not continue with a missing or stale corpus: report
                # the failure and clear the widget state instead.
                self.Error.read_file(path, str(err))
                self.corpus = None
                self.update_info()
                self.Outputs.corpus.send(None)
                return
            self.corpus = corpus
        else:
            return

        self.update_info()
        self.used_attrs = list(self.corpus.text_features)
        if not self.corpus.text_features:
            self.Error.corpus_without_text_features()
            self.Outputs.corpus.send(None)
            return
        self.openContext(self.corpus)
        self.used_attrs_model.extend(self.used_attrs)
        self.unused_attrs_model.extend([
            f for f in self.corpus.domain.metas
            if f.is_string and f not in self.used_attrs_model
        ])

    def update_info(self):
        """Show a short description of the current corpus in the info label."""
        def describe(corpus):
            dom = corpus.domain
            text_feats = sum(m.is_string for m in dom.metas)
            other_feats = len(dom.attributes) + len(dom.metas) - text_feats
            text = \
                "{} document(s), {} text features(s), {} other feature(s).". \
                format(len(corpus), text_feats, other_feats)
            if dom.has_continuous_class:
                text += "<br/>Regression; numerical class."
            elif dom.has_discrete_class:
                text += "<br/>Classification; discrete class with {} values.". \
                    format(len(dom.class_var.values))
            elif corpus.domain.class_vars:
                text += "<br/>Multi-target; {} target variables.".format(
                    len(corpus.domain.class_vars))
            else:
                text += "<br/>Data has no target variable."
            text += "</p>"
            return text

        if self.corpus is None:
            self.info_label.setText("No corpus loaded.")
        else:
            self.info_label.setText(describe(self.corpus))

    def update_feature_selection(self):
        """Apply the used-features list to the corpus and send it.

        ``None`` is sent instead when the corpus has no variables or metas,
        no documents, or no text features.
        """
        self.Error.no_text_features_used.clear()

        # TODO fix VariablesListItemView so it does not emit
        # duplicated data when reordering inside a single window
        def remove_duplicates(l):
            unique = []
            for i in l:
                if i not in unique:
                    unique.append(i)
            return unique

        if self.corpus is not None:
            self.corpus.set_text_features(
                remove_duplicates(self.used_attrs_model))
            self.used_attrs = list(self.used_attrs_model)

            if len(self.unused_attrs_model
                   ) > 0 and not self.corpus.text_features:
                self.Error.no_text_features_used()

            # prevent sending "empty" corpora
            dom = self.corpus.domain
            empty = not (dom.variables or dom.metas) \
                or len(self.corpus) == 0 \
                or not self.corpus.text_features
            self.Outputs.corpus.send(self.corpus if not empty else None)

    def send_report(self):
        """Report the file, document count and feature lists of the corpus."""
        def describe(features):
            if len(features):
                return ', '.join([f.name for f in features])
            else:
                return '(none)'

        if self.corpus is not None:
            domain = self.corpus.domain
            self.report_items('Corpus', (
                ("File", self.file_widget.get_selected_filename()),
                ("Documents", len(self.corpus)),
                ("Used text features", describe(self.used_attrs_model)),
                ("Ignored text features", describe(self.unused_attrs_model)),
                ('Other features', describe(domain.attributes)),
                ('Target', describe(domain.class_vars)),
            ))
Esempio n. 26
0
class OWPaintData(OWWidget):
    """Widget for creating data by painting points on a one- or two-axis plot.

    The painted points live in a private ``(N, 3)`` numpy buffer whose
    columns are x, y and the class index.  Every edit is routed through
    ``self.undo_stack``; whenever the stack index changes the buffer is
    copied into the ``data`` setting and a table is sent to the output.
    """

    #: (name, tooltip, tool class, icon) for each button in the "Tools" box.
    TOOLS = [("Brush", "Create multiple instances", AirBrushTool,
              _icon("brush.svg")),
             ("Put", "Put individual instances", PutInstanceTool,
              _icon("put.svg")),
             ("Select", "Select and move instances", SelectTool,
              _icon("select-transparent_42px.png")),
             ("Jitter", "Jitter instances", JitterTool, _icon("jitter.svg")),
             ("Magnet", "Attract multiple instances", MagnetTool,
              _icon("magnet.svg")),
             ("Clear", "Clear the plot", ClearTool,
              _icon("../../../icons/Dlg_clear.png"))]

    name = "Paint Data"
    description = "Create data by painting data points on a plane."
    icon = "icons/PaintData.svg"
    priority = 60
    keywords = ["create", "draw"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)

    autocommit = Setting(True)
    table_name = Setting("Painted data")
    attr1 = Setting("x")
    attr2 = Setting("y")
    hasAttr2 = Setting(True)

    brushRadius = Setting(75)
    density = Setting(7)
    symbol_size = Setting(10)

    #: current data array (shape=(N, 3)) as presented on the output
    data = Setting(None, schema_only=True)
    labels = Setting(["C1", "C2"], schema_only=True)

    graph_name = "plot"

    class Warning(OWWidget.Warning):
        no_input_variables = Msg("Input data has no variables")
        continuous_target = Msg("Continuous target value can not be used.")
        sparse_not_supported = Msg("Sparse data is ignored.")
        renamed_vars = Msg("Some variables have been renamed "
                           "to avoid duplicates.\n{}")

    class Information(OWWidget.Information):
        use_first_two = \
            Msg("Paint Data uses data from the first two attributes.")

    def __init__(self):
        """Restore the stored painting, build the GUI and send the output."""
        super().__init__()

        self.input_data = None
        self.input_classes = []
        self.input_colors = None
        self.input_has_attr2 = True
        self.current_tool = None
        self._selected_indices = None
        self._scatter_item = None
        #: A private data buffer (can be modified in place). `self.data` is
        #: a copy of this array (as seen when the `invalidate` method is
        #: called).
        self.__buffer = None

        self.undo_stack = QUndoStack(self)

        self.class_model = ColoredListModel(
            self.labels,
            self,
            flags=Qt.ItemIsSelectable | Qt.ItemIsEnabled | Qt.ItemIsEditable)

        self.class_model.dataChanged.connect(self._class_value_changed)
        self.class_model.rowsInserted.connect(self._class_count_changed)
        self.class_model.rowsRemoved.connect(self._class_count_changed)

        # The stored setting may be missing, an ndarray or a nested list;
        # afterwards `self.data` is always a list and the buffer an ndarray.
        if not self.data:
            self.data = []
            self.__buffer = np.zeros((0, 3))
        elif isinstance(self.data, np.ndarray):
            self.__buffer = self.data.copy()
            self.data = self.data.tolist()
        else:
            self.__buffer = np.array(self.data)

        self.colors = colorpalettes.DefaultRGBColors
        self.tools_cache = {}

        self._init_ui()
        self.commit()

    def _init_ui(self):
        """Build the control area (names, labels, tools) and the plot."""
        namesBox = gui.vBox(self.controlArea, "Names")

        hbox = gui.hBox(namesBox, margin=0, spacing=0)
        gui.lineEdit(hbox,
                     self,
                     "attr1",
                     "Variable X: ",
                     controlWidth=80,
                     orientation=Qt.Horizontal,
                     callback=self._attr_name_changed)
        gui.separator(hbox, 21)
        hbox = gui.hBox(namesBox, margin=0, spacing=0)
        attr2 = gui.lineEdit(hbox,
                             self,
                             "attr2",
                             "Variable Y: ",
                             controlWidth=80,
                             orientation=Qt.Horizontal,
                             callback=self._attr_name_changed)
        gui.separator(hbox)
        gui.checkBox(hbox,
                     self,
                     "hasAttr2",
                     '',
                     disables=attr2,
                     labelWidth=0,
                     callback=self.set_dimensions)
        gui.separator(namesBox)

        gui.widgetLabel(namesBox, "Labels")
        self.classValuesView = listView = gui.ListViewWithSizeHint(
            preferred_size=(-1, 30))
        listView.setModel(self.class_model)
        itemmodels.select_row(listView, 0)
        namesBox.layout().addWidget(listView)

        self.addClassLabel = QAction("+",
                                     self,
                                     toolTip="Add new class label",
                                     triggered=self.add_new_class_label)

        self.removeClassLabel = QAction(
            unicodedata.lookup("MINUS SIGN"),
            self,
            toolTip="Remove selected class label",
            triggered=self.remove_selected_class_label)

        actionsWidget = itemmodels.ModelActionsWidget(
            [self.addClassLabel, self.removeClassLabel], self)
        actionsWidget.layout().addStretch(10)
        actionsWidget.layout().setSpacing(1)
        namesBox.layout().addWidget(actionsWidget)

        tBox = gui.vBox(self.controlArea, "Tools", addSpace=True)
        buttonBox = gui.hBox(tBox)
        toolsBox = gui.widgetBox(buttonBox, orientation=QGridLayout())

        self.toolActions = QActionGroup(self)
        self.toolActions.setExclusive(True)
        self.toolButtons = []

        for i, (name, tooltip, tool, icon) in enumerate(self.TOOLS):
            action = QAction(
                name,
                self,
                toolTip=tooltip,
                checkable=tool.checkable,
                icon=QIcon(icon),
            )
            action.triggered.connect(partial(self.set_current_tool, tool))

            button = QToolButton(iconSize=QSize(24, 24),
                                 toolButtonStyle=Qt.ToolButtonTextUnderIcon,
                                 sizePolicy=QSizePolicy(
                                     QSizePolicy.MinimumExpanding,
                                     QSizePolicy.Fixed))
            button.setDefaultAction(action)
            self.toolButtons.append((button, tool))

            # Three buttons per row; grid positions must be integers, so use
            # floor division rather than true division.
            toolsBox.layout().addWidget(button, i // 3, i % 3)
            self.toolActions.addAction(action)

        for column in range(3):
            toolsBox.layout().setColumnMinimumWidth(column, 10)
            toolsBox.layout().setColumnStretch(column, 1)

        undo = self.undo_stack.createUndoAction(self)
        redo = self.undo_stack.createRedoAction(self)

        undo.setShortcut(QKeySequence.Undo)
        redo.setShortcut(QKeySequence.Redo)

        self.addActions([undo, redo])
        # Any change on the undo stack refreshes `self.data` and the output.
        self.undo_stack.indexChanged.connect(self.invalidate)

        gui.separator(tBox)
        indBox = gui.indentedBox(tBox, sep=8)
        form = QFormLayout(formAlignment=Qt.AlignLeft,
                           labelAlignment=Qt.AlignLeft,
                           fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow)
        indBox.layout().addLayout(form)
        slider = gui.hSlider(indBox,
                             self,
                             "brushRadius",
                             minValue=1,
                             maxValue=100,
                             createLabel=False)
        form.addRow("Radius:", slider)

        slider = gui.hSlider(indBox,
                             self,
                             "density",
                             None,
                             minValue=1,
                             maxValue=100,
                             createLabel=False)

        form.addRow("Intensity:", slider)

        slider = gui.hSlider(indBox,
                             self,
                             "symbol_size",
                             None,
                             minValue=1,
                             maxValue=100,
                             createLabel=False,
                             callback=self.set_symbol_size)

        form.addRow("Symbol:", slider)

        self.btResetToInput = gui.button(tBox, self, "Reset to Input Data",
                                         self.reset_to_input)
        self.btResetToInput.setDisabled(True)

        gui.auto_send(self.controlArea, self, "autocommit")

        # main area GUI
        viewbox = PaintViewBox(enableMouse=False)
        self.plotview = pg.PlotWidget(background="w", viewBox=viewbox)
        self.plotview.sizeHint = lambda: QSize(
            200, 100)  # Minimum size for 1-d painting
        self.plot = self.plotview.getPlotItem()

        axis_color = self.palette().color(QPalette.Text)
        axis_pen = QPen(axis_color)

        tickfont = QFont(self.font())
        tickfont.setPixelSize(max(int(tickfont.pixelSize() * 2 // 3), 11))

        axis = self.plot.getAxis("bottom")
        axis.setLabel(self.attr1)
        axis.setPen(axis_pen)
        axis.setTickFont(tickfont)

        axis = self.plot.getAxis("left")
        axis.setLabel(self.attr2)
        axis.setPen(axis_pen)
        axis.setTickFont(tickfont)
        if not self.hasAttr2:
            self.plot.hideAxis('left')

        self.plot.hideButtons()
        self.plot.setXRange(0, 1, padding=0.01)

        self.mainArea.layout().addWidget(self.plotview)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        # enable brush tool
        self.toolActions.actions()[0].setChecked(True)
        self.set_current_tool(self.TOOLS[0][2])

        self.set_dimensions()

    def set_symbol_size(self):
        """Apply the `symbol_size` setting to the current scatter item."""
        if self._scatter_item:
            self._scatter_item.setSize(self.symbol_size)

    def set_dimensions(self):
        """Switch the plot between 2-D and 1-D mode according to `hasAttr2`.

        Adjusts the y range, the visibility of the left axis and the size
        policy of the plot, replots, and disables tools marked `only2d`
        while the second attribute is off.
        """
        if self.hasAttr2:
            self.plot.setYRange(0, 1, padding=0.01)
            self.plot.showAxis('left')
            self.plotview.setSizePolicy(QSizePolicy.Expanding,
                                        QSizePolicy.Minimum)
        else:
            self.plot.setYRange(-.5, .5, padding=0.01)
            self.plot.hideAxis('left')
            self.plotview.setSizePolicy(QSizePolicy.Expanding,
                                        QSizePolicy.Maximum)
        self._replot()
        for button, tool in self.toolButtons:
            if tool.only2d:
                button.setDisabled(not self.hasAttr2)

    @Inputs.data
    def set_data(self, data):
        """Set the input_data and call reset_to_input"""
        def _check_and_set_data(data):
            # Validate the input; returns True only for usable, non-empty
            # dense data with at least one attribute.
            self.clear_messages()
            if data and data.is_sparse():
                self.Warning.sparse_not_supported()
                return False
            if data:
                if not data.domain.attributes:
                    self.Warning.no_input_variables()
                    data = None
                else:
                    if len(data.domain.attributes) > 2:
                        self.Information.use_first_two()
                    # Only summarize data that was accepted; `data` is None
                    # in the branch above and has no length.
                    self.info.set_input_summary(len(data),
                                                format_summary_details(data))
            if not data:
                # Empty tables are treated like missing input, so that
                # `reset_to_input` never sees anything but None or the
                # array assembled below.
                data = None
            self.input_data = data
            self.btResetToInput.setDisabled(data is None)
            return bool(data)

        if not _check_and_set_data(data):
            self.info.set_input_summary(self.info.NoInput)
            return

        # Only the first two attributes are used, each passed through
        # `scale`.
        X = np.array([scale(vals) for vals in data.X[:, :2].T]).T
        try:
            y = next(cls for cls in data.domain.class_vars if cls.is_discrete)
        except StopIteration:
            # No discrete class variable: put all points into one class.
            if data.domain.class_vars:
                self.Warning.continuous_target()
            self.input_classes = ["C1"]
            self.input_colors = None
            y = np.zeros(len(data))
        else:
            self.input_classes = y.values
            self.input_colors = y.palette

            y = data[:, y].Y

        self.input_has_attr2 = len(data.domain.attributes) >= 2
        if not self.input_has_attr2:
            # Pad the missing second coordinate with zeros.
            self.input_data = np.column_stack((X, np.zeros(len(data)), y))
        else:
            self.input_data = np.column_stack((X, y))
        self.reset_to_input()
        self.unconditional_commit()

    def reset_to_input(self):
        """Reset the painting to input data if present."""
        if self.input_data is None:
            return
        self.undo_stack.clear()

        index = self.selected_class_label()
        if index is None:
            # No label selected; fall back to the first one.
            index = 0
        if self.input_colors is not None:
            palette = self.input_colors
        else:
            palette = colorpalettes.DefaultRGBColors
        self.colors = palette
        self.class_model.colors = palette
        self.class_model[:] = self.input_classes

        # Keep the previous selection when it is still a valid row.
        newindex = min(max(index, 0), len(self.class_model) - 1)
        itemmodels.select_row(self.classValuesView, newindex)

        self.data = self.input_data.tolist()
        self.__buffer = self.input_data.copy()

        prev_attr2 = self.hasAttr2
        self.hasAttr2 = self.input_has_attr2
        if prev_attr2 != self.hasAttr2:
            self.set_dimensions()
        else:  # set_dimensions already calls _replot, no need to call it again
            self._replot()

        self.commit()

    def add_new_class_label(self, undoable=True):
        """Append a new class label with the first unused name "C<n>".

        When `undoable` is true the change is pushed on the undo stack,
        otherwise it is applied directly.
        """
        newlabel = next(label for label in namegen('C', 1)
                        if label not in self.class_model)

        command = SimpleUndoCommand(lambda: self.class_model.append(newlabel),
                                    lambda: self.class_model.__delitem__(-1))
        if undoable:
            self.undo_stack.push(command)
        else:
            command.redo()

    def remove_selected_class_label(self):
        """Remove the selected class label together with its points.

        Points of the removed class are deleted and the class indices above
        it are shifted down by one; everything is recorded as one undo macro.
        """
        index = self.selected_class_label()

        if index is None:
            return

        label = self.class_model[index]
        mask = self.__buffer[:, 2] == index
        # Computed on the rows that remain after the deletion below.
        move_mask = self.__buffer[~mask][:, 2] > index

        self.undo_stack.beginMacro("Delete class label")
        self.undo_stack.push(UndoCommand(DeleteIndices(mask), self))
        self.undo_stack.push(UndoCommand(Move((move_mask, 2), -1), self))
        self.undo_stack.push(
            SimpleUndoCommand(lambda: self.class_model.__delitem__(index),
                              lambda: self.class_model.insert(index, label)))
        self.undo_stack.endMacro()

        newindex = min(max(index - 1, 0), len(self.class_model) - 1)
        itemmodels.select_row(self.classValuesView, newindex)

    def _class_count_changed(self):
        """Sync `labels` and the add/remove actions with the label model."""
        self.labels = list(self.class_model)
        self.removeClassLabel.setEnabled(len(self.class_model) > 1)
        self.addClassLabel.setEnabled(len(self.class_model) < len(self.colors))
        if self.selected_class_label() is None:
            itemmodels.select_row(self.classValuesView, 0)

    def _class_value_changed(self, index, _):
        """Store an edited label name in the `labels` setting.

        NOTE: label renames are not recorded on the undo stack.
        """
        index = index.row()
        newvalue = self.class_model[index]
        oldvalue = self.labels[index]
        if newvalue != oldvalue:
            self.labels[index] = newvalue

    def selected_class_label(self):
        """Return the row of the selected class label, or None."""
        rows = self.classValuesView.selectedIndexes()
        if rows:
            return rows[0].row()
        return None

    def set_current_tool(self, tool):
        """Activate the tool of class `tool`, deactivating the current one.

        Tool instances are created on first use and cached.  A tool that is
        not `checkable` is activated once, after which the previously
        active tool is restored.
        """
        prev_tool = self.current_tool.__class__

        if self.current_tool is not None:
            self.current_tool.deactivate()
            self.current_tool.editingStarted.disconnect(
                self._on_editing_started)
            self.current_tool.editingFinished.disconnect(
                self._on_editing_finished)
            self.current_tool = None
            self.plot.getViewBox().tool = None

        if tool not in self.tools_cache:
            newtool = tool(self, self.plot)
            self.tools_cache[tool] = newtool
            newtool.issueCommand.connect(self._add_command)

        self.current_tool = tool = self.tools_cache[tool]
        self.plot.getViewBox().tool = tool
        tool.editingStarted.connect(self._on_editing_started)
        tool.editingFinished.connect(self._on_editing_finished)
        tool.activate()

        if not tool.checkable:
            self.set_current_tool(prev_tool)

    def _on_editing_started(self):
        """Open an undo macro for the commands of one editing gesture."""
        self.undo_stack.beginMacro("macro")

    def _on_editing_finished(self):
        """Close the undo macro opened in `_on_editing_started`."""
        self.undo_stack.endMacro()

    def execute(self, command):
        """Apply a normalized command to the buffer and replot.

        Returns the second value produced by `transform`, bound to `undo`
        here.  Deleting or inserting rows also drops the current selection.
        """
        assert isinstance(command, (Append, DeleteIndices, Insert, Move)), \
            "Non normalized command"
        if isinstance(command, (DeleteIndices, Insert)):
            self._selected_indices = None

            if isinstance(self.current_tool, SelectTool):
                self.current_tool.reset()

        self.__buffer, undo = transform(command, self.__buffer)
        self._replot()
        return undo

    def _add_command(self, cmd):
        """Turn a command issued by a tool into entries on the undo stack.

        High-level commands (air brush, jitter, magnet, selection based
        ones) are first rewritten into `Append`, `Move` or `DeleteIndices`.
        """
        # pylint: disable=too-many-branches
        name = "Name"

        if (not self.hasAttr2
                and isinstance(cmd, (Move, MoveSelection, Jitter, Magnet))):
            # tool only supported if both x and y are enabled
            return

        if isinstance(cmd, Append):
            cls = self.selected_class_label()
            points = np.array([(p.x(), p.y() if self.hasAttr2 else 0, cls)
                               for p in cmd.points])
            self.undo_stack.push(UndoCommand(Append(points), self, text=name))
        elif isinstance(cmd, Move):
            self.undo_stack.push(UndoCommand(cmd, self, text=name))
        elif isinstance(cmd, SelectRegion):
            # Selecting is not an edit; only remember the selected rows.
            indices = [
                i for i, (x, y) in enumerate(self.__buffer[:, :2])
                if cmd.region.contains(QPointF(x, y))
            ]
            indices = np.array(indices, dtype=int)
            self._selected_indices = indices
        elif isinstance(cmd, DeleteSelection):
            indices = self._selected_indices
            if indices is not None and indices.size:
                self.undo_stack.push(
                    UndoCommand(DeleteIndices(indices), self, text="Delete"))
        elif isinstance(cmd, MoveSelection):
            indices = self._selected_indices
            if indices is not None and indices.size:
                self.undo_stack.push(
                    UndoCommand(Move((self._selected_indices, slice(0, 2)),
                                     np.array([cmd.delta.x(),
                                               cmd.delta.y()])),
                                self,
                                text="Move"))
        elif isinstance(cmd, DeleteIndices):
            self.undo_stack.push(UndoCommand(cmd, self, text="Delete"))
        elif isinstance(cmd, Insert):
            self.undo_stack.push(UndoCommand(cmd, self))
        elif isinstance(cmd, AirBrush):
            data = create_data(cmd.pos.x(), cmd.pos.y(),
                               self.brushRadius / 1000,
                               int(1 + self.density / 20), cmd.rstate)
            self._add_command(Append([QPointF(*p) for p in zip(*data.T)]))
        elif isinstance(cmd, Jitter):
            point = np.array([cmd.pos.x(), cmd.pos.y()])
            delta = -apply_jitter(self.__buffer[:, :2], point,
                                  self.density / 100.0, 0, cmd.rstate)
            self._add_command(Move((..., slice(0, 2)), delta))
        elif isinstance(cmd, Magnet):
            point = np.array([cmd.pos.x(), cmd.pos.y()])
            delta = -apply_attractor(self.__buffer[:, :2], point,
                                     self.density / 100.0, 0)
            self._add_command(Move((..., slice(0, 2)), delta))
        else:
            assert False, "unreachable"

    def _replot(self):
        """Replace the scatter item with one built from the buffer."""
        def pen(color):
            pen = QPen(color, 1)
            pen.setCosmetic(True)
            return pen

        if self._scatter_item is not None:
            self.plot.removeItem(self._scatter_item)
            self._scatter_item = None

        x = self.__buffer[:, 0].copy()
        if self.hasAttr2:
            y = self.__buffer[:, 1].copy()
        else:
            # 1-D painting: draw every point at y == 0.
            y = np.zeros(self.__buffer.shape[0])

        colors = self.colors[self.__buffer[:, 2]]
        pens = [pen(c) for c in colors]
        brushes = [QBrush(c) for c in colors]

        self._scatter_item = pg.ScatterPlotItem(x,
                                                y,
                                                symbol="+",
                                                brush=brushes,
                                                pen=pens)
        self.plot.addItem(self._scatter_item)
        self.set_symbol_size()

    def _attr_name_changed(self):
        """Update the axis labels after a rename and refresh the output."""
        self.plot.getAxis("bottom").setLabel(self.attr1)
        self.plot.getAxis("left").setLabel(self.attr2)
        self.invalidate()

    def invalidate(self):
        """Copy the buffer into the `data` setting and commit."""
        self.data = self.__buffer.tolist()
        self.commit()

    def commit(self):
        """Build a table from `self.data` and send it to the output.

        Sends None when there are no points.  A discrete class variable is
        added only when at least two distinct class indices are present.
        Duplicate variable names are made unique and reported in a warning.
        """
        self.Warning.renamed_vars.clear()

        if not self.data:
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return
        data = np.array(self.data)
        if self.hasAttr2:
            X, Y = data[:, :2], data[:, 2]
            proposed = [self.attr1.strip(), self.attr2.strip()]
        else:
            X, Y = data[:, np.newaxis, 0], data[:, 2]
            proposed = [self.attr1.strip()]

        if len(np.unique(Y)) >= 2:
            proposed.append("Class")
            unique_names, renamed = get_unique_names_duplicates(proposed, True)
            domain = Domain((map(ContinuousVariable, unique_names[:-1])),
                            DiscreteVariable(unique_names[-1],
                                             values=tuple(self.class_model)))
            data = Table.from_numpy(domain, X, Y)
        else:
            unique_names, renamed = get_unique_names_duplicates(proposed, True)
            domain = Domain(map(ContinuousVariable, unique_names))
            data = Table.from_numpy(domain, X)

        if renamed:
            self.Warning.renamed_vars(", ".join(renamed))
            self.plot.getAxis("bottom").setLabel(unique_names[0])
            if self.hasAttr2:
                # In 1-D mode unique_names[1] is the class name (if present),
                # not an axis variable.
                self.plot.getAxis("left").setLabel(unique_names[1])

        data.name = self.table_name
        self.Outputs.data.send(data)
        self.info.set_output_summary(len(data), format_summary_details(data))

    def sizeHint(self):
        """Return the inherited size hint expanded to at least 570x690."""
        sh = super().sizeHint()
        return sh.expandedTo(QSize(570, 690))

    def onDeleteWidget(self):
        """Disconnect the undo stack from `invalidate` and clear the plot."""
        self.undo_stack.indexChanged.disconnect(self.invalidate)
        self.plot.clear()

    def send_report(self):
        """Report non-default axis names, the number of points and the plot."""
        if self.data is None:
            return
        settings = []
        if self.attr1 != "x" or self.attr2 != "y":
            settings += [("Axis x", self.attr1), ("Axis y", self.attr2)]
        settings += [("Number of points", len(self.data))]
        self.report_items("Painted data", settings)
        self.report_plot()
Esempio n. 27
0
class OWContinuize(widget.OWWidget):
    """Widget that continuizes a table according to four radio-button choices.

    Each choice is stored as an index into one of the ``*_treats`` tuples
    (or ``value_ranges``) below; the indices are translated into arguments
    for ``DomainContinuizer`` when the output is computed.
    """
    name = "Continuize"
    description = ("Transform categorical attributes into numeric and, "
                   "optionally, normalize numeric values.")
    icon = "icons/Continuize.svg"
    category = "Data"
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    want_main_area = False
    buttons_area_orientation = Qt.Vertical
    resizing_enabled = False

    # Indices of the selected radio buttons.
    multinomial_treatment = Setting(0)
    zero_based = Setting(1)
    continuous_treatment = Setting(0)
    class_treatment = Setting(0)

    transform_class = Setting(False)

    autosend = Setting(True)

    # (button label, treatment) pairs, in the order the buttons are shown.
    multinomial_treats = (
        ("Target or first value as base", Continuize.FirstAsBase),
        ("Most frequent value as base", Continuize.FrequentAsBase),
        ("One attribute per value", Continuize.Indicators),
        ("Ignore multinomial attributes", Continuize.RemoveMultinomial),
        ("Remove categorical attributes", Continuize.Remove),
        ("Treat as ordinal", Continuize.AsOrdinal),
        ("Divide by number of values", Continuize.AsNormalizedOrdinal),
    )

    continuous_treats = (
        ("Leave them as they are", Continuize.Leave),
        ("Normalize by span", Normalize.NormalizeBySpan),
        ("Normalize by standard deviation", Normalize.NormalizeBySD),
    )

    class_treats = (
        ("Leave it as it is", Continuize.Leave),
        ("Treat as ordinal", Continuize.AsOrdinal),
        ("Divide by number of values", Continuize.AsNormalizedOrdinal),
        ("One class per value", Continuize.Indicators),
    )

    value_ranges = ["From -1 to 1", "From 0 to 1"]

    def __init__(self):
        """Create one titled radio-button group per setting."""
        super().__init__()

        # (box title, setting name, button labels), top to bottom.
        groups = (
            ("Categorical Features", "multinomial_treatment",
             [label for label, _ in self.multinomial_treats]),
            ("Numeric Features", "continuous_treatment",
             [label for label, _ in self.continuous_treats]),
            ("Categorical Outcomes", "class_treatment",
             [label for label, _ in self.class_treats]),
            ("Value Range", "zero_based", self.value_ranges),
        )
        for title, setting, labels in groups:
            group_box = gui.vBox(self.controlArea, title)
            gui.radioButtonsInBox(group_box,
                                  self,
                                  setting,
                                  btnLabels=labels,
                                  callback=self.settings_changed)

        gui.auto_commit(self.buttonsArea, self, "autosend", "Apply", box=False)

        self.data = None

    def settings_changed(self):
        """Callback of every radio-button group; triggers a commit."""
        self.commit()

    @Inputs.data
    @check_sql_input
    def setData(self, data):
        """Store the input table; send None right away when it is missing."""
        self.data = data
        if data is not None:
            self.unconditional_commit()
            return
        self.Outputs.data.send(None)

    def constructContinuizer(self):
        """Return a `DomainContinuizer` configured from the settings."""
        multinomial = self.multinomial_treats[self.multinomial_treatment][1]
        continuous = self.continuous_treats[self.continuous_treatment][1]
        target = self.class_treats[self.class_treatment][1]
        return DomainContinuizer(zero_based=self.zero_based,
                                 multinomial_treatment=multinomial,
                                 continuous_treatment=continuous,
                                 class_treatment=target)

    def commit(self):
        """Send the input transformed into the continuized domain.

        Falsy input is passed through unchanged.
        """
        continuizer = self.constructContinuizer()
        if not self.data:
            self.Outputs.data.send(self.data)  # None or empty data
            return
        new_domain = continuizer(self.data)
        transformed = self.data.transform(new_domain)
        self.Outputs.data.send(transformed)

    def send_report(self):
        """Report the label of the selected option of each group."""
        categorical = self.multinomial_treats[self.multinomial_treatment][0]
        numeric = self.continuous_treats[self.continuous_treatment][0]
        target = self.class_treats[self.class_treatment][0]
        value_range = self.value_ranges[self.zero_based]
        self.report_items("Settings", [
            ("Categorical features", categorical),
            ("Numeric features", numeric),
            ("Class", target),
            ("Value range", value_range),
        ])
Esempio n. 28
0
class OWMergeData(widget.OWWidget):
    name = "Merge Data"
    description = "Merge datasets based on the values of selected features."
    icon = "icons/MergeData.svg"
    priority = 1110
    keywords = ["join"]

    class Inputs:
        """Input signals of the widget."""
        # Primary table; the default input, declared as replacing "Data A".
        data = Input("Data",
                     Orange.data.Table,
                     default=True,
                     replaces=["Data A"])
        # Secondary table, declared as replacing "Data B".
        extra_data = Input("Extra Data",
                           Orange.data.Table,
                           replaces=["Data B"])

    class Outputs:
        """Output signals of the widget."""
        # The merged table; declared as replacing three older signal names.
        data = Output(
            "Data",
            Orange.data.Table,
            replaces=["Merged Data A+B", "Merged Data B+A", "Merged Data"])

    LeftJoin, InnerJoin, OuterJoin = range(3)
    OptionNames = ("Append columns from Extra data",
                   "Find matching pairs of rows", "Concatenate tables")
    OptionDescriptions = (
        "The first table may contain, for instance, city names,\n"
        "and the second would be a list of cities and their coordinates.\n"
        "Columns with coordinates would then be appended to the output.",
        "Input tables contain different features describing the same data "
        "instances.\n"
        "Output contains matched instances. Rows without matches are removed.",
        "Input tables contain different features describing the same data "
        "instances.\n"
        "Output contains all instances. Data from merged instances is "
        "merged into single rows.")

    UserAdviceMessages = [
        widget.Message("Confused about merging options?\nSee the tooltips!",
                       "merging_types")
    ]

    settingsHandler = MergeDataContextHandler()
    attr_pairs = ContextSetting(None, schema_only=True)
    merging = Setting(LeftJoin)
    auto_apply = Setting(True)
    settings_version = 2

    want_main_area = False
    resizing_enabled = False

    class Warning(widget.OWWidget.Warning):
        """Warning messages shown by the widget."""
        # The placeholder receives the names that had to be changed.
        renamed_vars = Msg("Some variables have been renamed "
                           "to avoid duplicates.\n{}")

    class Error(widget.OWWidget.Error):
        """Error messages shown when the chosen matching cannot be used."""
        # Raised by the pair-type check; placeholders take the two columns.
        matching_numeric_with_nonnum = Msg(
            "Numeric and non-numeric columns ({} and {}) cannot be matched.")
        matching_index_with_sth = Msg("Row index cannot be matched with {}.")
        matching_id_with_sth = Msg("Instance cannot be matched with {}.")
        # Raised by the uniqueness check on the matching keys.
        nonunique_left = Msg(
            "Some combinations of values on the left appear in multiple rows.\n"
            "For this type of merging, every possible combination of values "
            "on the left should appear at most once.")
        nonunique_right = Msg(
            "Some combinations of values on the right appear in multiple rows."
            "\n"
            "Every possible combination of values on the right should appear "
            "at most once.")

    def __init__(self):
        """Initialise widget state, the variable models and the controls."""
        super().__init__()

        self.data = None
        self.extra_data = None

        # Entries offered for row matching: the two special markers first,
        # followed by the domain's attributes, class variables and metas.
        content = [
            INDEX, INSTANCEID, DomainModel.ATTRIBUTES, DomainModel.CLASSES,
            DomainModel.METAS
        ]
        # One model per input table; their domains are set in the input
        # handlers.
        self.model = DomainModelWithTooltips(content)
        self.extra_model = DomainModelWithTooltips(content)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        # Radio buttons choosing the merge type; labels and tooltips come
        # from OptionNames / OptionDescriptions.
        grp = gui.radioButtons(self.controlArea,
                               self,
                               "merging",
                               box="Merging",
                               btnLabels=self.OptionNames,
                               tooltips=self.OptionDescriptions,
                               callback=self.change_merging)

        # Rows of combo-box pairs that select which columns are matched.
        self.attr_boxes = ConditionBox(self, self.model, self.extra_model, "",
                                       "matches")
        self.attr_boxes.add_row()
        box = gui.vBox(self.controlArea, box="Row matching")
        box.layout().addWidget(self.attr_boxes)

        gui.auto_apply(self.buttonsArea, self)
        # connect after wrapping self.commit with gui.auto_commit!
        # NOTE(review): the call above is gui.auto_apply; presumably it does
        # the wrapping this comment refers to -- confirm.
        self.attr_boxes.vars_changed.connect(self.commit)
        self.attr_boxes.vars_changed.connect(self.store_combo_state)
        self.settingsAboutToBePacked.connect(self.store_combo_state)

    def change_merging(self):
        """Callback of the merge-type radio buttons: commit again."""
        self.commit()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Store the primary table and point its variable model at it.

        The model receives the table's domain, or ``None`` when the table
        is missing or falsy.
        """
        self.data = data
        if data:
            domain = data.domain
        else:
            domain = None
        self.model.set_domain(domain)

    @Inputs.extra_data
    @check_sql_input
    def set_extra_data(self, data):
        """Store the extra table and point its variable model at it.

        The model receives the table's domain, or ``None`` when the table
        is missing or falsy.
        """
        self.extra_data = data
        if data:
            domain = data.domain
        else:
            domain = None
        self.extra_model.set_domain(domain)

    def store_combo_state(self):
        """Copy the current state of the matching rows into `attr_pairs`."""
        self.attr_pairs = self.attr_boxes.current_state()

    def handleNewSignals(self):
        """Re-open the context, refresh the input summary and commit.

        Before the context is opened, `attr_pairs` is reset to the single
        pair proposed by `_find_best_match`; the resulting `attr_pairs`
        are then pushed to the matching rows.
        """
        self.closeContext()
        self.attr_pairs = [self._find_best_match()]
        domain = self.data and self.data.domain
        extra_domain = self.extra_data and self.extra_data.domain
        self.openContext(domain, extra_domain)
        self.attr_boxes.set_state(self.attr_pairs)

        if self.data or self.extra_data:
            n_data = len(self.data) if self.data else 0
            n_extra_data = len(self.extra_data) if self.extra_data else 0
            shown = self.info.format_number(n_data)
            shown_extra = self.info.format_number(n_extra_data)
            summary = f"{shown}, {shown_extra}"
            tables = [("Data", self.data), ("Extra data", self.extra_data)]
            details = format_multiple_summaries(tables)
            self.info.set_input_summary(summary, details, format=Qt.RichText)
        else:
            self.info.set_input_summary(self.info.NoInput, "")

        self.unconditional_commit()

    def _find_best_match(self):
        """Propose the pair of columns to match on.

        Every pair of string variables (one from each model) is scored by
        the number of distinct values their columns share; the pair with
        the strictly highest score wins. When no pair shares any value,
        ``(INDEX, INDEX)`` is returned.
        """
        def string_vars(model):
            return [var for var in model if isinstance(var, StringVariable)]

        best_left, best_right, best_count = INDEX, INDEX, 0
        candidates = product(string_vars(self.model),
                             string_vars(self.extra_model))
        for left_var, right_var in candidates:
            left_col = self.data[:, left_var].metas
            right_col = self.extra_data[:, right_var].metas
            if not (left_col.size and right_col.size):
                continue
            # Only compare columns whose first value is really a string
            if not (isinstance(left_col[0][0], str)
                    and isinstance(right_col[0][0], str)):
                continue
            shared = len(np.intersect1d(left_col, right_col))
            if shared > best_count:
                best_left, best_right, best_count = \
                    left_var, right_var, shared
        return best_left, best_right

    def commit(self):
        """Merge the tables, send the result and update the output summary.

        Merging is attempted only when both inputs are truthy; otherwise
        ``None`` is sent.
        """
        self.clear_messages()
        if self.data and self.extra_data:
            merged = self.merge()
        else:
            merged = None
        self.Outputs.data.send(merged)
        if merged:
            details = format_summary_details(merged)
            summary = len(merged)
        else:
            details = ""
            summary = self.info.NoOutput
        self.info.set_output_summary(summary, details)

    def send_report(self):
        """Report the merge type and the pairs of matched columns."""
        # pylint: disable=invalid-sequence-index
        merging_name = self.OptionNames[self.merging]
        matches = ", ".join(
            f"{self._get_col_name(left)} with {self._get_col_name(right)}"
            for left, right in self.attr_boxes.current_state())
        self.report_items((("Merging", merging_name), ("Match", matches)))

    def merge(self):
        """Merge the two tables according to the selected column pairs.

        Returns the merged table, or ``None`` when the pair types or the
        uniqueness of the keys do not allow merging (the corresponding
        checks report the error themselves).
        """
        # pylint: disable=invalid-sequence-index
        pairs = self.attr_boxes.current_state()
        if not self._check_pair_types(pairs):
            return None

        def key_matrix(table, variables):
            # `_values` clears entries of `mask` in place for rows whose
            # key is missing, so the same array is handed to every call.
            mask = np.full(len(table), True)
            columns = tuple(
                self._values(table, var, mask) for var in variables)
            return np.vstack(columns).T, mask

        left_vars, right_vars = zip(*pairs)
        left, left_mask = key_matrix(self.data, left_vars)
        right, right_mask = key_matrix(self.extra_data, right_vars)
        if not self._check_uniqueness(left, left_mask, right, right_mask):
            return None

        join = self._merge_methods[self.merging]
        lefti, righti, rightu = join(self, left, left_mask, right, right_mask)
        reduced_extra_data = self._compute_reduced_extra_data(
            right_vars, lefti, righti, rightu)
        return self._join_table_by_indices(
            reduced_extra_data, lefti, righti, rightu)

    def _check_pair_types(self, pairs):
        """Check that every selected pair of columns can be matched.

        The first offending pair raises the matching error message and
        makes the method return ``False``; ``True`` means all pairs are
        acceptable.
        """
        for left, right in pairs:
            left_numeric = isinstance(left, ContinuousVariable)
            right_numeric = isinstance(right, ContinuousVariable)
            if left_numeric != right_numeric:
                self.Error.matching_numeric_with_nonnum(left, right)
                return False
            both = (left, right)
            # The special markers can only be matched with themselves
            if INDEX in both and left != right:
                other = ({left, right} - {INDEX}).pop()
                self.Error.matching_index_with_sth(self._get_col_name(other))
                return False
            if INSTANCEID in both and left != right:
                other = ({left, right} - {INSTANCEID}).pop()
                self.Error.matching_id_with_sth(self._get_col_name(other))
                return False
        return True

    @staticmethod
    def _get_col_name(obj):
        """Return a display name: quoted for variables, lower-cased otherwise."""
        if isinstance(obj, Variable):
            return f"'{obj.name}'"
        return obj.lower()

    def _check_uniqueness(self, left, left_mask, right, right_mask):
        """Check that the unmasked keys contain no repeated rows.

        Keys of the right table are always checked; keys of the left table
        only when the merge type is not a left join. Each failed check
        raises its error message. Returns ``True`` when all performed
        checks pass.
        """
        def all_distinct(rows):
            return len(set(map(tuple, rows))) == len(rows)

        right_ok = all_distinct(right[right_mask])
        if not right_ok:
            self.Error.nonunique_right()
        if self.merging == self.LeftJoin:
            return right_ok
        left_ok = all_distinct(left[left_mask])
        if not left_ok:
            self.Error.nonunique_left()
        return right_ok and left_ok

    def _compute_reduced_extra_data(self, right_match_vars, lefti, righti,
                                    rightu):
        """Prepare a table with extra columns that will appear in the merged
        table

        Args:
            right_match_vars: variables of the extra table used for matching
            lefti: row indices into the primary table
            righti: matching row indices into the extra table (-1 is
                treated as "no match")
            rightu: indices of extra rows to be appended, or None

        Returns:
            the extra table restricted to the columns that are needed
        """
        domain = self.data.domain
        extra_domain = self.extra_data.domain

        def var_needed(var):
            # Extra rows will be appended: keep every column
            if rightu is not None and rightu.size:
                return True
            # Matching columns are dropped, except for the outer join
            if var in right_match_vars and self.merging != self.OuterJoin:
                return False
            # Columns that the primary table does not have are always kept
            if var not in domain:
                return True
            # The variable is in both tables: keep it only if the values
            # differ on rows for which both indices are defined
            both_defined = (lefti != -1) * (righti != -1)
            left_col = \
                self.data.get_column_view(var)[0][lefti[both_defined]]
            right_col = \
                self.extra_data.get_column_view(var)[0][righti[both_defined]]
            if var.is_primitive():
                # Compare as floats; non-finite entries must be at the same
                # positions and the finite values must be equal
                left_col = left_col.astype(float)
                right_col = right_col.astype(float)
                mask_left = np.isfinite(left_col)
                mask_right = np.isfinite(right_col)
                return not (np.all(mask_left == mask_right) and np.all(
                    left_col[mask_left] == right_col[mask_right]))
            else:
                return not np.all(left_col == right_col)

        extra_vars = [
            var for var in chain(extra_domain.variables, extra_domain.metas)
            if var_needed(var)
        ]
        return self.extra_data[:, extra_vars]

    @staticmethod
    def _values(data, var, mask):
        """Return an array of matching keys for the rows of `data`.

        Args:
            data: the table to take keys from
            var: a variable of `data`, or one of the markers `INDEX`
                (keys are row positions) and `INSTANCEID` (keys are the
                ids of the instances)
            mask: boolean array with one entry per row; it is modified
                **in place**: entries are cleared for rows whose value of
                `var` is missing (nan for primitive variables, falsy
                otherwise). It is left untouched for the two markers.

        Returns:
            a one-dimensional array with one key per row
        """
        if var == INDEX:
            return np.arange(len(data))
        if var == INSTANCEID:
            # `np.int` was only an alias of the builtin `int`; it was
            # deprecated in NumPy 1.20 and removed in 1.24.
            return np.fromiter((inst.id for inst in data),
                               count=len(data),
                               dtype=int)
        col = data.get_column_view(var)[0]
        if var.is_primitive():
            col = col.astype(float, copy=False)
            nans = np.isnan(col)
            mask *= ~nans
            if var.is_discrete:
                # Replace value indices with the values themselves; missing
                # values are first redirected to an extra slot appended
                # after the last value.
                col = col.astype(int)
                col[nans] = len(var.values)
                col = np.array(var.values + (np.nan, ))[col]
        else:
            # Work on a copy: the rows that are masked out are overwritten
            col = col.copy()
            defined = col.astype(bool)
            mask *= defined
            col[~mask] = np.nan
        return col

    def _left_join_indices(self, left, left_mask, right, right_mask):
        """Compute row indices for a left join.

        Returns a triple ``(lefti, righti, None)``: `lefti` lists all rows
        of the primary table and `righti` holds, for each of them, the
        matching row of the extra table or -1.
        """
        n_rows = len(self.data)
        # Don't match nans. This is needed since numpy may change nan to string
        # nan, so nan's will match each other
        right_rows = np.arange(len(right))
        right_rows[~right_mask] = -1
        if right.shape[1] != 1:
            lookup = dict(zip(map(tuple, right), right_rows))
            matches = (lookup.get(tuple(key), -1) for key in left)
        else:
            # The more common case can be handled faster
            lookup = dict(zip(right.flatten(), right_rows))
            matches = (lookup.get(key, -1) for key in left.flatten())
        righti = np.fromiter(matches, dtype=np.int64, count=n_rows)
        lefti = np.arange(n_rows, dtype=np.int64)
        # Rows whose own key is missing never match anything
        righti[~left_mask] = -1
        return lefti, righti, None

    def _inner_join_indices(self, left, left_mask, right, right_mask):
        """Compute row indices for an inner join.

        Takes the indices produced by `_left_join_indices` and keeps only
        the rows that have a match in the extra table.
        """
        lefti, righti, _ = self._left_join_indices(
            left, left_mask, right, right_mask)
        matched = righti != -1
        return lefti[matched], righti[matched], None

    def _outer_join_indices(self, left, left_mask, right, right_mask):
        """Compute row indices for an outer join.

        Takes the indices produced by `_left_join_indices` and adds, as the
        third item, the indices of rows of the extra table that were not
        matched by any row of the primary table.
        """
        lefti, righti, _ = self._left_join_indices(
            left, left_mask, right, right_mask)
        last_is_unused = len(right) - 1 not in righti
        unused = np.full(len(right), True)
        unused[righti] = False
        if last_is_unused:
            # righti can include -1, which sets the last element as used
            unused[-1] = True
        rightu = np.nonzero(unused)[0]
        return lefti, righti, rightu

    # Join implementations listed in the order of the LeftJoin, InnerJoin and
    # OuterJoin constants; `merge` picks one using `self.merging` as index.
    _merge_methods = [
        _left_join_indices, _inner_join_indices, _outer_join_indices
    ]

    def _join_table_by_indices(self, reduced_extra, lefti, righti, rightu):
        """Join (horizontally) self.data and reduced_extra, taking the pairs
        of rows given in indices

        Args:
            reduced_extra: the extra table, restricted to needed columns
            lefti: row indices into self.data
            righti: row indices into reduced_extra; -1 marks a missing match
            rightu: indices of rows of self.extra_data to append, or None

        Returns:
            the merged table, or None if `lefti` is empty
        """
        if not lefti.size:
            return None
        lt_dom = self.data.domain
        xt_dom = reduced_extra.domain
        # Variables of both tables, renamed where names would collide
        domain = self._domain_rename_duplicates(
            lt_dom.attributes + xt_dom.attributes,
            lt_dom.class_vars + xt_dom.class_vars, lt_dom.metas + xt_dom.metas)
        X = self._join_array_by_indices(self.data.X, reduced_extra.X, lefti,
                                        righti)
        # np.c_ turns Y into a two-dimensional array
        Y = self._join_array_by_indices(np.c_[self.data.Y],
                                        np.c_[reduced_extra.Y], lefti, righti)
        # String metas need '' instead of nan for rows without a match
        string_cols = [
            i for i, var in enumerate(domain.metas) if var.is_string
        ]
        metas = self._join_array_by_indices(self.data.metas,
                                            reduced_extra.metas, lefti, righti,
                                            string_cols)
        if rightu is not None:
            # This domain is used for transforming the extra rows for outer join
            # It must use the original - not renamed - variables from right, so
            # values are copied,
            # but new domain for the left, so renamed values are *not* copied
            right_domain = Orange.data.Domain(
                domain.attributes[:len(lt_dom.attributes)] + xt_dom.attributes,
                domain.class_vars[:len(lt_dom.class_vars)] + xt_dom.class_vars,
                domain.metas[:len(lt_dom.metas)] + xt_dom.metas)
            extras = self.extra_data[rightu].transform(right_domain)
            X = np.vstack((X, extras.X))
            extras_Y = extras.Y
            # Y may be one-dimensional; make it a column before stacking
            if extras_Y.ndim == 1:
                extras_Y = extras_Y.reshape(-1, 1)
            Y = np.vstack((Y, extras_Y))
            metas = np.vstack((metas, extras.metas))
        table = Orange.data.Table.from_numpy(domain, X, Y, metas)
        # Name and attributes are taken over from the primary table
        table.name = getattr(self.data, 'name', '')
        table.attributes = getattr(self.data, 'attributes', {})
        if rightu is not None:
            # Ids of all primary rows, followed by ids of the appended rows
            table.ids = np.hstack((self.data.ids, self.extra_data.ids[rightu]))
        else:
            table.ids = self.data.ids[lefti]

        return table

    def _domain_rename_duplicates(self, attributes, class_vars, metas):
        """Build a domain in which all variable names are unique.

        Names are made unique with `get_unique_names_duplicates`; every
        variable whose name had to change is replaced by a renamed copy.
        If anything was renamed, the `renamed_vars` warning is shown with
        the original names listed in sorted order.

        Args:
            attributes: tuple of attributes
            class_vars: tuple of class variables
            metas: tuple of meta attributes

        Returns:
            a new domain with the (possibly renamed) variables
        """
        attrs, cvars, mets = [], [], []
        # For each variable, the list that it will be appended to
        targets = ([attrs] * len(attributes)
                   + [cvars] * len(class_vars)
                   + [mets] * len(metas))

        all_vars = attributes + class_vars + metas
        proposed_names = [var.name for var in all_vars]
        unique_names = get_unique_names_duplicates(proposed_names)
        duplicates = set()
        for p_name, u_name, var, target in zip(proposed_names, unique_names,
                                               all_vars, targets):
            if p_name != u_name:
                duplicates.add(p_name)
                var = var.copy(name=u_name)
            target.append(var)
        if duplicates:
            # Sorted, so the message does not depend on the iteration order
            # of the set (which may differ between runs for strings).
            self.Warning.renamed_vars(", ".join(sorted(duplicates)))
        return Orange.data.Domain(attrs, cvars, mets)

    @staticmethod
    def _join_array_by_indices(left, right, lefti, righti, string_cols=None):
        """Join (horizontally) two arrays, taking pairs of rows given in indices
        """
        def prepare(arr, inds, str_cols):
            try:
                newarr = arr[inds]
            except IndexError:
                newarr = np.full_like(arr, np.nan)
            else:
                empty = np.full(arr.shape[1], np.nan)
                if str_cols:
                    assert arr.dtype == object
                    empty = empty.astype(object)
                    empty[str_cols] = ''
                newarr[inds == -1] = empty
            return newarr

        left_width = left.shape[1]
        str_left = [i for i in string_cols or () if i < left_width]
        str_right = [
            i - left_width for i in string_cols or () if i >= left_width
        ]
        res = hstack((prepare(left, lefti,
                              str_left), prepare(right, righti, str_right)))
        return res

    @staticmethod
    def migrate_settings(settings, version=None):
        def mig_value(x):
            if x == "Position (index)":
                return INDEX
            if x == "Source position (index)":
                return INSTANCEID
            return x

        if not version:
            operations = ("augment", "merge", "combine")
            oper = operations[settings["merging"]]
            settings["attr_pairs"] = (True, True, [
                (mig_value(settings[f"attr_{oper}_data"]),
                 mig_value(settings[f"attr_{oper}_extra"]))
            ])
            for oper in operations:
                del settings[f"attr_{oper}_data"]
                del settings[f"attr_{oper}_extra"]

        if not version or version < 2 and "attr_pairs" in settings:
            data_exists, extra_exists, attr_pairs = settings.pop("attr_pairs")
            if not (data_exists and extra_exists):
                settings["context_settings"] = []
                return

            mapper = {0: (INDEX, 100), 1: (INSTANCEID, 100)}
            context = ContextHandler().new_context()
            context.values["attr_pairs"] = [
                tuple(mapper.get(var, (var, 100)) for var in pair)
                for pair in attr_pairs
            ]
            context.variables1 = {}
            context.variables2 = {}
            settings["context_settings"] = [context]
Esempio n. 29
0
class OWCorrelations(OWWidget):
    name = "Correlations"
    description = "Compute all pairwise attribute correlations."
    icon = "icons/Correlations.svg"
    priority = 1106

    class Inputs:
        """Input signals of the widget."""
        data = Input("Data", Table)

    class Outputs:
        """Output signals of the widget."""
        # The input data, passed on by `commit`
        data = Output("Data", Table)
        # The selected pair of features
        features = Output("Features", AttributeList)
        # Table with one row per ranked pair of features
        correlations = Output("Correlations", Table)

    want_main_area = False
    want_control_area = True

    correlation_type: int

    settings_version = 3
    settingsHandler = DomainContextHandler()
    selection = ContextSetting([])
    feature = ContextSetting(None)
    correlation_type = Setting(0)

    class Information(OWWidget.Information):
        """Informational messages shown by the widget."""
        removed_cons_feat = Msg("Constant features have been removed.")

    class Warning(OWWidget.Warning):
        """Warnings shown when the input cannot be analysed."""
        not_enough_vars = Msg("At least two numeric features are needed.")
        not_enough_inst = Msg("At least two instances are needed.")

    def __init__(self):
        """Initialise widget state and build the controls."""
        super().__init__()
        self.data = None  # type: Table
        # Set in `set_data` to the imputed numeric part of the data
        self.cont_data = None  # type: Table

        # GUI
        box = gui.vBox(self.controlArea)
        # Combo box for the type of correlation
        self.correlation_combo = gui.comboBox(
            box, self, "correlation_type", items=CorrelationType.items(),
            orientation=Qt.Horizontal, callback=self._correlation_combo_changed
        )

        # Combo box for an optional feature; the placeholder stands for
        # "no feature chosen"
        self.feature_model = DomainModel(
            order=DomainModel.ATTRIBUTES, separators=False,
            placeholder="(All combinations)", valid_types=ContinuousVariable)
        gui.comboBox(
            box, self, "feature", callback=self._feature_combo_changed,
            model=self.feature_model
        )

        # The ranking component; its button stays disabled until usable
        # data arrives (see `set_data`)
        self.vizrank, _ = CorrelationRank.add_vizrank(
            None, self, None, self._vizrank_selection_changed)
        self.vizrank.button.setEnabled(False)
        self.vizrank.threadStopped.connect(self._vizrank_stopped)

        # The filter and the table of the ranking are shown in the widget
        gui.separator(box)
        box.layout().addWidget(self.vizrank.filter)
        box.layout().addWidget(self.vizrank.rank_table)

        button_box = gui.hBox(self.buttonsArea)
        button_box.layout().addWidget(self.vizrank.button)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    @staticmethod
    def sizeHint():
        """Return the preferred size of the widget, 350 x 400."""
        return QSize(350, 400)

    def _correlation_combo_changed(self):
        """Callback of the correlation type combo: recompute the ranking."""
        self.apply()

    def _feature_combo_changed(self):
        """Callback of the feature combo: recompute the ranking."""
        self.apply()

    def _vizrank_selection_changed(self, *args):
        """Store the items selected in the ranking and commit.

        The positional arguments are stored, as a list, in `selection`.
        """
        self.selection = list(args)
        self.commit()

    def _vizrank_stopped(self):
        """Slot for the ranking's `threadStopped` signal: select a row."""
        self._vizrank_select()

    def _vizrank_select(self):
        """Select a row in the ranking table.

        The row holding the variables stored in `selection` (compared by
        sorted names) is selected; when there is no stored selection, or
        it is not among the rows, the first row is selected instead.
        Nothing happens if the table is empty.
        """
        model = self.vizrank.rank_table.model()
        if not model.rowCount():
            return

        # The model may be filtered by a feature, so the stored selection
        # is not necessarily there; the first row is the fallback.
        row_to_select = 0
        if self.selection:
            wanted = sorted(var.name for var in self.selection)
            for row in range(model.rowCount()):
                # pylint: disable=protected-access
                attrs = model.data(
                    model.index(row, 0), CorrelationRank._AttrRole)
                if sorted(attr.name for attr in attrs) == wanted:
                    row_to_select = row
                    break

        last_column = model.columnCount() - 1
        selection = QItemSelection()
        selection.select(model.index(row_to_select, 0),
                         model.index(row_to_select, last_column))
        self.vizrank.rank_table.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    @Inputs.data
    def set_data(self, data):
        """Accept new data, prepare its numeric part and recompute.

        `cont_data` is set only if the data has at least two instances
        and at least two numeric variables remain after constant ones are
        removed; otherwise it stays ``None`` and a warning is shown.
        """
        self.closeContext()
        self.clear_messages()
        self.data = data
        self.cont_data = None
        self.selection = []
        if data is None:
            self.info.set_input_summary(self.info.NoInput)
        else:
            if len(data) >= 2:
                domain = data.domain
                candidates = \
                    domain.class_vars + domain.metas + domain.attributes
                numeric = [var for var in candidates if var.is_continuous]
                cont_data = Table.from_table(Domain(numeric), data)
                remover = Remove(Remove.RemoveConstant)
                cont_data = remover(cont_data)
                if remover.attr_results["removed"]:
                    self.Information.removed_cons_feat()
                if len(cont_data.domain.attributes) >= 2:
                    self.cont_data = SklImpute()(cont_data)
                else:
                    self.Warning.not_enough_vars()
            else:
                self.Warning.not_enough_inst()
            self.info.set_input_summary(len(data),
                                        format_summary_details(data))
        self.set_feature_model()
        self.openContext(self.cont_data)
        self.apply()
        self.vizrank.button.setEnabled(self.cont_data is not None)

    def set_feature_model(self):
        """Refresh the feature model and pick the default feature.

        The default is the numeric class variable of the data, looked up
        by name in the domain of `cont_data`; without usable data or a
        numeric class it is ``None``.
        """
        cont_data = self.cont_data
        self.feature_model.set_domain(cont_data and cont_data.domain)
        if cont_data and self.data.domain.has_continuous_class:
            class_name = self.data.domain.class_var.name
            self.feature = cont_data.domain[class_name]
        else:
            self.feature = None

    def apply(self):
        """Reset the ranking and start it, or commit if there is no data."""
        self.vizrank.initialize()
        if self.cont_data is None:
            self.commit()
            return
        # this triggers self.commit() by changing vizrank selection
        self.vizrank.toggle()

    def commit(self):
        """Send the data, the selected features and the correlation table.

        The input data is always passed on. The other two outputs are
        ``None`` unless both `data` and `cont_data` are available; then
        the table is built from the rows of the ranking model.
        """
        data = self.data
        self.Outputs.data.send(data)
        if data:
            summary, details = len(data), format_summary_details(data)
        else:
            summary, details = self.info.NoOutput, ""
        self.info.set_output_summary(summary, details)

        if data is None or self.cont_data is None:
            self.Outputs.features.send(None)
            self.Outputs.correlations.send(None)
            return

        domain = Domain(
            [ContinuousVariable("Correlation"), ContinuousVariable("FDR")],
            metas=[StringVariable("Feature 1"), StringVariable("Feature 2")])
        model = self.vizrank.rank_model
        cells = [model.index(row, 0) for row in range(model.rowCount())]
        roles = (Qt.DisplayRole, CorrelationRank.PValRole)
        x = np.array([[float(model.data(cell, role)) for role in roles]
                      for cell in cells])
        # The second column is passed through FDR
        x[:, 1] = FDR(list(x[:, 1]))
        # pylint: disable=protected-access
        m = np.array(
            [[attr.name
              for attr in model.data(cell, CorrelationRank._AttrRole)]
             for cell in cells], dtype=object)
        corr_table = Table(domain, x, metas=m)
        corr_table.name = "Correlations"

        # data has been imputed; send original attributes
        features = AttributeList(
            [data.domain[var.name] for var in self.selection])
        self.Outputs.features.send(features)
        self.Outputs.correlations.send(corr_table)

    def send_report(self):
        """Report the ranking table, titled with the correlation type."""
        title = CorrelationType.items()[self.correlation_type]
        self.report_table(title, self.vizrank.rank_table)

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            sel = context.values["selection"]
            context.values["selection"] = [(var.name, vartype(var))
                                           for var in sel[0]]
        if version < 3:
            sel = context.values["selection"]
            context.values["selection"] = ([(name, vtype + 100)
                                            for name, vtype in sel], -3)
Esempio n. 30
0
class OWSave(widget.OWWidget):
    name = "Save"
    description = "Save data to an output file."
    icon = "icons/Save.svg"
    author = "Martin Frlin"
    category = "Data"
    keywords = ["data", "save"]

    inputs = [("Data", Table, "dataset")]

    want_main_area = False
    resizing_enabled = False

    last_dir = Setting("")

    def __init__(self, data=None, file_formats=None):
        """Build the controls and, if given, take over the initial data.

        Args:
            data: optional table to be saved
            file_formats: optional mapping from extensions to writers;
                `FileFormat.writers` is used when it is missing or empty
        """
        super().__init__()
        self.data = None
        self.filename = ""
        self.format_index = 0
        self.file_formats = file_formats or FileFormat.writers
        # (description, extensions) of every distinct writer, ordered by
        # the writers' OWSAVE_PRIORITY
        self.formats = [(f.DESCRIPTION, f.EXTENSIONS)
                        for f in sorted(set(self.file_formats.values()),
                                        key=lambda f: f.OWSAVE_PRIORITY)]
        self.comboBoxFormat = gui.comboBox(
            self.controlArea, self, value='format_index',
            items=['{} (*{})'.format(x[0], ' *'.join(x[1]))
                   for x in self.formats],
            box='File Format')
        box = gui.widgetBox(self.controlArea)
        # Both buttons start disabled; `dataset` enables them
        self.save = gui.button(box, self, "Save", callback=self.save_file,
                               default=True, disabled=True)
        gui.separator(box)
        self.saveAs = gui.button(box, self, "Save as ...",
                                 callback=self.save_file_as, disabled=True)
        self.setMinimumWidth(320)
        self.adjustSize()
        if data:
            self.dataset(data)

    def dataset(self, data):
        """Store the data; both buttons are disabled while it is ``None``."""
        self.data = data
        no_data = data is None
        for button in (self.save, self.saveAs):
            button.setDisabled(no_data)

    def save_file_as(self):
        """Ask for a file name, remember it and save the data.

        The dialog starts at the current file name or, without one, at the
        data's name inside the last used (or home) directory. If the chosen
        name ends with none of the selected format's extensions, the first
        one is appended. Nothing happens if no name is chosen.
        """
        format_name, format_extensions = self.formats[self.format_index]
        home_dir = os.path.expanduser("~")
        start_path = self.filename or os.path.join(
            (self.last_dir or home_dir), getattr(self.data, 'name', ''))
        name_filter = '{} (*{})'.format(
            format_name, ' *'.join(format_extensions))
        filename = QtGui.QFileDialog.getSaveFileName(
            self, 'Save as ...', start_path, name_filter)
        if not filename:
            return
        if not any(filename.endswith(ext) for ext in format_extensions):
            filename += format_extensions[0]
        self.filename = filename
        self.last_dir, file_name = os.path.split(filename)
        self.save.setText("Save as '%s'" % file_name)
        self.save.setDisabled(False)
        self.save_file()

    def save_file(self):
        """Write the data to the current file.

        Without a file name, `save_file_as` is called instead. Any
        exception raised while writing is passed, as text, to `error`.
        """
        if not self.filename:
            self.save_file_as()
        elif self.data is not None:
            try:
                # The writer is looked up by the (first) extension of the
                # selected format
                ext = self.formats[self.format_index][1]
                if not isinstance(ext, str):
                    ext = ext[0]  # is e.g. a tuple of extensions
                self.file_formats[ext].write(self.filename, self.data)
                # NOTE(review): error() without arguments presumably clears
                # a previously shown error -- confirm
                self.error()
            except Exception as errValue:
                self.error(str(errValue))