예제 #1
0
 def test_tool_tips(self):
     scene = GraphicsScene()
     view = QGraphicsView(scene)
     w = TextListWidget()
     text = "A" * 10
     w.setItems([text, text])
     scene.addItem(w)
     view.grab()  # ensure w is laid out
     wrect = view.mapFromScene(w.mapToScene(
         w.contentsRect())).boundingRect()
     p = QPoint(wrect.topLeft() + QPoint(5, 5))
     ev = QHelpEvent(QHelpEvent.ToolTip, p, view.viewport().mapToGlobal(p))
     try:
         QApplication.sendEvent(view.viewport(), ev)
         self.assertEqual(QToolTip.text(), text)
     finally:
         QToolTip.hideText()
예제 #2
0
class OWSilhouettePlot(widget.OWWidget):
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300
    keywords = []

    class Inputs:
        data = Input("Data", (Orange.data.Table, Orange.misc.DistMatrix))

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the (displayed) silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    auto_commit = settings.Setting(True)

    pending_selection = settings.Setting(None, schema_only=True)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan),
                 ("Cosine", Orange.distance.Cosine)]

    graph_name = "scene"

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")
        singleton_clusters_all = Msg("All clusters are singletons")
        memory_error = Msg("Not enough memory")
        value_error = Msg("Distances could not be computed: '{}'")
        input_validation_error = Msg("{}")

    class Warning(widget.OWWidget.Warning):
        missing_cluster_assignment = Msg(
            "{} instance{s} omitted (missing cluster assignment)")
        nan_distances = Msg("{} instance{s} omitted (undefined distances)")
        ignoring_categorical = Msg("Ignoring categorical features")

    def __init__(self):
        super().__init__()
        #: The input data
        self.data = None         # type: Optional[Orange.data.Table]
        #: The input distance matrix (if present)
        self.distances = None  # type: Optional[Orange.misc.DistMatrix]
        #: The effective distance matrix (is self.distances or computed from
        #: self.data depending on input)
        self._matrix = None      # type: Optional[Orange.misc.DistMatrix]
        #: An bool mask (size == len(data)) indicating missing group/cluster
        #: assignments
        self._mask = None        # type: Optional[np.ndarray]
        #: An array of cluster/group labels for instances with valid group
        #: assignment
        self._labels = None      # type: Optional[np.ndarray]
        #: An array of silhouette scores for instances with valid group
        #: assignment
        self._silhouette = None  # type: Optional[np.ndarray]
        self._silplot = None     # type: Optional[SilhouettePlot]

        controllayout = self.controlArea.layout()
        assert isinstance(controllayout, QVBoxLayout)
        self._distances_gui_box = distbox = gui.widgetBox(
            None, "Distance"
        )
        self._distances_gui_cb = gui.comboBox(
            distbox, self, "distance_idx",
            items=[name for name, _ in OWSilhouettePlot.Distances],
            orientation=Qt.Horizontal, callback=self._invalidate_distances)
        controllayout.addWidget(distbox)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", contentsLength=14,
            searchable=True, callback=self._invalidate_scores
        )
        gui.checkBox(
            box, self, "group_by_cluster", "Group by cluster",
            callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(
            box, self, "bar_size", minValue=1, maxValue=10, step=1,
            callback=self._update_bar_size)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx", contentsLength=14,
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.auto_send(self.buttonsArea, self, "auto_commit")

        self.scene = GraphicsScene(self)
        self.view = StickyGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

        self.settingsAboutToBePacked.connect(self.pack_settings)

    def sizeHint(self):
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    def pack_settings(self):
        if self.data and self._silplot is not None:
            self.pending_selection = list(self._silplot.selection())
        else:
            self.pending_selection = None

    @Inputs.data
    @check_sql_input
    def set_data(self, data: Union[Table, DistMatrix, None]):
        """
        Set the input dataset or distance matrix.
        """
        self.closeContext()
        self.clear()
        try:
            if isinstance(data, Orange.misc.DistMatrix):
                self._set_distances(data)
            elif isinstance(data, Orange.data.Table):
                self._set_table(data)
            else:
                self.distances = None
                self.data = None
        except InputValidationError as err:
            self.Error.input_validation_error(err.message)
            self.distances = None
            self.data = None

    def _set_table(self, data: Table):
        self._setup_control_models(data.domain)
        self.data = data
        self.distances = None

    def _set_distances(self, distances: DistMatrix):
        if isinstance(distances.row_items, Orange.data.Table) and \
                distances.axis == 1:
            data = distances.row_items
        else:
            raise ValidationError("Input matrix does not have associated data")

        if data is not None:
            self._setup_control_models(data.domain)
            self.distances = distances
            self.data = data

    def handleNewSignals(self):
        if not self._is_empty():
            self._update()
            self._replot()
            if self.pending_selection is not None and self._silplot is not None:
                # If selection contains indices that are too large, the data
                # file must had been modified, so we ignore selection
                if max(self.pending_selection, default=-1) < len(self.data):
                    self._silplot.setSelection(np.array(self.pending_selection))
                self.pending_selection = None

        # Disable/enable the Distances GUI controls if applicable
        self._distances_gui_box.setEnabled(self.distances is None)

        self.commit.now()

    def _setup_control_models(self, domain: Domain):
        groupvars = [
            v for v in domain.variables + domain.metas
            if v.is_discrete and len(v.values) >= 2]
        if not groupvars:
            raise NoGroupVariable()
        self.cluster_var_model[:] = groupvars
        if domain.class_var in groupvars:
            self.cluster_var_idx = groupvars.index(domain.class_var)
        else:
            self.cluster_var_idx = 0
        annotvars = [var for var in domain.variables + domain.metas
                     if var.is_string or var.is_discrete]
        self.annotation_var_model[:] = ["None"] + annotvars
        self.annotation_var_idx = 1 if annotvars else 0
        self.openContext(Orange.data.Domain(groupvars))

    def _is_empty(self) -> bool:
        # Is empty (does not have any input).
        return (self.data is None or len(self.data) == 0) \
               and self.distances is None

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self.distances = None
        self._matrix = None
        self._mask = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()
        self.Error.clear()
        self.Warning.clear()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self.view.setSceneRect(QRectF())
        self.view.setHeaderSceneRect(QRectF())
        self.view.setFooterSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = self._mask = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit.deferred()

    def _ensure_matrix(self):
        # ensure self._matrix is computed if necessary
        if self._is_empty():
            return
        if self._matrix is None:
            if self.distances is not None:
                self._matrix = np.asarray(self.distances)
            elif self.data is not None:
                data = self.data
                _, metric = self.Distances[self.distance_idx]
                if not metric.supports_discrete and any(
                        a.is_discrete for a in data.domain.attributes):
                    self.Warning.ignoring_categorical()
                    data = Orange.distance.remove_discrete_features(data)
                try:
                    self._matrix = np.asarray(metric(data))
                except MemoryError:
                    self.Error.memory_error()
                    return
                except ValueError as err:
                    self.Error.value_error(str(err))
                    return
            else:
                assert False, "invalid state"

    def _update(self):
        # Update/recompute the effective distances and scores as required.
        self._clear_messages()
        if self._is_empty():
            self._reset_all()
            return

        self._ensure_matrix()
        if self._matrix is None:
            return

        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = np.asarray(labels, dtype=float)
        cluster_mask = np.isnan(labels)
        dist_mask = np.isnan(self._matrix).all(axis=0)
        mask = cluster_mask | dist_mask
        labels = labels.astype(int)
        labels = labels[~mask]

        labels_unq = np.unique(labels)

        if len(labels_unq) < 2:
            self.Error.need_two_clusters()
            labels = silhouette = mask = None
        elif len(labels_unq) == len(labels):
            self.Error.singleton_clusters_all()
            labels = silhouette = mask = None
        else:
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix[~mask, :][:, ~mask], labels, metric="precomputed")
        self._mask = mask
        self._labels = labels
        self._silhouette = silhouette

        if mask is not None:
            count_missing = np.count_nonzero(cluster_mask)
            if count_missing:
                self.Warning.missing_cluster_assignment(
                    count_missing, s="s" if count_missing > 1 else "")
            count_nandist = np.count_nonzero(dist_mask)
            if count_nandist:
                self.Warning.nan_distances(
                    count_nandist, s="s" if count_nandist > 1 else "")

    def _reset_all(self):
        self._mask = None
        self._silhouette = None
        self._labels = None
        self._matrix = None
        self._clear_scene()

    def _clear_messages(self):
        self.Error.clear()
        self.Warning.clear()

    def _set_bar_height(self):
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(
            not visible and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values,
                                  var.colors)
            else:
                silplot.setScores(
                    self._silhouette,
                    np.zeros(len(self._silhouette), dtype=int),
                    [""], np.array([[63, 207, 207]])
                )

            self.scene.addItem(silplot)
            self._update_annotations()
            silplot.selectionChanged.connect(self.commit.deferred)
            silplot.layout().activate()
            self._update_scene_rect()
            silplot.geometryChanged.connect(self._update_scene_rect)

    def _update_bar_size(self):
        if self._silplot is not None:
            self._set_bar_height()

    def _update_annotations(self):
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(
            self.bar_size < 5 and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                if self._mask is not None:
                    assert column.shape == self._mask.shape
                    # pylint: disable=invalid-unary-operand-type
                    column = column[~self._mask]
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def _update_scene_rect(self):
        geom = self._silplot.geometry()
        self.scene.setSceneRect(geom)
        self.view.setSceneRect(geom)

        header = self._silplot.topScaleItem()
        footer = self._silplot.bottomScaleItem()

        def extend_horizontal(rect):
            # type: (QRectF) -> QRectF
            rect = QRectF(rect)
            rect.setLeft(geom.left())
            rect.setRight(geom.right())
            return rect

        margin = 3
        if header is not None:
            self.view.setHeaderSceneRect(
                extend_horizontal(header.geometry().adjusted(0, 0, 0, margin)))
        if footer is not None:
            self.view.setFooterSceneRect(
                extend_horizontal(footer.geometry().adjusted(0, -margin, 0, 0)))

    @gui.deferred
    def commit(self):
        """
        Commit/send the current selection to the output.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = np.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                assert (np.diff(indices) > 0).all(), "strictly increasing"
                if self._mask is not None:
                    # pylint: disable=invalid-unary-operand-type
                    indices = np.flatnonzero(~self._mask)[indices]
                selectedmask[indices] = True

            if self._mask is not None:
                scores = np.full(shape=selectedmask.shape,
                                 fill_value=np.nan)
                # pylint: disable=invalid-unary-operand-type
                scores[~self._mask] = self._silhouette
            else:
                scores = self._silhouette

            var = self.cluster_var_model[self.cluster_var_idx]

            domain = self.data.domain
            proposed = "Silhouette ({})".format(escape(var.name))
            names = [var.name for var in itertools.chain(domain.attributes,
                                                         domain.class_vars,
                                                         domain.metas)]
            unique = get_unique_names(names, proposed)
            silhouette_var = Orange.data.ContinuousVariable(unique)
            domain = Orange.data.Domain(
                domain.attributes,
                domain.class_vars,
                domain.metas + (silhouette_var, ))

            if np.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, np.flatnonzero(selectedmask))

            if selected is not None:
                with selected.unlocked(selected.metas):
                    selected[:, silhouette_var] = np.c_[scores[selectedmask]]

            data = self.data.transform(domain)
            with data.unlocked(data.metas):
                data[:, silhouette_var] = np.c_[scores]

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(create_annotated_table(data, indices))

    def send_report(self):
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        if self.annotation_var_idx and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        self.clear()
        super().onDeleteWidget()
예제 #3
0
    def __init__(self):
        super().__init__()
        #: The input data
        self.data = None         # type: Optional[Orange.data.Table]
        #: The input distance matrix (if present)
        self.distances = None  # type: Optional[Orange.misc.DistMatrix]
        #: The effective distance matrix (is self.distances or computed from
        #: self.data depending on input)
        self._matrix = None      # type: Optional[Orange.misc.DistMatrix]
        #: An bool mask (size == len(data)) indicating missing group/cluster
        #: assignments
        self._mask = None        # type: Optional[np.ndarray]
        #: An array of cluster/group labels for instances with valid group
        #: assignment
        self._labels = None      # type: Optional[np.ndarray]
        #: An array of silhouette scores for instances with valid group
        #: assignment
        self._silhouette = None  # type: Optional[np.ndarray]
        self._silplot = None     # type: Optional[SilhouettePlot]

        controllayout = self.controlArea.layout()
        assert isinstance(controllayout, QVBoxLayout)
        self._distances_gui_box = distbox = gui.widgetBox(
            None, "Distance"
        )
        self._distances_gui_cb = gui.comboBox(
            distbox, self, "distance_idx",
            items=[name for name, _ in OWSilhouettePlot.Distances],
            orientation=Qt.Horizontal, callback=self._invalidate_distances)
        controllayout.addWidget(distbox)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", contentsLength=14,
            searchable=True, callback=self._invalidate_scores
        )
        gui.checkBox(
            box, self, "group_by_cluster", "Group by cluster",
            callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(
            box, self, "bar_size", minValue=1, maxValue=10, step=1,
            callback=self._update_bar_size)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx", contentsLength=14,
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.auto_send(self.buttonsArea, self, "auto_commit")

        self.scene = GraphicsScene(self)
        self.view = StickyGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

        self.settingsAboutToBePacked.connect(self.pack_settings)