def test_encode_domain_with_false_attributes_in_res(self):
        # With attributes_in_res switched off the encoded attributes must be
        # empty, while the metas are still encoded by variable type.
        handler = DomainContextHandler(metas_in_res=True,
                                       attributes_in_res=False)

        attrs, metas = handler.encode_domain(self.domain)

        expected_metas = {'c2': Continuous, 'd4': Discrete}
        self.assertEqual(attrs, {})
        self.assertEqual(metas, expected_metas)
    def test_encode_domain_with_match_class(self):
        # Under MATCH_VALUES_CLASS, 'd3' is expected to be encoded with its
        # list of values; every other variable is encoded by type only.
        match_mode = DomainContextHandler.MATCH_VALUES_CLASS
        handler = DomainContextHandler(match_values=match_mode)

        attrs, metas = handler.encode_domain(self.domain)

        expected_attrs = {
            'c1': Continuous,
            'd1': Discrete,
            'd2': Discrete,
            'd3': list('ghi'),
        }
        expected_metas = {'c2': Continuous, 'd4': Discrete}
        self.assertEqual(attrs, expected_attrs)
        self.assertEqual(metas, expected_metas)
Example #3
0
    def test_encode_domain_with_false_attributes_in_res(self):
        # Attributes are left out of the encoding when attributes_in_res is
        # False; both metas are still expected, encoded by type.
        handler = DomainContextHandler(metas_in_res=True,
                                       attributes_in_res=False)
        attrs, metas = handler.encode_domain(domain)

        expected_metas = {
            CONTINUOUS_META: Continuous,
            DISCRETE_META_JKL: Discrete,
        }
        self.assertEqual(attrs, {})
        self.assertEqual(metas, expected_metas)
    def test_encode_domain_with_match_none(self):
        # Under MATCH_VALUES_NONE no variable carries its values: every
        # attribute and meta is expected to be encoded by type only.
        match_mode = DomainContextHandler.MATCH_VALUES_NONE
        handler = DomainContextHandler(match_values=match_mode,
                                       metas_in_res=True)

        attrs, metas = handler.encode_domain(self.domain)

        expected_attrs = {
            'c1': Continuous,
            'd1': Discrete,
            'd2': Discrete,
            'd3': Discrete,
        }
        expected_metas = {'c2': Continuous, 'd4': Discrete}
        self.assertEqual(attrs, expected_attrs)
        self.assertEqual(metas, expected_metas)
    def test_encode_domain_with_match_all(self):
        # Under MATCH_VALUES_ALL every discrete variable (attributes, class
        # and metas) is expected to be encoded with its list of values.
        match_mode = DomainContextHandler.MATCH_VALUES_ALL
        handler = DomainContextHandler(match_values=match_mode)

        attrs, metas = handler.encode_domain(self.domain)

        expected_attrs = {
            'c1': Continuous,
            'd1': list('abc'),
            'd2': list('def'),
            'd3': list('ghi'),
        }
        expected_metas = {'c2': Continuous, 'd4': list('jkl')}
        self.assertEqual(attrs, expected_attrs)
        self.assertEqual(metas, expected_metas)
Example #6
0
    def test_encode_domain_with_false_metas_in_res(self):
        # Metas are left out of the encoding when metas_in_res is False;
        # attributes and the class are still expected, encoded by type.
        handler = DomainContextHandler(metas_in_res=False,
                                       attributes_in_res=True)
        attrs, metas = handler.encode_domain(domain)

        expected_attrs = {
            CONTINOUS_ATTR: Continuous,
            DISCRETE_ATTR_ABC: Discrete,
            DISCRETE_ATTR_DEF: Discrete,
            DISCRETE_CLASS_GHI: Discrete,
        }
        self.assertEqual(attrs, expected_attrs)
        self.assertEqual(metas, {})
    def test_deprecated_str_as_var(self):
        # Encoding a setting whose value is a plain string ("foo") must emit
        # a warning that mentions the setting's name.
        #
        # The version guard is a reminder to delete this test together with
        # the deprecated behaviour. The pragma sits on the `if` line itself
        # so that coverage excludes the whole guarded block; on a
        # comment-only line inside the block it excluded nothing.
        expired = LooseVersion(Orange.__version__) >= LooseVersion("3.26")
        if expired:  # pragma: no cover
            self.fail("Remove support for variables stored as string settings "
                      "and this test.")

        context = Mock()
        context.attributes = {"foo": 2}
        context.metas = {}
        setting = ContextSetting("")
        setting.name = "setting_name"
        with warnings.catch_warnings(record=True) as caught:
            # Record every warning, even ones already triggered elsewhere.
            warnings.simplefilter("always")
            DomainContextHandler.encode_setting(context, setting, "foo")
        # Fail with a readable message instead of an IndexError when no
        # warning was emitted at all.
        self.assertTrue(caught, "encode_setting did not emit any warning")
        self.assertIn("setting_name", caught[0].message.args[0])
Example #8
0
    def test_encode_domain_with_match_none(self):
        # Under MATCH_VALUES_NONE every attribute, class and meta variable
        # is expected to be encoded by its VarTypes member only.
        match_mode = DomainContextHandler.MATCH_VALUES_NONE
        handler = DomainContextHandler(match_values=match_mode,
                                       metas_in_res=True)

        attrs, metas = handler.encode_domain(domain)

        expected_attrs = {
            CONTINOUS_ATTR: VarTypes.Continuous,
            DISCRETE_ATTR_ABC: VarTypes.Discrete,
            DISCRETE_ATTR_DEF: VarTypes.Discrete,
            DISCRETE_CLASS_GHI: VarTypes.Discrete,
        }
        expected_metas = {
            CONTINUOUS_META: VarTypes.Continuous,
            DISCRETE_META_JKL: VarTypes.Discrete,
        }
        self.assertEqual(attrs, expected_attrs)
        self.assertEqual(metas, expected_metas)
Example #9
0
    def test_encode_domain_with_match_class(self):
        # Under MATCH_VALUES_CLASS only DISCRETE_CLASS_GHI is expected to be
        # encoded with its values; all other variables by type.
        match_mode = DomainContextHandler.MATCH_VALUES_CLASS
        handler = DomainContextHandler(match_values=match_mode,
                                       metas_in_res=True)

        attrs, metas = handler.encode_domain(domain)

        expected_attrs = {
            CONTINOUS_ATTR: Continuous,
            DISCRETE_ATTR_ABC: Discrete,
            DISCRETE_ATTR_DEF: Discrete,
            DISCRETE_CLASS_GHI: ["g", "h", "i"],
        }
        expected_metas = {
            CONTINUOUS_META: Continuous,
            DISCRETE_META_JKL: Discrete,
        }
        self.assertEqual(attrs, expected_attrs)
        self.assertEqual(metas, expected_metas)
Example #10
0
    def test_encode_domain_with_match_all(self):
        # Under MATCH_VALUES_ALL every discrete variable, metas included, is
        # expected to be encoded with its list of values.
        match_mode = DomainContextHandler.MATCH_VALUES_ALL
        handler = DomainContextHandler(match_values=match_mode,
                                       metas_in_res=True)

        attrs, metas = handler.encode_domain(domain)

        expected_attrs = {
            CONTINOUS_ATTR: Continuous,
            DISCRETE_ATTR_ABC: ["a", "b", "c"],
            DISCRETE_ATTR_DEF: ["d", "e", "f"],
            DISCRETE_CLASS_GHI: ["g", "h", "i"],
        }
        expected_metas = {
            CONTINUOUS_META: Continuous,
            DISCRETE_META_JKL: ["j", "k", "l"],
        }
        self.assertEqual(attrs, expected_attrs)
        self.assertEqual(metas, expected_metas)
Example #11
0
 def setUp(self):
     # Build a handler bound to MockWidget and a widget whose current
     # context already holds the encoding of the module-level `domain`.
     handler = self.handler = DomainContextHandler(metas_in_res=True,
                                                   attributes_in_res=True)
     # Disable reading settings from disk
     handler.read_defaults = lambda: None
     handler.bind(MockWidget)

     widget = self.widget = MockWidget()
     attrs, metas = handler.encode_domain(domain)
     widget.current_context.attributes = attrs
     widget.current_context.metas = metas
     handler.initialize(widget)
Example #12
0
    def test_encode_domain_with_false_attributes_in_res(self):
        # With attributes_in_res disabled only the metas ('cm1', 'dm1') are
        # expected in the encoding, each by its VarTypes member.
        self.handler = DomainContextHandler(metas_in_res=True,
                                            attributes_in_res=False)
        attrs, metas = self.handler.encode_domain(self.domain)

        expected_metas = {
            'cm1': VarTypes.Continuous,
            'dm1': VarTypes.Discrete,
        }
        self.assertEqual(attrs, {})
        self.assertEqual(metas, expected_metas)
 def setUp(self):
     # Fixture: a domain with three attributes, one class variable and two
     # metas; `self.args` bundles the domain with a type-only encoding of
     # its attributes/class and of its metas.
     attributes = [ContinuousVariable('c1'),
                   DiscreteVariable('d1', values='abc'),
                   DiscreteVariable('d2', values='def')]
     class_vars = [DiscreteVariable('d3', values='ghi')]
     metas = [ContinuousVariable('c2'),
              DiscreteVariable('d4', values='jkl')]
     self.domain = Domain(attributes=attributes,
                          class_vars=class_vars,
                          metas=metas)

     encoded_attributes = {'c1': Continuous, 'd1': Discrete,
                           'd2': Discrete, 'd3': Discrete}
     encoded_metas = {'c2': Continuous, 'd4': Discrete}
     self.args = (self.domain, encoded_attributes, encoded_metas)

     self.handler = DomainContextHandler(metas_in_res=True)
     # Replace the defaults reader with a no-op.
     self.handler.read_defaults = lambda: None
Example #14
0
class OWDataProjectionWidget(OWProjectionWidgetBase, openclass=True):
    """
    Base widget for widgets that get Data and Data Subset (both
    Orange.data.Table) on the input, and output Selected Data and Data
    (both Orange.data.Table).

    Besides that, the widget displays data as a two-dimensional projection
    of points.

    Derived classes must implement `get_embedding`, which must also set
    `self.valid_data`.
    """
    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    class Warning(OWProjectionWidgetBase.Warning):
        too_many_labels = Msg(
            "Too many labels to show (zoom in or label only selected)")
        subset_not_subset = Msg(
            "Subset data contains some instances that do not appear in "
            "input data")
        subset_independent = Msg(
            "No subset data instances appear in input data")

    settingsHandler = DomainContextHandler()
    selection = Setting(None, schema_only=True)
    auto_commit = Setting(True)

    GRAPH_CLASS = OWScatterPlotBase
    graph = SettingProvider(OWScatterPlotBase)
    graph_name = "graph.plot_widget.plotItem"
    embedding_variables_names = ("proj-x", "proj-y")
    left_side_scrolling = True

    input_changed = Signal(object)
    output_changed = Signal(object)

    def __init__(self):
        super().__init__()
        self.subset_data = None
        self.subset_indices = None
        # Selection restored from settings; applied once the plot is set up.
        self.__pending_selection = self.selection
        self._invalidated = True
        self._domain_invalidated = True
        self.input_changed.connect(self.set_input_summary)
        self.output_changed.connect(self.set_output_summary)
        self.setup_gui()

    # GUI
    def setup_gui(self):
        """Create the graph and the controls and reset both summaries."""
        self._add_graph()
        self._add_controls()
        self.input_changed.emit(None)
        self.output_changed.emit(None)

    def _add_graph(self):
        """Instantiate `GRAPH_CLASS` and put its plot widget in main area."""
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = self.GRAPH_CLASS(self, box)
        box.layout().addWidget(self.graph.plot_widget)
        self.graph.too_many_labels.connect(
            lambda too_many: self.Warning.too_many_labels(shown=too_many))

    def _add_controls(self):
        """Populate the control area with the plot-related boxes."""
        self.gui = OWPlotGUI(self)
        area = self.controlArea
        self._point_box = self.gui.point_properties_box(area)
        self._effects_box = self.gui.effects_box(area)
        self._plot_box = self.gui.plot_properties_box(area)
        self.control_area_stretch = gui.widgetBox(area)
        self.control_area_stretch.layout().addStretch(100)
        self.gui.box_zoom_select(area)
        gui.auto_send(area, self, "auto_commit")

    @property
    def effective_variables(self):
        """Attributes of the current data's domain."""
        return self.data.domain.attributes

    @property
    def effective_data(self):
        """Current data transformed to a domain whose attributes are
        `effective_variables`; class variables and metas are kept."""
        return self.data.transform(
            Domain(self.effective_variables, self.data.domain.class_vars,
                   self.data.domain.metas))

    # Input
    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """
        Handle the Data input.

        Stores the new data, reopens the context and decides whether the
        plot has to be rebuilt (`_invalidated`) and whether the effective
        domain has changed (`_domain_invalidated`).
        """
        data_existed = self.data is not None
        # Remember the old effective data to compare against the new one.
        effective_data = self.effective_data if data_existed else None
        same_domain = (data_existed and data is not None and
                       data.domain.checksum() == self.data.domain.checksum())
        self.closeContext()
        self.data = data
        self.check_data()
        if not same_domain:
            self.init_attr_values()
        self.openContext(self.data)
        # The plot stays valid only when old and new effective X are equal.
        self._invalidated = not (data_existed
                                 and self.data is not None and array_equal(
                                     effective_data.X, self.effective_data.X))
        self._domain_invalidated = not (
            data_existed and self.data is not None
            and effective_data.domain.checksum()
            == self.effective_data.domain.checksum())
        if self._invalidated:
            self.clear()
            self.input_changed.emit(data)
        self.enable_controls()

    def check_data(self):
        """Clear the widget's messages."""
        self.clear_messages()

    def enable_controls(self):
        """Enable the class density checkbox only if density can be drawn."""
        self.cb_class_density.setEnabled(self.can_draw_density())

    @Inputs.data_subset
    @check_sql_input
    def set_subset_data(self, subset):
        """Handle the Data Subset input; the alpha control is enabled only
        when there is no subset."""
        self.subset_data = subset
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        """Rebuild the plot if invalidated, otherwise only refresh point
        properties; then commit unconditionally."""
        self._handle_subset_data()
        if self._invalidated:
            self._invalidated = False
            self.setup_plot()
        else:
            self.graph.update_point_props()
        self.unconditional_commit()

    def _handle_subset_data(self):
        """Recompute `subset_indices` and warn when the subset ids are
        disjoint from, or not fully contained in, the data ids."""
        self.Warning.subset_independent.clear()
        self.Warning.subset_not_subset.clear()
        if self.data is None or self.subset_data is None:
            self.subset_indices = set()
        else:
            self.subset_indices = set(self.subset_data.ids)
            ids = set(self.data.ids)
            if not self.subset_indices & ids:
                self.Warning.subset_independent()
            elif self.subset_indices - ids:
                self.Warning.subset_not_subset()

    def set_input_summary(self, data):
        """Show the number of input instances, or the no-input state."""
        summary = str(len(data)) if data else self.info.NoInput
        self.info.set_input_summary(summary)

    def set_output_summary(self, data):
        """Show the number of output instances, or the no-output state."""
        # An empty output is reported as NoOutput (was NoInput).
        summary = str(len(data)) if data else self.info.NoOutput
        self.info.set_output_summary(summary)

    def get_subset_mask(self):
        """
        Return a boolean array marking which valid instances belong to the
        subset, or None when `subset_indices` is empty or unset.
        """
        if not self.subset_indices:
            return None
        valid_data = self.data[self.valid_data]
        # Builtin `bool` replaces the `np.bool` alias removed from NumPy.
        return np.fromiter((ex.id in self.subset_indices for ex in valid_data),
                           dtype=bool,
                           count=len(valid_data))

    # Plot
    def get_embedding(self):
        """A get embedding method.

        Derived classes must override this method. The overridden method
        should return embedding for all data (valid and invalid). Invalid
        data embedding coordinates should be set to 0 (in some cases to Nan).

        The method should also set self.valid_data.

        Returns:
            np.array: Array of embedding coordinates with shape
            len(self.data) x 2
        """
        raise NotImplementedError

    def get_coordinates_data(self):
        """Return the transposed embedding of the valid instances, or
        `(None, None)` when there is no embedding or no valid instance."""
        embedding = self.get_embedding()
        if embedding is not None and len(embedding[self.valid_data]):
            return embedding[self.valid_data].T
        return None, None

    def setup_plot(self):
        """Reset the graph and re-apply the current or pending selection."""
        self.graph.reset_graph()
        self.__pending_selection = self.selection or self.__pending_selection
        self.apply_selection()

    # Selection
    def apply_selection(self):
        """
        Apply the pending selection to the graph.

        The pending selection is a sequence of `(index, group)` pairs; it is
        applied only when data is present, the selection is non-empty and
        every index is below `self.graph.n_valid`.
        """
        pending = self.__pending_selection
        if self.data is not None and pending is not None and len(pending) \
                and max(i for i, _ in pending) < self.graph.n_valid:
            index_group = np.array(pending).T
            selection = np.zeros(self.graph.n_valid, dtype=np.uint8)
            selection[index_group[0]] = index_group[1]

            self.selection = self.__pending_selection
            self.__pending_selection = None
            self.graph.selection = selection
            self.graph.update_selection_colors()

    def selection_changed(self):
        """Store the graph's selection as `(index, group)` pairs (None for
        SqlTable data or when there is no selection) and commit."""
        sel = None if self.data and isinstance(self.data, SqlTable) \
            else self.graph.selection
        self.selection = [(i, x) for i, x in enumerate(sel) if x] \
            if sel is not None else None
        self.commit()

    # Output
    def commit(self):
        """Send the outputs."""
        self.send_data()

    def send_data(self):
        """Send the selected and the annotated data."""
        group_sel, data, graph = None, self._get_projection_data(), self.graph
        if graph.selection is not None:
            # Expand the selection over valid points to all instances.
            group_sel = np.zeros(len(data), dtype=int)
            group_sel[self.valid_data] = graph.selection
        selected = self._get_selected_data(data, graph.get_selection(),
                                           group_sel)
        self.output_changed.emit(selected)
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(
            self._get_annotated_data(data, graph.get_selection(), group_sel,
                                     graph.selection))

    def _get_projection_data(self):
        """Return the data with the embedding stored in two additional
        meta columns; the data itself when there is no data or
        `embedding_variables_names` is None."""
        if self.data is None or self.embedding_variables_names is None:
            return self.data
        variables = self._get_projection_variables()
        data = self.data.transform(
            Domain(self.data.domain.attributes, self.data.domain.class_vars,
                   self.data.domain.metas + variables))
        data.metas[:, -2:] = self.get_embedding()
        return data

    def _get_projection_variables(self):
        """Return two continuous variables for the embedding, named after
        `embedding_variables_names` via `get_unique_names`."""
        names = get_unique_names(self.data.domain,
                                 self.embedding_variables_names)
        return ContinuousVariable(names[0]), ContinuousVariable(names[1])

    @staticmethod
    def _get_selected_data(data, selection, group_sel):
        """Return a groups table of the selection, or None when the
        selection is empty."""
        return create_groups_table(data, group_sel, False, "Group") \
            if len(selection) else None

    @staticmethod
    def _get_annotated_data(data, selection, group_sel, graph_sel):
        """Return a groups table when more than one selection group is
        used (a group value above 1), otherwise an annotated table."""
        if graph_sel is not None and np.max(graph_sel) > 1:
            return create_groups_table(data, group_sel)
        else:
            return create_annotated_table(data, selection)

    # Report
    def send_report(self):
        """Report the plot and, if non-empty, its caption; no-op without
        data."""
        if self.data is None:
            return

        caption = self._get_send_report_caption()
        self.report_plot()
        if caption:
            self.report_caption(caption)

    def _get_send_report_caption(self):
        """Render the color/label/shape/size variables and the jittering
        as a report caption."""
        return report.render_items_vert(
            (("Color", self._get_caption_var_name(self.attr_color)),
             ("Label", self._get_caption_var_name(self.attr_label)),
             ("Shape", self._get_caption_var_name(self.attr_shape)),
             ("Size", self._get_caption_var_name(self.attr_size)),
             ("Jittering", self.graph.jitter_size != 0
              and "{} %".format(self.graph.jitter_size))))

    @staticmethod
    def _get_caption_var_name(var):
        """Return the variable's name, or `var` itself when it is not a
        Variable."""
        return var.name if isinstance(var, Variable) else var

    # Misc
    def sizeHint(self):
        return QSize(1132, 708)

    def clear(self):
        """Drop the stored selection and the graph's selection."""
        self.selection = None
        self.graph.selection = None

    def onDeleteWidget(self):
        """Release the graph's view box and clear the plot on deletion."""
        super().onDeleteWidget()
        self.graph.plot_widget.getViewBox().deleteLater()
        self.graph.plot_widget.clear()
        self.graph.clear()
Example #15
0
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `data` is a signal handler that fills the list boxes and calls
    `attr_changed`.

    - `attr_changed` handles changes of attribute or grouping (callbacks for
    list boxes). It recomputes box data by calling `compute_box_data`, shows
    the appropriate display box (discrete/continuous) and then calls
    `layout_changed`

    - `layout_changed` constructs all the elements for the scene (as lists of
    QGraphicsItemGroup) and calls `display_changed`. It is called when the
    attribute or grouping is changed (by attr_changed) and on resize event.

    - `display_changed` puts the elements corresponding to the current display
    settings on the scene. It is called when the elements are reconstructed
    (layout is changed due to selection of attributes or resize event), or
    when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by layout_changed). Instead, layout_changed and
    display_changed call display_changed_disc that draws everything.
    """
    name = "Box plot"
    description = "Shows box plots"
    long_description = """Shows box plots, either one for or multiple
    box plots for data split by an attribute value."""
    icon = "icons/BoxPlot.svg"
    priority = 100
    author = "Amela Rakanović, Janez Demšar"
    inputs = [("Data", Table, "data")]
    outputs = [("Basic statistic", Table)]

    settingsHandler = DomainContextHandler()
    display = Setting(0)
    grouping_select = ContextSetting([0])
    attributes_select = ContextSetting([0])
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)
    colorSettings = Setting(None)
    selectedSchemaIndex = Setting(0)

    _sorting_criteria_attrs = ["", "", "median", "mean"]
    _label_positions = ["q25", "median", "mean"]

    _pen_axis_tick = QtGui.QPen(QtCore.Qt.white, 5)
    _pen_axis = QtGui.QPen(QtCore.Qt.darkGray, 3)
    _pen_median = QtGui.QPen(QtGui.QBrush(QtGui.QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QtGui.QPen(QtGui.QBrush(QtGui.QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QtGui.QPen(QtGui.QBrush(QtGui.QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(QtCore.Qt.DotLine)
    _post_line_pen = QtGui.QPen(QtCore.Qt.lightGray, 2)
    _post_grp_pen = QtGui.QPen(QtCore.Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted,
                _pen_axis, _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(QtCore.Qt.RoundCap)
        pen.setJoinStyle(QtCore.Qt.RoundJoin)
    _pen_axis_tick.setCapStyle(QtCore.Qt.FlatCap)

    _box_brush = QtGui.QBrush(QtGui.QColor(0x33, 0x88, 0xff, 0xc0))

    _axis_font = QtGui.QFont()
    _axis_font.setPixelSize(12)
    _label_font = QtGui.QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QtGui.QBrush(QtGui.QColor(0x33, 0x00, 0xff))

    def __init__(self):
        """Initialize the widget state and build the control and main areas."""
        super().__init__()
        self.grouping = []
        self.attributes = []
        self.stats = []
        self.ddataset = None

        # Each attribute gets its own list. A chained `a = b = [] `
        # assignment would bind all of them to one shared object, so an
        # in-place change to one would silently show up in the others.
        self.label_txts = []
        self.mean_labels = []
        self.boxes = []
        self.labels = []
        self.attr_labels = []
        self.order = []
        self.p = -1.0
        self.scale_x = self.scene_min_x = self.scene_width = self.label_width \
            = 0

        # Control area: variable list, grouping list and display options.
        self.attr_list_box = gui.listBox(
            self.controlArea, self, "attributes_select", "attributes",
            box="Variable", callback=self.attr_changed)
        self.attrCombo = gui.listBox(
            self.controlArea, self, 'grouping_select', "grouping",
            box="Grouping", callback=self.attr_changed)
        self.sorting_combo = gui.radioButtonsInBox(
            self.controlArea, self, 'display', box='Display',
            callback=self.display_changed,
            btnLabels=["Box plots", "Annotated boxes",
                       "Compare medians", "Compare means"])
        self.stretching_box = gui.checkBox(
            self.controlArea, self, 'stretched', "Stretch bars", box='Display',
            callback=self.display_changed).box
        gui.rubber(self.controlArea)

        # Main area: the graphics scene/view and a label for test results.
        gui.widgetBox(self.mainArea, addSpace=True)
        self.boxScene = QtGui.QGraphicsScene()
        self.boxView = QtGui.QGraphicsView(self.boxScene)
        self.boxView.setRenderHints(QtGui.QPainter.Antialiasing |
                                    QtGui.QPainter.TextAntialiasing |
                                    QtGui.QPainter.SmoothPixmapTransform)
        self.mainArea.layout().addWidget(self.boxView)
        self.posthoc_lines = []
        e = gui.widgetBox(self.mainArea, addSpace=False, orientation=0)
        self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>")
        self.mainArea.setMinimumWidth(650)

        # NOTE(review): this instance attribute shadows any `warning`
        # attribute inherited from the base widget - confirm it is intended.
        self.warning = gui.widgetBox(self.controlArea, "Warning:")
        self.warning_info = gui.widgetLabel(self.warning, "")
        self.warning.hide()

        # Separate lists here as well, for the same reason as above.
        self.stats = []
        self.dist = []
        self.conts = []
        self.is_continuous = False
        self.set_display_box()

        dlg = self.createColorDialog()
        self.discPalette = dlg.getDiscretePalette("discPalette")

    def resizeEvent(self, ev):
        """Forward the resize event to the base class and redo the layout."""
        super().resizeEvent(ev)
        self.layout_changed()

    # noinspection PyPep8Naming
    def setColors(self):
        """
        Show the color dialog and, if it is accepted, store the chosen
        schemas and discrete palette and refresh the display.
        """
        dialog = self.createColorDialog()
        if not dialog.exec_():
            # Dialog was rejected: keep the current colors.
            return
        self.colorSettings = dialog.getColorSchemas()
        self.selectedSchemaIndex = dialog.selectedSchemaIndex
        self.discPalette = dialog.getDiscretePalette("discPalette")
        self.display_changed()

    # noinspection PyPep8Naming
    def createColorDialog(self):
        """
        Return a color palette dialog with a discrete palette named
        "discPalette", initialized from the stored color settings.
        """
        dialog = colorpalette.ColorPaletteDlg(self, "Color Palette")
        dialog.createDiscretePalette("discPalette", "Discrete Palette")
        dialog.setColorSchemas(self.colorSettings, self.selectedSchemaIndex)
        return dialog

    # noinspection PyTypeChecker
    def data(self, dataset):
        """
        Signal handler for the input data.

        A dataset that is falsy or has an empty domain is treated as no
        data. The list boxes are cleared and, when data is present, refilled
        with the domain's variables before `attr_changed` is called;
        otherwise everything is reset.
        """
        unusable = dataset is not None and (
            not bool(dataset) or not len(dataset.domain))
        if unusable:
            dataset = None

        self.closeContext()
        self.ddataset = dataset
        self.grouping_select = []
        self.attributes_select = []
        self.attr_list_box.clear()
        self.attrCombo.clear()

        if not dataset:
            self.reset_all_data()
            return

        self.openContext(self.ddataset)
        self.attributes = [(var.name, vartype(var))
                           for var in dataset.domain]
        # Only discrete variables can be used for grouping; the first entry
        # stands for "no grouping".
        discrete_vars = [(var.name, vartype(var))
                         for var in dataset.domain
                         if isinstance(var, DiscreteVariable)]
        self.grouping = ["None"] + discrete_vars
        self.grouping_select = [0]
        self.attributes_select = [0]
        self.attr_changed()

    def reset_all_data(self):
        """Clear both list boxes and the scene, and send None on outputs."""
        for list_box in (self.attr_list_box, self.attrCombo):
            list_box.clear()
        self.boxScene.clear()
        # NOTE(review): "Significant data" is not among the `outputs`
        # declared on this class - confirm this send is still valid.
        for channel in ("Basic statistic", "Significant data"):
            self.send(channel, None)

    def attr_changed(self):
        """
        Callback for a change of the variable or grouping selection:
        recompute the box data, show the matching display box and redo the
        layout.
        """
        self.compute_box_data()
        self.set_display_box()
        self.layout_changed()

    def compute_box_data(self):
        """
        Compute the distribution or contingency for the selected variable.

        With a grouping selected, `self.conts` holds the contingency and
        `self.dist` is emptied; without it, `self.dist` holds the
        distribution and `self.conts` is emptied. For a continuous variable
        `self.stats` is rebuilt from them. Stats with `N == 0` are dropped.
        """
        dataset = self.ddataset
        if dataset is None:
            self.stats = self.dist = self.conts = []
            return

        domain = dataset.domain
        attr_index = self.attributes_select[0]
        self.is_continuous = isinstance(domain[attr_index],
                                        ContinuousVariable)
        group_choice = self.grouping_select[0]
        if not group_choice:
            # No grouping: a single distribution over the whole dataset.
            self.dist = datacaching.getCached(
                dataset, distribution.get_distribution,
                (dataset, attr_index))
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist)]
            self.label_txts = [""]
        else:
            group_name = self.grouping[group_choice][0]
            group_index = domain.index(group_name)
            self.dist = []
            self.conts = datacaching.getCached(
                dataset, contingency.get_contingency,
                (dataset, attr_index, group_index))
            if self.is_continuous:
                self.stats = [BoxData(cont) for cont in self.conts]
            self.label_txts = domain[group_index].values
        self.stats = [entry for entry in self.stats if entry.N > 0]

    def set_display_box(self):
        """
        Show the display options that match the variable type: the sorting
        radio buttons for a continuous variable, the stretching checkbox
        otherwise.
        """
        if not self.is_continuous:
            self.stretching_box.show()
            self.sorting_combo.hide()
            return
        self.stretching_box.hide()
        self.sorting_combo.show()
        # The options are disabled when there are fewer than two boxes.
        self.sorting_combo.setDisabled(len(self.stats) < 2)

    def clear_scene(self):
        """Remove all items from the scene and forget the post-hoc lines."""
        self.boxScene.clear()
        self.posthoc_lines = []

    def layout_changed(self):
        """
        Rebuild the scene elements.

        Does nothing without data. Discrete variables are delegated to
        `display_changed_disc`. For continuous variables the labels, boxes
        and attribute labels are constructed and added to the scene, after
        which `display_changed` positions them.
        """
        self.clear_scene()
        if not len(self.conts) and not len(self.dist):
            return
        if not self.is_continuous:
            return self.display_changed_disc()

        attr_name = self.attributes[self.attributes_select[0]][0]
        variable = self.ddataset.domain[attr_name]

        self.mean_labels = [
            self.mean_label(stat, variable, text)
            for stat, text in zip(self.stats, self.label_txts)]
        self.draw_axis()
        self.boxes = [self.box_group(stat) for stat in self.stats]
        self.labels = [
            self.label_group(stat, variable, mean_lab)
            for stat, mean_lab in zip(self.stats, self.mean_labels)]
        self.attr_labels = [self.attr_label(text)
                            for text in self.label_txts]
        scene_items = itertools.chain(
            self.labels, self.boxes, self.attr_labels)
        for item in scene_items:
            self.boxScene.addItem(item)
        self.display_changed()

    def display_changed(self):
        """
        Position the already constructed elements according to the current
        display setting, resize the scene, and update the test results.

        Discrete variables are delegated to `display_changed_disc`.
        """
        if not self.is_continuous:
            return self.display_changed_disc()

        n_stats = len(self.stats)
        self.order = list(range(n_stats))
        sort_attr = self._sorting_criteria_attrs[self.display]
        if sort_attr:
            self.order.sort(key=lambda i: getattr(self.stats[i], sort_attr))
        # Display mode 1 ("Annotated boxes") uses taller rows.
        annotated = self.display == 1
        row_height = 90 if annotated else 60

        for row, box_index in enumerate(self.order):
            y = (row - n_stats) * row_height + 10
            self.boxes[box_index].setY(y)

            value_labels = self.labels[box_index]
            if annotated:
                value_labels.show()
                value_labels.setY(y)
            else:
                value_labels.hide()

            name_label = self.attr_labels[box_index]
            name_label.setY(y - 15 - name_label.boundingRect().height())
            if annotated:
                name_label.hide()
                continue
            # The label's x position depends on the display mode; index 1
            # is a placeholder, since mode 1 never reaches this point.
            stat = self.stats[box_index]
            x_shift = 5 / self.scale_x
            positions = (stat.q25, -1,
                         stat.median + x_shift, stat.mean + x_shift)
            name_label.show()
            name_label.setX(positions[self.display] * self.scale_x)

        total_height = n_stats * row_height
        scene_rect = QtCore.QRectF(
            self.scene_min_x, -30 - total_height,
            self.scene_width, total_height + 90)
        self.boxScene.setSceneRect(scene_rect)
        self.boxView.centerOn(self.scene_min_x + self.scene_width / 2,
                              -30 - total_height / 2 + 45)

        self.compute_tests()
        self.show_posthoc()

    def display_changed_disc(self):
        """
        Draw everything for a discrete variable.

        Clears the scene, draws the axis, builds one bar per group (or a
        single bar without grouping) and places each bar with its label in
        40-unit-high rows, then resizes the scene and centers the view.
        """
        self.clear_scene()
        self.attr_labels = [self.attr_label(lab) for lab in self.label_txts]
        self.draw_axis_disc()
        if self.grouping_select[0]:
            self.discPalette.set_number_of_colors(len(self.conts[0]))
            self.boxes = [self.strudel(cont) for cont in self.conts]
        else:
            self.discPalette.set_number_of_colors(len(self.dist))
            self.boxes = [self.strudel(self.dist)]

        n_boxes = len(self.boxes)
        for row, box in enumerate(self.boxes):
            y = (-n_boxes + row) * 40 + 10
            self.boxScene.addItem(box)
            box.setPos(0, y)
            label = self.attr_labels[row]
            b = label.boundingRect()
            # Right-align the label to the left of the bar, centered on y.
            label.setPos(-b.width() - 10, y - b.height() / 2)
            self.boxScene.addItem(label)
        # The height is `n_boxes * 40 + 90`. The previous
        # `len(self.boxes * 40)` gave the same number, but only by building
        # a list repeated 40 times.
        self.boxScene.setSceneRect(-self.label_width - 5,
                                   -30 - n_boxes * 40,
                                   self.scene_width, n_boxes * 40 + 90)
        self.boxView.centerOn(self.scene_width / 2,
                              -30 - n_boxes * 40 / 2 + 45)

    # noinspection PyPep8Naming
    def compute_tests(self):
        """
        Compute the significance test for the current display mode and show
        its result in the `infot1` label.

        Nothing is computed for display modes below 2 or with fewer than two
        groups. For "Compare means" (display 3) a t-test is used for two
        groups and ANOVA for more; for "Compare medians" (display 2) the
        non-parametric tests are commented out, so the text stays empty.
        The p-value is stored in `self.p`.
        """
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests
        def stat_ttest():
            """Return (t, p) of a two-sided t-test on the two groups."""
            d1, d2 = self.stats
            # Despite its name this is the sum of per-group var / N; the
            # degrees of freedom below follow the Welch-Satterthwaite form.
            pooled_var = d1.var / d1.N + d2.var / d2.N
            df = pooled_var ** 2 / \
                ((d1.var / d1.N) ** 2 / (d1.N - 1) +
                 (d2.var / d2.N) ** 2 / (d2.N - 1))
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            """Return (F, p) of a one-way ANOVA over all groups."""
            N = sum(stat.N for stat in self.stats)
            grand_avg = sum(stat.N * stat.mean for stat in self.stats) / N
            var_between = sum(stat.N * (stat.mean - grand_avg) ** 2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            # NOTE(review): weights each group's variance by N; whether
            # this is right depends on how `stat.var` is defined - confirm.
            var_within = sum(stat.N * stat.var for stat in self.stats)
            df_within = N - len(self.stats)
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        self.warning.hide()
        if self.display < 2 or len(self.stats) < 2:
            t = ""
        elif any(s.N <= 1 for s in self.stats):
            t = "At least one group has just one instance, " \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.display == 2:
                t = ""
                # z, self.p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, self.p)
            else:
                t, self.p = stat_ttest()
                t = "Student's t: %.3f (p=%.3f)" % (t, self.p)
        else:
            if self.display == 2:
                t = ""
                # U, self.p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, self.p)
            else:
                F, self.p = stat_ANOVA()
                t = "ANOVA: %.3f (p=%.3f)" % (F, self.p)
        self.infot1.setText("<center>%s</center>" % t)

    @staticmethod
    def attr_label(text):
        """Return a new simple text graphics item that displays `text`."""
        item = QtGui.QGraphicsSimpleTextItem(text)
        return item

    def mean_label(self, stat, attr, val_name):
        """Build the "mean ± deviation" label of one group as an item group.

        The mean is centred horizontally on x=0 with the deviation text to
        its right; when `val_name` is given, "<val_name>: " is placed to the
        left of the mean. The returned group carries an extra attribute
        ``min_x`` with the x coordinate of its leftmost text.

        :param stat: object providing ``mean`` and ``dev``
        :param attr: object providing ``number_of_decimals``
        :param val_name: name of the group, or a false value for no name
        :return: the ``QGraphicsItemGroup`` holding the text items
        """
        decimals = attr.number_of_decimals + 1
        font = self._label_font
        group = QtGui.QGraphicsItemGroup()

        mean_item = QtGui.QGraphicsSimpleTextItem(
            "%.*f" % (decimals, stat.mean), group)
        mean_item.setFont(font)
        mean_rect = mean_item.boundingRect()
        half_width = mean_rect.width() / 2
        text_height = mean_rect.height()
        mean_item.setPos(-half_width, -text_height)

        dev_item = QtGui.QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (decimals, stat.dev), group)
        dev_item.setFont(font)
        dev_item.setPos(half_width, -text_height)

        if not val_name:
            group.min_x = -half_width
            return group

        name_item = QtGui.QGraphicsSimpleTextItem(val_name + ": ", group)
        name_item.setFont(font)
        name_item.setBrush(self._attr_brush)
        name_rect = name_item.boundingRect()
        group.min_x = -half_width - name_rect.width()
        name_item.setPos(group.min_x, -text_height)
        return group

    def draw_axis(self):
        """Draw the horizontal axis and set self.scale_x.

        Besides ``self.scale_x`` (scene units per data unit) the method also
        sets ``self.scene_min_x`` and ``self.scene_width``, and adds the axis
        line, its ticks and the tick labels to ``self.boxScene``.
        """
        bottom = min(stat.a_min for stat in self.stats)
        top = max(stat.a_max for stat in self.stats)

        first_val, step = compute_scale(bottom, top)
        # Move the first tick down until it is not above the smallest value.
        while bottom < first_val:
            first_val -= step
        bottom = first_val
        no_ticks = math.ceil((top - first_val) / step) + 1
        # Extend the top so that the last tick is not below the largest value.
        top = max(top, first_val + (no_ticks - 1) * step)

        # The drawn range must also contain mean +/- deviation of every group.
        gbottom = min(bottom, min(stat.mean - stat.dev for stat in self.stats))
        gtop = max(top, max(stat.mean + stat.dev for stat in self.stats))

        bv = self.boxView
        # Viewport rectangle shrunk by fixed margins.
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        # mlb is the leftmost edge of any mean label, converted to data units
        # with the current scale.
        mlb = min(stat.mean + mean_lab.min_x / scale_x
                  for stat, mean_lab in zip(self.stats, self.mean_labels))
        if mlb < gbottom:
            gbottom = mlb
            self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        self.scene_min_x = gbottom * scale_x
        self.scene_width = (gtop - gbottom) * scale_x

        val = first_val
        # presumably the name of the selected attribute, used to look up its
        # descriptor in the domain -- TODO confirm
        attr = self.attributes[self.attributes_select[0]][0]
        attr_desc = self.ddataset.domain[attr]
        # One tick and one label per step, from first_val up to (at least) top.
        while True:
            l = self.boxScene.addLine(val * scale_x, -1, val * scale_x, 1,
                                      self._pen_axis_tick)
            l.setZValue(100)
            t = self.boxScene.addSimpleText(
                attr_desc.repr_val(val), self._axis_font)
            t.setFlags(t.flags() |
                       QtGui.QGraphicsItem.ItemIgnoresTransformations)
            r = t.boundingRect()
            # Centre the label horizontally under its tick.
            t.setPos(val * scale_x - r.width() / 2, 8)
            if val >= top:
                break
            val += step
        # The axis line itself, extended by 4 units beyond the end ticks.
        self.boxScene.addLine(bottom * scale_x - 4, 0,
                              top * scale_x + 4, 0, self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis and set self.scale_x for discrete attributes.

        In stretched mode the axis always has ten ticks spaced by 10 (0 to
        100); otherwise its range covers the largest box, i.e. the largest
        sum of a distribution. If that sum is 0, ``self.scale_x`` is set to 1
        and nothing is drawn. Besides ``self.scale_x`` the method sets
        ``self.scene_width`` and ``self.label_width``.
        """
        if self.stretched:
            step = steps = 10
        else:
            if self.grouping_select[0]:
                max_box = max(float(np.sum(dist)) for dist in self.conts)
            else:
                max_box = float(np.sum(self.dist))
            if max_box == 0:
                self.scale_x = 1
                return
            _, step = compute_scale(0, max_box)
            # If compute_scale returns a step below 1, truncating it would
            # give 0, which divides by zero below and is an invalid step for
            # range(); ticks are therefore at least one unit apart.
            step = max(int(step), 1)
            steps = int(math.ceil(max_box / step))
        max_box = step * steps

        bv = self.boxView
        # Viewport rectangle shrunk by fixed margins.
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        # Width reserved for labels: the widest label, but at least 40 and at
        # most a third of the scene.
        lab_width = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = max(lab_width, 40)
        lab_width = min(lab_width, self.scene_width / 3)
        self.label_width = lab_width
        self.scale_x = scale_x = (self.scene_width - lab_width - 10) / max_box

        self.boxScene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            l = self.boxScene.addLine(val * scale_x, -1, val * scale_x, 1,
                                      self._pen_axis_tick)
            l.setZValue(100)
            t = self.boxScene.addSimpleText(str(val), self._axis_font)
            t.setPos(val * scale_x - t.boundingRect().width() / 2, 8)
        if self.stretched:
            # The axis was drawn for 0..100; the stored scale is multiplied
            # by 100 (strudel() divides stretched values by their sum).
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        """Return an item group with the numeric labels of one box.

        The group contains `mean_lab` placed above the mean, and text labels
        for the median and both quartiles with short connecting lines. A
        quartile label that would come within 5 units of the median label is
        moved aside and connected to its position with an angled line.

        :param stat: object providing ``mean``, ``median``, ``q25``, ``q75``
        :param attr: object providing ``number_of_decimals``
        :param mean_lab: graphics item with the label for the mean
        :return: the ``QGraphicsItemGroup`` with all labels
        """
        def centered_text(val, pos):
            # Text item for `val`, a child of `labels`, horizontally centred
            # on x=pos at y=22.
            t = QtGui.QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            # Short vertical line at x, from y=12 to y=20; mirrored to
            # negative y when down is -1.
            QtGui.QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            # Move `label` so that its left edge is at `to`, and draw an
            # angled connector from x=frm to the label's centre. Uses t_box
            # (bounding rect of the label) from the enclosing scope.
            label.setX(to)
            to += t_box.width() / 2
            path = QtGui.QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QtGui.QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QtGui.QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        msc = stat.median * self.scale_x
        med_t = centered_text(stat.median, msc)
        # NOTE(review): the name suggests half of the width, but this is the
        # full width of the median label -- confirm which one is intended.
        med_box_width2 = med_t.boundingRect().width()
        line(msc)

        # Lower quartile: shift its label to the left if it would come too
        # close to the median label.
        x = stat.q25 * self.scale_x
        t = centered_text(stat.q25, x)
        t_box = t.boundingRect()
        med_left = msc - med_box_width2
        if x + t_box.width() / 2 >= med_left - 5:
            move_label(t, x, med_left - t_box.width() - 5)
        else:
            line(x)

        # Upper quartile: shift its label to the right if it would come too
        # close to the median label.
        x = stat.q75 * self.scale_x
        t = centered_text(stat.q75, x)
        t_box = t.boundingRect()
        med_right = msc + med_box_width2
        if x - t_box.width() / 2 <= med_right + 5:
            move_label(t, x, med_right + 5)
        else:
            line(x)

        return labels

    def box_group(self, stat, height=20):
        """Return an item group with the box plot of a single group.

        The group consists of whiskers at the minimum and maximum, a line
        between them, a mark at the mean, a line spanning mean ± deviation,
        a rectangle between the quartiles and a line at the median. All x
        coordinates are the statistics multiplied by ``self.scale_x``.

        :param stat: object providing ``a_min``, ``a_max``, ``mean``, ``dev``,
            ``q25``, ``q75`` and ``median``
        :param height: height of the quartile rectangle
        :return: the ``QGraphicsItemGroup`` with the box
        """
        factor = self.scale_x
        group = QtGui.QGraphicsItemGroup()

        def segment(x_from, y_from, x_to, y_to):
            # x is given in data units and scaled here; y is used as it is
            return QtGui.QGraphicsLineItem(
                x_from * factor, y_from, x_to * factor, y_to, group)

        low_whisker = segment(stat.a_min, -1.5, stat.a_min, 1.5)
        high_whisker = segment(stat.a_max, -1.5, stat.a_max, 1.5)
        range_line = segment(stat.a_min, 0, stat.a_max, 0)
        mean_mark = segment(stat.mean, -height / 3, stat.mean, height / 3)
        low_whisker.setPen(self._pen_paramet)
        high_whisker.setPen(self._pen_paramet)
        mean_mark.setPen(self._pen_paramet)
        range_line.setPen(self._pen_dotted)

        dev_line = segment(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0)
        dev_line.setPen(self._pen_paramet)

        quartile_rect = QtGui.QGraphicsRectItem(
            stat.q25 * factor, -height / 2,
            (stat.q75 - stat.q25) * factor, height, group)
        quartile_rect.setBrush(self._box_brush)
        quartile_rect.setPen(QtGui.QPen(QtCore.Qt.NoPen))
        quartile_rect.setZValue(-200)

        median_mark = segment(stat.median, -height / 2,
                              stat.median, height / 2)
        median_mark.setPen(self._pen_median)
        median_mark.setZValue(-150)

        return group

    def strudel(self, dist):
        """Return an item group with one coloured rectangle per value.

        Each element of `dist` that is at least 1e-6 gets a rectangle whose
        width is the element (divided by the sum of `dist` when
        ``self.stretched`` is set) times ``self.scale_x``; rectangles follow
        one another along x and are coloured by the element's index. When the
        sum of `dist` is below 1e-6, a single thin rectangle is added.

        :param dist: sequence of numbers
        :return: the ``QGraphicsItemGroup`` with the rectangles
        """
        total = np.sum(dist)
        group = QtGui.QGraphicsItemGroup()
        if total < 1e-6:
            QtGui.QGraphicsRectItem(0, -10, 1, 10, group)

        color_of = self.discPalette.getRGB
        offset = 0
        for index, amount in enumerate(dist):
            if amount < 1e-6:
                continue
            width = amount / total if self.stretched else amount
            width = width * self.scale_x
            # leave a gap of one unit on each side of the rectangle
            segment = QtGui.QGraphicsRectItem(
                offset + 1, -6, width - 2, 12, group)
            segment.setBrush(QtGui.QBrush(QtGui.QColor(*color_of(index))))
            segment.setPen(QtGui.QPen(QtCore.Qt.NoPen))
            offset += width
        return group

    def show_posthoc(self):
        """Redraw the vertical guide lines and the grouping lines.

        Previously drawn lines (``self.posthoc_lines``) are removed first.
        Nothing is drawn when ``self.display`` is below 2 or there are fewer
        than two groups. Otherwise each box gets vertical lines at the x
        position of the statistic selected by ``self.display``, and
        horizontal lines connect runs of boxes whose positions are within
        1.5 scene units of the first box of the run.
        """
        def line(y0, y1):
            # Vertical line at the current x (taken from the enclosing loop).
            it = self.boxScene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.boxScene.removeItem(self.posthoc_lines.pop())
        if self.display < 2 or len(self.stats) < 2:
            return
        # Name of the statistic (attribute of a stat object) to mark.
        crit_line = self._sorting_criteria_attrs[self.display]
        xs = []
        y_up = -len(self.stats) * 60 + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line) * self.scale_x
            xs.append(x)
            by = y_up + pos * 60
            line(by + 12, 3)
            line(by - 12, by - 25)

        # used_to[row] is the index of the last box covered by the line most
        # recently drawn in that row.
        used_to = []
        last_to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            # Find the last box whose position is within 1.5 of box `frm`;
            # if the loop is not broken, `to` stays at the last index.
            # presumably xs is in ascending order -- TODO confirm
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            # Skip runs that end where the previous line ended, and runs of
            # a single box.
            if last_to == to or frm == to:
                continue
            # Reuse the first row whose last line ended before `frm`;
            # open a new row when there is none.
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = - 6 - rowi * 6
            it = self.boxScene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                       self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to
Example #16
0
class ProbabilityPlot(OWWidget):
    """Widget drawing probability, Q-Q and P-P plots of numeric variables.

    The first list selects the plotted variable; the second list is shown
    only for the two-sample Q-Q plot and selects the variable to compare
    with. Missing values are replaced by 0.0 before plotting.
    """
    name = "Probability Plot"
    icon = "icons/probplot.svg"
    want_main_area = True
    inputs = [("Data", Orange.data.Table, "set_data")]
    settingsHandler = DomainContextHandler()
    attribute = ContextSetting(None)
    group_var = ContextSetting(None)

    def __init__(self):
        super().__init__()
        self.distribution_idx = 0
        self.var_data = np.array([])
        self.column_data = np.array([])
        self.dataset = None
        self.column_idx = 0
        self.var_idx = 0
        self.available_plot = ["Probability Plot", "Q-Q Plot", "P-P Plot",
                               "Q-Q Plot of 2 samples"]
        # Variables offered in the list views (continuous only) ...
        self.attrs = VariableListModel()
        # ... and all variables, used to translate a variable to its index.
        self.all_attrs = VariableListModel()

        gui.listView(
            self.controlArea, self, "attribute", box="First variable",
            model=self.attrs, callback=self.attr_changed)
        self.view2 = gui.listView(
            self.controlArea, self, "group_var", box="Second variable",
            model=self.attrs, callback=self.var_changed)
        box = gui.vBox(self.controlArea, 'Type of plot')
        self.distribution_choose = gui.radioButtonsInBox(
            box, self, 'distribution_idx',
            btnLabels=self.available_plot,
            callback=self.plot_changed,
        )
        self.figure = plt.figure()
        self.canvas = FigureCanvas(self.figure)
        self.mainArea.layout().addWidget(self.canvas)

    def set_data(self, dataset):
        """Handle new input data; `None` or empty data clears the widget."""
        self.view2.hide()
        self.clear_plot()
        if dataset is not None and (
                not bool(dataset) or not len(dataset.domain)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.attribute = None
        self.group_var = None
        # Forget the columns of the previous dataset so that they cannot be
        # plotted after the input has changed.
        self.column_data = np.array([])
        self.var_data = np.array([])
        if not dataset:
            # Without data the lists must not offer variables of the
            # previous dataset.
            self.attrs[:] = []
            self.all_attrs[:] = []
            return

        domain = dataset.domain

        # all attributes from the dataset
        self.all_attrs[:] = list(domain) + [
            meta for meta in domain.metas
            if meta.is_continuous or meta.is_discrete]
        # attributes shown in the lists
        self.attrs[:] = [a for a in chain(domain.variables, domain.metas)
                         if a.is_continuous]
        # initial selection; a matching stored context may override it
        if self.attrs:
            self.attribute = self.attrs[0]
            self.group_var = self.attrs[0]
            self.openContext(self.dataset)
            self.var_changed()
            self.attr_changed()

    def plot_changed(self):
        """
        Redraw the plot of the type selected by ``self.distribution_idx``.

        The list of second variables is shown only for the two-sample
        Q-Q plot. Nothing is drawn when there is no data to plot.
        """
        self.clear_plot()

        if self.distribution_idx == 3:
            self.view2.show()
        elif self.distribution_idx in (0, 1, 2):
            self.view2.hide()
        if not len(self.column_data):
            return

        if self.distribution_idx == 0:
            self.prob_plot()
        elif self.distribution_idx == 1:
            self.qq_plot()
        elif self.distribution_idx == 2:
            self.pp_plot()
        elif self.distribution_idx == 3:
            self.qq_plot_2samples()

    # NOTE(review): the plotting methods below call ``Axes.hold``, which
    # newer matplotlib releases no longer provide -- confirm the supported
    # matplotlib version.

    def prob_plot(self):
        """
        Draw the probability plot of the selected column against the
        normal distribution.
        """
        self.ax = self.figure.add_subplot(111)
        self.ax.hold(True)
        # The plot is drawn through pyplot, i.e. onto pyplot's current
        # figure (presumably self.figure -- TODO confirm).
        stats.probplot(self.column_data, dist="norm", plot=plt)
        self.canvas.draw()

    def qq_plot_2samples(self):
        """
        Draw the Q-Q plot of the first against the second selected column.
        """
        self.ax = self.figure.add_subplot(111)
        self.ax.hold(True)
        pp_x = sm.ProbPlot(self.column_data)
        pp_y = sm.ProbPlot(self.var_data)
        qqplot_2samples(pp_x, pp_y, ax=self.ax)
        self.canvas.draw()

    def pp_plot(self):
        """
        Draw the P-P plot of the selected column.
        """
        self.ax = self.figure.add_subplot(111)
        self.ax.hold(True)
        probplot = sm.ProbPlot(self.column_data)
        probplot.ppplot(ax=self.ax, line='45')
        self.canvas.draw()

    def qq_plot(self):
        """
        Draw the Q-Q plot of the selected column.
        """
        self.ax = self.figure.add_subplot(111)
        self.ax.hold(True)
        sm.qqplot(self.column_data, line="q", ax=self.ax)
        self.canvas.draw()

    def clear_plot(self):
        """
        Replace the plot with an empty one; called after every change of
        the plot type or of a selected attribute.
        """
        self.ax = self.figure.add_subplot(111)
        self.ax.hold(False)
        self.ax.plot([], '*-')
        self.canvas.draw()

    def attr_changed(self):
        """
        Store the column index of the selected first variable and redraw.
        """
        self.clear_plot()
        for index, variable in enumerate(self.all_attrs):
            if self.attribute == variable:
                self.column_idx = index
        self.var_data = self.var()
        self.column_data = self.column()
        self.plot_changed()

    def var_changed(self):
        """
        Store the column index of the selected second variable (used by the
        two-sample Q-Q plot) and redraw.
        """
        self.clear_plot()
        for index, variable in enumerate(self.all_attrs):
            if self.group_var == variable:
                self.var_idx = index
        self.var_data = self.var()
        self.column_data = self.column()
        self.plot_changed()

    def _column_values(self, index):
        """
        Return the values of column `index` as an array, with missing
        values replaced by 0.0; an empty array if there is no dataset.
        """
        if self.dataset is None:
            return np.array([])
        rows = self.dataset[:, index]
        return np.array([0.0 if math.isnan(item) else item
                         for row in rows for item in row])

    def column(self):
        """
        Return the data of the first selected column, with 0.0 in place of
        missing values.
        """
        return self._column_values(self.column_idx)

    def var(self):
        """
        Return the data of the second selected column, with 0.0 in place of
        missing values.
        """
        return self._column_values(self.var_idx)

    def clear_scene(self):
        """Close the current settings context and reopen it for the data."""
        self.closeContext()
        self.openContext(self.dataset)
Example #17
0
class OWHyper(OWWidget):
    """Widget showing an image computed from spectra, with a spectra plot.

    The value shown for each spectrum is either an integral of the spectrum
    (the integration limits are set with movable lines in the spectra plot)
    or the value of a selected feature. Instances selected in the image are
    sent to the outputs and shown in the spectra plot.
    """
    name = "HyperSpectra"

    class Inputs:
        data = Input("Data", Orange.data.Table, default=True)

    class Outputs:
        selected_data = Output("Selection", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    icon = "icons/hyper.svg"
    priority = 20
    replaces = ["orangecontrib.infrared.widgets.owhyper.OWHyper"]

    settings_version = 3
    settingsHandler = DomainContextHandler()

    imageplot = SettingProvider(ImagePlot)
    curveplot = SettingProvider(CurvePlotHyper)

    integration_method = Setting(0)
    integration_methods = Integrate.INTEGRALS
    # 0: values are computed from spectra, 1: values of a feature are used
    value_type = Setting(0)
    attr_value = ContextSetting(None)

    # positions of the movable lines in the spectra plot
    lowlim = Setting(None)
    highlim = Setting(None)
    choose = Setting(None)

    graph_name = "imageplot.plotview"  # defined so that the save button is shown

    class Warning(OWWidget.Warning):
        threshold_error = Msg("Low slider should be less than High")

    # Error messages must derive from the widget's Error group (not from
    # Warning) to be reported as errors.
    class Error(OWWidget.Error):
        image_too_big = Msg("Image for chosen features is too big ({} x {}).")

    @classmethod
    def migrate_settings(cls, settings_, version):
        """Update settings stored by older versions of the widget in place.

        Migration is best-effort: settings that do not have the expected
        structure are left as they are.
        """
        if version < 2:
            # delete the saved attr_value to prevent crashes
            try:
                del settings_["context_settings"][0].values["attr_value"]
            except Exception:
                pass

        # migrate selection
        if version <= 2:
            try:
                current_context = settings_["context_settings"][0]
                selection = getattr(current_context, "selection", None)
                if selection is not None:
                    selection = [(i, 1) for i in np.flatnonzero(np.array(selection))]
                    settings_.setdefault("imageplot", {})["selection_group_saved"] = selection
            except Exception:
                pass

    def __init__(self):
        super().__init__()

        dbox = gui.widgetBox(self.controlArea, "Image values")

        rbox = gui.radioButtons(
            dbox, self, "value_type", callback=self._change_integration)

        gui.appendRadioButton(rbox, "From spectra")

        self.box_values_spectra = gui.indentedBox(rbox)

        gui.comboBox(
            self.box_values_spectra, self, "integration_method", valueType=int,
            items=(a.name for a in self.integration_methods),
            callback=self._change_integral_type)
        gui.rubber(self.controlArea)

        gui.appendRadioButton(rbox, "Use feature")

        self.box_values_feature = gui.indentedBox(rbox)

        self.feature_value_model = DomainModel(DomainModel.METAS | DomainModel.CLASSES,
                                               valid_types=DomainModel.PRIMITIVE)
        self.feature_value = gui.comboBox(
            self.box_values_feature, self, "attr_value",
            callback=self.update_feature_value, model=self.feature_value_model,
            sendSelectedValue=True, valueType=str)

        # image above, spectra below
        splitter = QSplitter(self)
        splitter.setOrientation(Qt.Vertical)
        self.imageplot = ImagePlot(self)
        self.imageplot.selection_changed.connect(self.output_image_selection)

        self.curveplot = CurvePlotHyper(self, select=SELECTONE)
        self.curveplot.selection_changed.connect(self.redraw_data)
        self.curveplot.plot.vb.x_padding = 0.005  # pad view so that lines are not hidden
        splitter.addWidget(self.imageplot)
        splitter.addWidget(self.curveplot)
        self.mainArea.layout().addWidget(splitter)

        # line1 and line2 are shown for integrals over a range, line3 for
        # the value at a single position (see _update_integration_type)
        self.line1 = MovableVline(position=self.lowlim, label="", report=self.curveplot)
        self.line1.sigMoved.connect(lambda v: setattr(self, "lowlim", v))
        self.line2 = MovableVline(position=self.highlim, label="", report=self.curveplot)
        self.line2.sigMoved.connect(lambda v: setattr(self, "highlim", v))
        self.line3 = MovableVline(position=self.choose, label="", report=self.curveplot)
        self.line3.sigMoved.connect(lambda v: setattr(self, "choose", v))
        for line in [self.line1, self.line2, self.line3]:
            line.sigMoveFinished.connect(self.changed_integral_range)
            self.curveplot.add_marking(line)
            line.hide()

        self.data = None
        # set while the lines are moved programmatically, to avoid redrawing
        self.disable_integral_range = False

        self.resize(900, 700)
        self._update_integration_type()

        # prepare interface according to the new context
        self.contextAboutToBeOpened.connect(lambda x: self.init_interface_data(x[0]))

    def init_interface_data(self, data):
        """Reinitialize feature selection unless the domain is unchanged."""
        same_domain = (self.data and data and
                       data.domain == self.data.domain)
        if not same_domain:
            self.init_attr_values(data)

    def output_image_selection(self):
        """Send the selected and annotated data; update the spectra plot."""
        if not self.data:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            self.curveplot.set_data(None)
            return

        indices = np.flatnonzero(self.imageplot.selection_group)

        annotated_data = create_groups_table(self.data, self.imageplot.selection_group)
        if annotated_data is not None:
            annotated_data.X = self.data.X  # workaround for Orange's copying on domain conversion
        self.Outputs.annotated_data.send(annotated_data)

        selected = self.data[indices]
        self.Outputs.selected_data.send(selected if selected else None)
        # without a selection the spectra plot shows all data
        if selected:
            self.curveplot.set_data(selected)
        else:
            self.curveplot.set_data(self.data)

    def init_attr_values(self, data):
        """Fill the feature model from `data` and select its first feature."""
        domain = data.domain if data is not None else None
        self.feature_value_model.set_domain(domain)
        self.attr_value = self.feature_value_model[0] if self.feature_value_model else None

    def redraw_data(self):
        """Update the image."""
        self.imageplot.update_view()

    def update_feature_value(self):
        """Callback of the feature combo box."""
        self.redraw_data()

    def _update_integration_type(self):
        """Enable the controls and show the lines used by the current mode."""
        self.line1.hide()
        self.line2.hide()
        self.line3.hide()
        if self.value_type == 0:
            self.box_values_spectra.setDisabled(False)
            self.box_values_feature.setDisabled(True)
            if self.integration_methods[self.integration_method] != Integrate.PeakAt:
                self.line1.show()
                self.line2.show()
            else:
                self.line3.show()
        elif self.value_type == 1:
            self.box_values_spectra.setDisabled(True)
            self.box_values_feature.setDisabled(False)
        QTest.qWait(1)  # first update the interface

    def _change_integration(self):
        # change what to show on the image
        self._update_integration_type()
        self.redraw_data()

    def changed_integral_range(self):
        """Redraw after a line was moved, unless it was moved by the widget."""
        if self.disable_integral_range:
            return
        self.redraw_data()

    def _change_integral_type(self):
        self._change_integration()

    @Inputs.data
    def set_data(self, data):
        """Handle new input data."""
        self.closeContext()

        def valid_context(data):
            # a context is opened only for data with at least one discrete
            # or continuous meta or class variable
            if data is None:
                return False
            annotation_features = [v for v in data.domain.metas + data.domain.class_vars
                                   if isinstance(v, (DiscreteVariable, ContinuousVariable))]
            return len(annotation_features) >= 1

        if valid_context(data):
            self.openContext(data)
        else:
            # to generate valid interface even if context was not loaded
            self.contextAboutToBeOpened.emit([data])
        self.data = data
        self.imageplot.set_data(data)
        self.curveplot.set_data(data)
        self._init_integral_boundaries()
        self.imageplot.update_view()
        self.output_image_selection()

    def _init_integral_boundaries(self):
        """Move the lines into the x range of the data in the spectra plot.

        Limits that are unset or out of the range are reset to the ends of
        the range; the single position is set to the middle of the range if
        unset and clipped to the range otherwise.
        """
        # requires data in curveplot
        self.disable_integral_range = True
        if self.curveplot.data_x is not None and len(self.curveplot.data_x):
            minx = self.curveplot.data_x[0]
            maxx = self.curveplot.data_x[-1]
        else:
            minx = 0.
            maxx = 1.

        if self.lowlim is None or not minx <= self.lowlim <= maxx:
            self.lowlim = minx
        self.line1.setValue(self.lowlim)

        if self.highlim is None or not minx <= self.highlim <= maxx:
            self.highlim = maxx
        self.line2.setValue(self.highlim)

        if self.choose is None:
            self.choose = (minx + maxx)/2
        elif self.choose < minx:
            self.choose = minx
        elif self.choose > maxx:
            self.choose = maxx
        self.line3.setValue(self.choose)
        self.disable_integral_range = False

    def save_graph(self):
        # directly call save_graph so it hides axes
        self.imageplot.save_graph()
Example #18
0
class OWSOM(OWWidget):
    # Widget meta data
    name = "Self-Organizing Map"
    description = "Computation of self-organizing map."
    icon = "icons/SOM.svg"
    keywords = ["SOM"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    # Stored settings; attr_color is stored per data domain (context).
    settingsHandler = DomainContextHandler()
    auto_dimension = Setting(True)
    size_x = Setting(10)
    size_y = Setting(10)
    hexagonal = Setting(1)
    initialization = Setting(0)

    attr_color = ContextSetting(None)
    size_by_instances = Setting(True)
    pie_charts = Setting(False)
    selection = Setting(None, schema_only=True)

    graph_name = "view"

    # Light gray, 2 units wide cosmetic pen shared by all instances.
    _grid_pen = QPen(QBrush(QColor(224, 224, 224)), 2)
    _grid_pen.setCosmetic(True)

    # Container for the controls created in __init__ (see self.opt_controls).
    OptControls = namedtuple(
        "OptControls",
        ("shape", "auto_dim", "spin_x", "spin_y", "initialization", "start"))

    class Warning(OWWidget.Warning):
        ignoring_disc_variables = Msg("SOM ignores discrete variables.")
        missing_colors = \
            Msg("Some data instances have undefined value of '{}'.")
        missing_values = \
            Msg("{} data instance{} with undefined value(s) {} not shown.")
        single_attribute = Msg("Data contains a single numeric column.")

    class Error(OWWidget.Error):
        no_numeric_variables = Msg("Data contains no numeric columns.")
        no_defined_rows = Msg("All rows contain at least one undefined value.")

    def __init__(self):
        """Initialize the state and build the control and the main area."""
        super().__init__()
        # Keep the stored selection; self.selection is reset below.
        self.__pending_selection = self.selection
        self._optimizer = None
        self._optimizer_thread = None
        self.stop_optimization = False

        self.data = self.cont_x = None
        self.cells = self.member_data = None
        self.selection = None
        self.colors = self.thresholds = self.bin_labels = None

        box = gui.vBox(self.controlArea, box="SOM")
        # The combo is created with an empty value name and its index is set
        # by hand from self.hexagonal (0 is hexagonal, 1 is square grid).
        shape = gui.comboBox(box,
                             self,
                             "",
                             items=("Hexagonal grid", "Square grid"))
        shape.setCurrentIndex(1 - self.hexagonal)

        box2 = gui.indentedBox(box, 10)
        auto_dim = gui.checkBox(box2,
                                self,
                                "auto_dimension",
                                "Set dimensions automatically",
                                callback=self.on_auto_dimension_changed)
        self.manual_box = box3 = gui.hBox(box2)
        # Both spin boxes are also created with an empty value name; their
        # values are set by hand from self.size_x and self.size_y.
        spinargs = dict(value="",
                        widget=box3,
                        master=self,
                        minv=5,
                        maxv=100,
                        step=5,
                        alignment=Qt.AlignRight)
        spin_x = gui.spin(**spinargs)
        spin_x.setValue(self.size_x)
        gui.widgetLabel(box3, "×")
        spin_y = gui.spin(**spinargs)
        spin_y.setValue(self.size_y)
        gui.rubber(box3)
        # Manual dimensions can be edited only when not set automatically.
        self.manual_box.setEnabled(not self.auto_dimension)

        initialization = gui.comboBox(box,
                                      self,
                                      "initialization",
                                      items=("Initialize with PCA",
                                             "Random initialization",
                                             "Replicable random"))

        start = gui.button(box,
                           self,
                           "Restart",
                           callback=self.restart_som_pressed,
                           sizePolicy=(QSizePolicy.MinimumExpanding,
                                       QSizePolicy.Fixed))

        self.opt_controls = self.OptControls(shape, auto_dim, spin_x, spin_y,
                                             initialization, start)

        box = gui.vBox(self.controlArea, "Color")
        gui.comboBox(box,
                     self,
                     "attr_color",
                     searchable=True,
                     callback=self.on_attr_color_change,
                     model=DomainModel(placeholder="(Same color)",
                                       valid_types=DomainModel.PRIMITIVE))
        gui.checkBox(box,
                     self,
                     "pie_charts",
                     label="Show pie charts",
                     callback=self.on_pie_chart_change)
        gui.checkBox(box,
                     self,
                     "size_by_instances",
                     label="Size by number of instances",
                     callback=self.on_attr_size_change)

        gui.rubber(self.controlArea)

        self.scene = QGraphicsScene(self)

        # The view is at least 400 x 400, antialiased and without scrollbars.
        self.view = SomView(self.scene)
        self.view.setMinimumWidth(400)
        self.view.setMinimumHeight(400)
        self.view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.view.setRenderHint(QPainter.Antialiasing)
        self.view.selection_changed.connect(self.on_selection_change)
        self.view.selection_moved.connect(self.on_selection_move)
        self.view.selection_mark_changed.connect(self.on_selection_mark_change)
        self.mainArea.layout().addWidget(self.view)

        # Graphics items of the map; nothing is drawn yet.
        self.elements = None
        self.grid = None
        self.grid_cells = None
        self.legend = None

    @Inputs.data
    def set_data(self, data):
        """Handle new input data and start the optimization.

        Only continuous attributes are used. For dense data, rows with
        non-finite values are removed and each column is shifted to a
        minimum of 0 and divided by its sum (columns whose sum is 0 are left
        as they are). Sparse data is only converted to CSR. ``self.data``
        and ``self.cont_x`` stay ``None`` when there is no data, no
        continuous attribute or no row without undefined values.
        """
        def prepare_data():
            # Sets self.data and self.cont_x; uses attrs and cont_attrs
            # defined below, before this function is called.
            if len(cont_attrs) < len(attrs):
                self.Warning.ignoring_disc_variables()
            if len(cont_attrs) == 1:
                self.Warning.single_attribute()
            x = Table.from_table(Domain(cont_attrs), data).X
            if sp.issparse(x):
                self.data = data
                self.cont_x = x.tocsr()
            else:
                # rows in which all values are finite
                mask = np.all(np.isfinite(x), axis=1)
                if not np.any(mask):
                    self.Error.no_defined_rows()
                else:
                    if np.all(mask):
                        self.data = data
                        # copy, since the array is modified in place below
                        self.cont_x = x.copy()
                    else:
                        self.data = data[mask]
                        self.cont_x = x[mask]
                    self.cont_x -= np.min(self.cont_x, axis=0)[None, :]
                    sums = np.sum(self.cont_x, axis=0)[None, :]
                    # avoid division by zero for columns with a zero sum
                    sums[sums == 0] = 1
                    self.cont_x /= sums

        def set_warnings():
            # Report the number of rows removed by prepare_data.
            missing = len(data) - len(self.data)
            if missing == 1:
                self.Warning.missing_values(1, "", "is")
            elif missing > 1:
                self.Warning.missing_values(missing, "s", "are")

        self.stop_optimization_and_wait()

        self.closeContext()
        self.clear()
        self.Error.clear()
        self.Warning.clear()

        if data is not None:
            attrs = data.domain.attributes
            cont_attrs = [var for var in attrs if var.is_continuous]
            if not cont_attrs:
                self.Error.no_numeric_variables()
            else:
                prepare_data()

        if self.data is not None:
            self.controls.attr_color.model().set_domain(data.domain)
            # default color; set before the context is opened below
            self.attr_color = data.domain.class_var
            set_warnings()

        self.openContext(self.data)
        self.set_color_bins()
        self.create_legend()
        self.recompute_dimensions()
        self._set_input_summary(data and len(data))
        self.start_som()

    def _set_input_summary(self, n_tot):
        """
        Show the number of used instances and variables in the status bar.

        Args:
            n_tot: the number of instances in the original input; used to
                report how many of them were ignored
        """
        info = self.info
        if self.data is None:
            info.set_input_summary(info.NoInput)
            return

        used = len(self.data)
        var_part = f"{self.cont_x.shape[1]} numeric variables"
        if used < n_tot:
            # Some rows were dropped: show both counts
            brief = f"{used} ({n_tot})"
            details = (f"{n_tot - used} out of {n_tot} instances ignored "
                       f"because of missing values;\n{var_part}")
        else:
            brief = str(used)
            details = f"{used} instances; {var_part}"

        info.set_input_summary(brief, details)

    def clear(self):
        """
        Reset the widget to the state without data: forget the data, the
        assignment of instances to cells and the coloring, remove drawn
        elements and the selection, and clear warnings and errors.
        """
        self.data = None
        self.cont_x = None
        self.cells = None
        self.member_data = None
        self.attr_color = None
        self.colors = None
        self.thresholds = None
        self.bin_labels = None

        drawn = self.elements
        if drawn is not None:
            self.scene.removeItem(drawn)
            self.elements = None

        self.clear_selection()
        self.controls.attr_color.model().set_domain(None)
        for messages in (self.Warning, self.Error):
            messages.clear()

    def recompute_dimensions(self):
        """
        Set both grid dimensions from the number of instances.

        The dimension is the ceiling of sqrt(5 * sqrt(n)), but at least 5.
        Nothing is done if automatic dimensions are off or there is no data.
        """
        if self.auto_dimension and self.cont_x is not None:
            n_rows = self.cont_x.shape[0]
            side = int(np.ceil(np.sqrt(5 * np.sqrt(n_rows))))
            side = max(5, side)
            self.opt_controls.spin_x.setValue(side)
            self.opt_controls.spin_y.setValue(side)

    def on_auto_dimension_changed(self):
        """
        React to toggling automatic dimensions: enable the manual controls
        only in manual mode, and either recompute the dimensions or round
        the current manual values to multiples of 5.
        """
        self.manual_box.setEnabled(not self.auto_dimension)
        if self.auto_dimension:
            self.recompute_dimensions()
            return

        spins = (self.opt_controls.spin_x, self.opt_controls.spin_y)
        # Read both values before changing either of the spin boxes
        rounded = [int(5 * np.round(spin.value() / 5)) for spin in spins]
        for spin, value in zip(spins, rounded):
            spin.setValue(value)

    def on_attr_color_change(self):
        """
        Update the view after the color attribute changes: recompute the
        color bins, rebuild the legend, rescale the view and redraw.
        """
        # Pie charts can only be chosen when there is a color attribute
        self.controls.pie_charts.setEnabled(self.attr_color is not None)
        self.set_color_bins()
        self.create_legend()
        self.rescale()
        self._redraw()

    def on_attr_size_change(self):
        """Redraw the map elements after the size setting changes."""
        self._redraw()

    def on_pie_chart_change(self):
        """Redraw the map elements after the pie chart setting changes."""
        self._redraw()

    def clear_selection(self):
        """Forget the current selection and redraw the cells accordingly."""
        self.selection = None
        self.redraw_selection()

    def on_selection_change(self, selection, action=SomView.SelectionSet):
        """
        Apply a selection action to the given cells and update the output.

        `self.selection` stores a group number for each cell, with 0 for
        cells that are not selected.

        Args:
            selection: index (e.g. a boolean mask) of the affected cells
            action: one of `SomView`'s selection actions: replace the
                selection, add to the last group, start a new group or
                remove from the selection
        """
        if self.data is None:  # clicks on empty canvas
            return
        if self.selection is None:
            self.selection = np.zeros(self.grid_cells.T.shape, dtype=np.int16)

        groups = self.selection
        if action == SomView.SelectionSet:
            groups[:] = 0
            groups[selection] = 1
        elif action == SomView.SelectionAddToGroup:
            # Add to the last group, or to the first if there is none yet
            groups[selection] = max(1, np.max(groups))
        elif action == SomView.SelectionNewGroup:
            groups[selection] = 1 + np.max(groups)
        elif action & SomView.SelectionRemove:
            groups[selection] = 0

        self.redraw_selection()
        self.update_output()

    def on_selection_move(self, event: QKeyEvent):
        """
        Move the selection to a neighbouring cell with the arrow keys.

        If nothing is selected, Right and Down select the cell (0, 0) and
        any other key selects the cell in the last column of the last row.
        If exactly one cell is selected, the selection moves by one cell in
        the direction of the key and stays within the grid. Nothing happens
        if several cells are selected or if the widget has no data.

        Args:
            event (QKeyEvent): the key event whose `key()` gives the direction
        """
        if self.data is None or self.grid_cells is None:
            # Nothing to navigate; on_selection_change would ignore it anyway
            return

        key = event.key()
        if self.selection is None or not np.any(self.selection):
            if key in (Qt.Key_Right, Qt.Key_Down):
                x = y = 0
            else:
                x = self.size_x - 1
                y = self.size_y - 1
        else:
            x, y = np.nonzero(self.selection)
            if len(x) > 1:
                return
            if key == Qt.Key_Up and y > 0:
                y -= 1
            if key == Qt.Key_Down and y < self.size_y - 1:
                y += 1
            if key == Qt.Key_Left and x:
                x -= 1
            if key == Qt.Key_Right and x < self.size_x - 1:
                x += 1

        # Odd rows of a hexagonal grid are one cell shorter, so the last
        # column does not exist there. This is checked for both branches
        # above: the initial jump to the last cell can land on an odd row too.
        x -= self.hexagonal and x == self.size_x - 1 and y % 2

        if self.selection is not None and self.selection[x, y]:
            return
        # The mask is indexed by [x, y], like `self.selection`, so it needs
        # the transposed shape of `self.grid_cells`, which is indexed by
        # [y, x]; the two shapes differ when the grid is not square.
        selection = np.zeros(self.grid_cells.T.shape, dtype=bool)
        selection[x, y] = True
        self.on_selection_change(selection)

    def on_selection_mark_change(self, marks):
        """Redraw the cells, highlighting those that are set in `marks`."""
        self.redraw_selection(marks=marks)

    def redraw_selection(self, marks=None):
        """
        Set the pen, brush and z-value of every grid cell according to the
        current selection and to the optional marks.

        Args:
            marks: if given, an array indexed by [x, y]; cells with a true
                value are drawn with the mark brush and pen instead of the
                selection colors
        """
        if self.grid_cells is None:
            return

        sel_pen = QPen(QBrush(QColor(128, 128, 128)), 2)
        sel_pen.setCosmetic(True)
        mark_pen = QPen(QBrush(QColor(128, 128, 128)), 4)
        mark_pen.setCosmetic(True)
        # Indexed by bool: the grid pen for unselected cells, sel_pen otherwise
        pens = [self._grid_pen, sel_pen]

        mark_brush = QBrush(QColor(224, 255, 255))
        # `sels` is the highest group number, or False without a selection;
        # False acts as 0 in the arithmetic and in `range` below
        sels = self.selection is not None and np.max(self.selection)
        palette = LimitedDiscretePalette(number_of_colors=sels + 1)
        # brushes[0] is for unselected cells, brushes[g] for selection group g
        brushes = [QBrush(Qt.NoBrush)] + \
                  [QBrush(palette[i].lighter(165)) for i in range(sels)]

        for y in range(self.size_y):
            # odd rows of a hexagonal grid have one cell less
            for x in range(self.size_x - (y % 2) * self.hexagonal):
                # grid_cells is indexed by [y, x]; marks and selection by [x, y]
                cell = self.grid_cells[y, x]
                marked = marks is not None and marks[x, y]
                sel_group = self.selection is not None and self.selection[x, y]
                if marked:
                    cell.setBrush(mark_brush)
                    cell.setPen(mark_pen)
                else:
                    cell.setBrush(brushes[sel_group])
                    cell.setPen(pens[bool(sel_group)])
                # marked and selected cells get a non-zero z-value
                cell.setZValue(marked or sel_group)

    def restart_som_pressed(self):
        """
        Handle the Start/Stop button: request the running optimization to
        stop, or start a new one if none is running.
        """
        if self._optimizer_thread is None:
            self.start_som()
        else:
            self.stop_optimization = True

    def start_som(self):
        """
        Read the settings, redraw the empty grid, clear the selection and
        start the optimization; without data, only update the output.
        """
        self.read_controls()
        self.update_layout()
        self.clear_selection()
        if self.cont_x is None:
            self.update_output()
            return
        self.enable_controls(False)
        self._recompute_som()

    def read_controls(self):
        """
        Copy the grid shape and dimensions from the controls to
        `self.hexagonal`, `self.size_x` and `self.size_y`.
        """
        controls = self.opt_controls
        # the first item in the shape combo stands for the hexagonal grid
        shape_index = controls.shape.currentIndex()
        self.hexagonal = shape_index == 0
        self.size_x = controls.spin_x.value()
        self.size_y = controls.spin_y.value()

    def enable_controls(self, enable):
        """
        Enable or disable the shape and auto-dimension controls and set
        the label of the start button.

        Args:
            enable (bool): `True` enables the controls and labels the button
                "Start"; `False` disables them and labels it "Stop"
        """
        controls = self.opt_controls
        controls.shape.setEnabled(enable)
        controls.auto_dim.setEnabled(enable)
        if enable:
            label = "Start"
        else:
            label = "Stop"
        controls.start.setText(label)

    def update_layout(self):
        """
        Reposition the legend, remove the drawn elements, redraw the empty
        grid and rescale the view.
        """
        self.set_legend_pos()
        if self.elements:  # Prevent having redrawn grid but with old elements
            self.scene.removeItem(self.elements)
            self.elements = None
        self.redraw_grid()
        self.rescale()

    def _redraw(self):
        """
        Remove the drawn elements and, if instances are assigned to cells,
        draw new ones: plain circles, pie charts or colored circles,
        depending on the color attribute and the pie chart setting.
        """
        self.Warning.missing_colors.clear()
        if self.elements:
            self.scene.removeItem(self.elements)
            self.elements = None
        self.view.set_dimensions(self.size_x, self.size_y, self.hexagonal)

        if self.cells is None:
            return

        # The number of instances in each cell is the length of its slice
        sizes = (self.cells[:, :, 1] - self.cells[:, :, 0]).astype(float)
        if self.size_by_instances:
            # the largest cell gets the size of 0.8
            sizes *= 0.8 / np.max(sizes)
        else:
            # all non-empty cells get the same size
            sizes[sizes != 0] = 0.8

        group = QGraphicsItemGroup()
        self.elements = group
        self.scene.addItem(group)

        if self.attr_color is None:
            draw = self._draw_same_color
        elif self.pie_charts:
            draw = self._draw_pie_charts
        else:
            draw = self._draw_colored_circles
        draw(sizes)

    @property
    def _grid_factors(self):
        """
        A pair of factors for positioning cells: the horizontal shift of odd
        rows and the vertical distance between rows. They are
        `(0.5, sqrt3_2)` for the hexagonal grid and `(0, 1)` otherwise.
        """
        return (0.5, sqrt3_2) if self.hexagonal else (0, 1)

    def _draw_same_color(self, sizes):
        """
        Draw a dark gray circle into every non-empty cell.

        Args:
            sizes: an array indexed by [x, y] with the diameters of circles;
                cells with size 0 are skipped
        """
        x_shift, y_step = self._grid_factors
        gray = QColor(64, 64, 64)
        for row in range(self.size_y):
            odd = row % 2
            # odd rows of a hexagonal grid have one cell less
            for col in range(self.size_x - self.hexagonal * odd):
                diameter = sizes[col, row]
                if not diameter:
                    continue
                count = len(self.get_member_indices(col, row))
                circle = ColoredCircle(diameter / 2, gray, 0)
                circle.setPos(col + odd * x_shift, row * y_step)
                circle.setToolTip(f"{count} instances")
                self.elements.addToGroup(circle)

    def _get_color_column(self):
        """
        Return an integer array with the index of color for each instance.

        For a discrete color attribute this is the value of the attribute,
        with `len(self.colors)` for missing values. For a numeric attribute
        it is the index of the bin, as determined by `self.thresholds`.
        """
        color_column = \
            self.data.get_column_view(self.attr_color)[0].astype(float,
                                                                 copy=False)
        if self.attr_color.is_discrete:
            # casting nan to int is invalid; nans are overwritten right after
            with np.errstate(invalid="ignore"):
                int_col = color_column.astype(int)
            int_col[np.isnan(color_column)] = len(self.colors)
        else:
            int_col = np.zeros(len(color_column), dtype=int)
            # The following line is unnecessary because rows with missing
            # numeric data are excluded. Uncomment it if you change SOM to
            # tolerate missing values.
            # int_col[np.isnan(color_column)] = len(self.colors)
            # Later thresholds overwrite earlier ones, so each value ends up
            # with the index of the last threshold that it reaches
            for i, thresh in enumerate(self.thresholds, start=1):
                int_col[color_column >= thresh] = i
        return int_col

    def _tooltip(self, colors, distribution):
        """
        Return an HTML table describing the distribution of colors in a cell.

        The table has a row for each value (or bin) with a non-zero count;
        the row shows a square in the corresponding color, the name, the
        count and the percentage.

        Args:
            colors: colors corresponding to values; their `name()` is used
                as the font color
            distribution: the number of instances with each value
        """
        if self.attr_color.is_discrete:
            values = self.attr_color.values
        else:
            values = self._bin_names()
        tot = np.sum(distribution)
        # hyphens in names are replaced so that names are not wrapped at them
        nbhp = "\N{NON-BREAKING HYPHEN}"
        return '<table style="white-space: nowrap">' + "".join(f"""
            <tr>
                <td>
                    <font color={color.name()}>■</font>
                    <b>{escape(val).replace("-", nbhp)}</b>:
                </td>
                <td>
                    {n} ({n / tot * 100:.1f}&nbsp;%)
                </td>
            </tr>
            """ for color, val, n in zip(colors, values, distribution) if n) \
            + "</table>"

    def _draw_pie_charts(self, sizes):
        """
        Draw a pie chart with the distribution of colors into every
        non-empty cell; empty cells get their tooltip removed.

        Args:
            sizes: an array indexed by [x, y] with the diameters of charts
        """
        x_shift, y_step = self._grid_factors
        color_indices = self._get_color_column()
        palette = self.colors.qcolors_w_nan
        n_colors = len(palette)
        for row in range(self.size_y):
            odd = row % 2
            # odd rows of a hexagonal grid have one cell less
            for col in range(self.size_x - self.hexagonal * odd):
                diameter = sizes[col, row]
                if not diameter:
                    self.grid_cells[row, col].setToolTip("")
                    continue
                rows = self.get_member_indices(col, row)
                counts = np.bincount(color_indices[rows], minlength=n_colors)
                shares = counts.astype(float) / len(rows)
                chart = PieChart(shares, diameter / 2, palette)
                chart.setToolTip(self._tooltip(palette, counts))
                self.elements.addToGroup(chart)
                chart.setPos(col + odd * x_shift, row * y_step)

    def _draw_colored_circles(self, sizes):
        """
        Draw a circle in the color of the majority value into every
        non-empty cell. The proportion of the majority among all members of
        the cell is passed to the circle as its third argument.

        A warning is shown if any instance has no defined color.

        Args:
            sizes: an array indexed by [x, y] with the diameters of circles
        """
        x_shift, y_step = self._grid_factors
        color_indices = self._get_color_column()
        qcolors = self.colors.qcolors_w_nan
        n_colors = len(self.colors)
        for row in range(self.size_y):
            odd = row % 2
            # odd rows of a hexagonal grid have one cell less
            for col in range(self.size_x - self.hexagonal * odd):
                diameter = sizes[col, row]
                if not diameter:
                    continue
                members = self.get_member_indices(col, row)
                known = color_indices[members]
                # indices beyond the palette stand for missing values
                known = known[known < n_colors]
                if len(known) != len(members):
                    self.Warning.missing_colors(self.attr_color.name)
                counts = np.bincount(known, minlength=n_colors)
                majority_color = qcolors[np.argmax(counts)]
                circle = ColoredCircle(diameter / 2, majority_color,
                                       np.max(counts) / len(members))
                circle.setPos(col + odd * x_shift, row * y_step)
                circle.setToolTip(self._tooltip(qcolors, counts))
                self.elements.addToGroup(circle)

    def redraw_grid(self):
        """
        Replace the grid with a new one that matches the current shape and
        dimensions. The cells are stored in `self.grid_cells`, indexed by
        [y, x]; positions missing in odd rows of a hexagonal grid are `None`.
        """
        if self.grid is not None:
            self.scene.removeItem(self.grid)

        group = QGraphicsItemGroup()
        group.setZValue(-200)
        self.grid = group
        cells = np.full((self.size_y, self.size_x), None)
        self.grid_cells = cells

        hexagonal = self.hexagonal
        for row in range(self.size_y):
            odd = row % 2
            # odd rows of a hexagonal grid have one cell less
            for col in range(self.size_x - odd * hexagonal):
                if hexagonal:
                    cell = QGraphicsPathItem(_hexagon_path)
                    cell.setPos(col + odd / 2, row * sqrt3_2)
                else:
                    cell = QGraphicsRectItem(col - 0.5, row - 0.5, 1, 1)
                cells[row, col] = cell
                cell.setPen(self._grid_pen)
                group.addToGroup(cell)
        self.scene.addItem(group)

    def get_member_indices(self, x, y):
        """
        Return the indices of instances assigned to the cell (x, y).

        `self.cells[x, y]` holds the start and the end of the cell's slice
        of `self.member_data`.
        """
        start, stop = self.cells[x, y]
        members = self.member_data[start:stop]
        return members

    def _recompute_som(self):
        """
        Start fitting the SOM on `self.cont_x` in a separate thread.

        The fitting is run by an `Optimizer` object that is moved to a new
        `QThread`. While it runs, it emits `update` with the current weights,
        and `done` and `stopped` at the end; the handlers defined below
        reassign the instances to cells and redraw the map.
        """
        if self.cont_x is None:
            return

        class Optimizer(QObject):
            # progress, a copy of the weights and a copy of ssum_weights
            update = Signal(float, np.ndarray, np.ndarray)
            done = Signal(SOM)
            stopped = Signal()

            def __init__(self, data, widget):
                super().__init__()
                self.som = SOM(
                    widget.size_x,
                    widget.size_y,
                    hexagonal=widget.hexagonal,
                    pca_init=widget.initialization == 0,
                    random_seed=0 if widget.initialization == 2 else None)
                self.data = data
                self.widget = widget

            def callback(self, progress):
                # Copies are emitted so that the receiver's arrays are not
                # shared with the ones that are still being optimized
                self.update.emit(progress, self.som.weights.copy(),
                                 self.som.ssum_weights.copy())
                # False after the widget requested a stop; presumably this
                # makes `fit` stop iterating -- TODO confirm in SOM.fit
                return not self.widget.stop_optimization

            def run(self):
                try:
                    self.som.fit(self.data,
                                 N_ITERATIONS,
                                 callback=self.callback)
                    # Report an exception, but still remove the thread
                finally:
                    self.done.emit(self.som)
                    self.stopped.emit()

        def update(_progress, weights, ssum_weights):
            # Show the intermediate state of the map
            progressbar.advance()
            self._assign_instances(weights, ssum_weights)
            self._redraw()

        def done(som):
            self.enable_controls(True)
            progressbar.finish()
            self._assign_instances(som.weights, som.ssum_weights)
            self._redraw()
            # This is the first time we know what was selected (assuming that
            # initialization is not set to random)
            if self.__pending_selection is not None:
                self.on_selection_change(self.__pending_selection)
                self.__pending_selection = None
            self.update_output()

        def thread_finished():
            # Drop the references; `_optimizer_thread is None` is what other
            # methods check to see whether an optimization is running
            self._optimizer = None
            self._optimizer_thread = None

        progressbar = gui.ProgressBar(self, N_ITERATIONS)

        self._optimizer = Optimizer(self.cont_x, self)
        self._optimizer_thread = QThread()
        self._optimizer_thread.setStackSize(5 * 2**20)
        self._optimizer.update.connect(update)
        self._optimizer.done.connect(done)
        # The optimizer's `stopped` signal quits the thread
        self._optimizer.stopped.connect(self._optimizer_thread.quit)
        self._optimizer.moveToThread(self._optimizer_thread)
        # `run` is invoked when the thread starts
        self._optimizer_thread.started.connect(self._optimizer.run)
        self._optimizer_thread.finished.connect(thread_finished)
        self.stop_optimization = False
        self._optimizer_thread.start()

    def stop_optimization_and_wait(self):
        """
        If an optimization is running, request it to stop, quit its thread
        and wait until the thread finishes.
        """
        thread = self._optimizer_thread
        if thread is None:
            return
        self.stop_optimization = True
        thread.quit()
        thread.wait()
        self._optimizer_thread = None

    def onDeleteWidget(self):
        """
        Stop the optimization, if it is running, and clear the widget
        before it is deleted.
        """
        self.stop_optimization_and_wait()
        self.clear()
        super().onDeleteWidget()

    def _assign_instances(self, weights, ssum_weights):
        """
        Assign instances to cells of the map with the given weights.

        The result is stored in two arrays: `self.member_data` contains
        indices of instances ordered by cells, and `self.cells[x, y]`
        contains the start and the end of the slice of `self.member_data`
        that belongs to the cell (x, y).

        Args:
            weights: weights of the map, passed to `SOM.winner_from_weights`
            ssum_weights: passed to `SOM.winner_from_weights` along with
                `weights`
        """
        if self.cont_x is None:
            return  # the widget is shutting down while signals still processed

        winners = SOM.winner_from_weights(self.cont_x, weights,
                                          ssum_weights, self.hexagonal)
        members = defaultdict(list)
        for row, (cell_x, cell_y) in enumerate(winners):
            members[(cell_x, cell_y)].append(row)
        members.pop(None, None)

        self.cells = cells = np.empty((self.size_x, self.size_y, 2), dtype=int)
        self.member_data = ordered = np.empty(self.cont_x.shape[0], dtype=int)
        start = 0
        for cell_x in range(self.size_x):
            for cell_y in range(self.size_y):
                rows = members[(cell_x, cell_y)]
                stop = start + len(rows)
                ordered[start:stop] = rows
                cells[cell_x, cell_y] = [start, stop]
                start = stop

    def resizeEvent(self, event):
        """Rebuild the legend and rescale the view after a resize."""
        super().resizeEvent(event)
        self.create_legend()  # re-wrap lines if necessary
        self.rescale()

    def rescale(self):
        """
        Set the transformation and the scene rectangle of the view so that
        the grid, and the legend if there is one, fit into the view.
        """
        if self.legend:
            leg_height = self.legend.boundingRect().height()
            leg_extra = 1.5
        else:
            leg_height, leg_extra = 0, 1

        view = self.view
        avail_width = view.width()
        avail_height = view.height() - leg_height
        rows = self.size_y + leg_extra
        scale = min(avail_width / (self.size_x + 1),
                    avail_height / (rows * self._grid_factors[1]))
        view.setTransform(QTransform.fromScale(scale, scale))

        # the legend ignores the transformation, so its height is converted
        # into scene units
        if self.hexagonal:
            rect = (0, -1, self.size_x - 1,
                    rows * sqrt3_2 + leg_height / scale)
        else:
            rect = (-0.25, -0.25, self.size_x - 0.5,
                    self.size_y - 0.5 + leg_height / scale)
        view.setSceneRect(*rect)

    def update_output(self):
        """
        Send the selected and the annotated data to the outputs and update
        the output summary.

        Instances get the group number of the cell to which they belong.
        With more than one group, the annotated data contains the group
        variable; otherwise it only marks the selected instances.
        """
        outputs = self.Outputs
        info = self.info
        if self.data is None:
            outputs.selected_data.send(None)
            outputs.annotated_data.send(None)
            info.set_output_summary(info.NoOutput)
            return

        groups = np.zeros(len(self.data), dtype=int)
        if self.selection is not None and np.any(self.selection):
            for y in range(self.size_y):
                for x in range(self.size_x):
                    members = self.get_member_indices(x, y)
                    groups[members] = self.selection[x, y]

        if not np.any(groups):
            outputs.selected_data.send(None)
            info.set_output_summary(info.NoOutput)
        else:
            sel_data = create_groups_table(self.data, groups, False, "Group")
            outputs.selected_data.send(sel_data)
            info.set_output_summary(str(len(sel_data)))

        if np.max(groups) > 1:
            annotated = create_groups_table(self.data, groups)
        else:
            selected_rows = np.flatnonzero(groups)
            annotated = create_annotated_table(self.data, selected_rows)
        outputs.annotated_data.send(annotated)

    def set_color_bins(self):
        """
        Set `self.colors`, `self.thresholds` and `self.bin_labels` for the
        current color attribute.

        Thresholds and labels are used only for numeric attributes, whose
        values are split into bins; for discrete attributes and without a
        color attribute they are `None`.
        """
        var = self.attr_color
        if var is None:
            self.thresholds = self.bin_labels = self.colors = None
            return
        if var.is_discrete:
            self.thresholds = self.bin_labels = None
            self.colors = var.palette
            return

        column = self.data.get_column_view(var)[0].astype(float)
        make_binnings = time_binnings if var.is_time else decimal_binnings
        # use the last of the proposed binnings
        binning = make_binnings(column, min_bins=4)[-1]
        # the outer thresholds are not needed for assigning values to bins
        self.thresholds = binning.thresholds[1:-1]
        self.bin_labels = (binning.labels[1:-1],
                           binning.short_labels[1:-1])
        self.colors = BinnedContinuousPalette.from_palette(
            var.palette, binning.thresholds)

    def create_legend(self):
        """
        Remove the existing legend and, if there is a color attribute,
        create a new one with an item for each value or bin.
        """
        old_legend = self.legend
        if old_legend is not None:
            self.scene.removeItem(old_legend)
            self.legend = None

        var = self.attr_color
        if var is None:
            return

        names = var.values if var.is_discrete else self._bin_names()

        size = 8
        half = size / 2
        items = []
        for name, color in zip(names, self.colors.qcolors):
            item = QGraphicsItemGroup()
            square = CanvasRectangle(None, -half, -half, size, size,
                                     Qt.gray, color)
            item.addToGroup(square)
            label = CanvasText(None, name, size, 0, Qt.AlignVCenter)
            item.addToGroup(label)
            items.append(item)

        legend = wrap_legend_items(items,
                                   hspacing=20,
                                   vspacing=16 + size,
                                   max_width=self.view.width() - 25)
        self.legend = legend
        legend.setFlags(legend.ItemIgnoresTransformations)
        # shift the legend to the left by half of its width
        shift = QTransform.fromTranslate(-legend.boundingRect().width() / 2, 0)
        legend.setTransform(shift)
        self.scene.addItem(legend)
        self.set_legend_pos()

    def _bin_names(self):
        """
        Return the names of bins for a numeric color attribute: one for
        values below the first label, one for each interval between
        consecutive labels, and one for values from the last label on.
        """
        labels, short_labels = self.bin_labels
        names = [f"< {labels[0]}"]
        for low, high in zip(labels, short_labels[1:]):
            names.append(f"{low} - {high}")
        names.append(f"≥ {labels[-1]}")
        return names

    def set_legend_pos(self):
        """
        Put the legend, if there is one, below the grid, with its position
        at half of the grid's width.
        """
        legend = self.legend
        if legend is None:
            return
        pos_x = self.size_x / 2
        pos_y = (self.size_y + 0.2 + 0.3 * self.hexagonal) * \
            self._grid_factors[1]
        legend.setPos(pos_x, pos_y)

    def send_report(self):
        """
        Add the plot to the report, with a caption that names the color
        attribute if there is one.
        """
        self.report_plot()
        if self.attr_color:
            self.report_caption(
                f"Self-organizing map colored by '{self.attr_color.name}'")
Example #19
0
class OWWordList(OWWidget):
    name = "Word List"
    description = "Create a list of words."
    icon = "icons/WordList.svg"
    priority = 1000

    class Inputs:
        words = Input("Words", Table)

    class Outputs:
        selected_words = Output("Selected Words", Table)
        words = Output("Words", Table)

    class Warning(OWWidget.Warning):
        no_string_vars = Msg("Input needs at least one Text variable.")

    NONE, CACHED, LIBRARY = range(3)  # library list modification types

    want_main_area = False
    resizing_enabled = True

    settingsHandler = DomainContextHandler()
    word_list_library: List[Dict] = Setting([
        {
            "name": WordList.generate_word_list_name([]),
            "words": []
        },
    ])
    word_list_index: int = Setting(0)
    words_var: Optional[StringVariable] = ContextSetting(None)
    update_rule_index: int = Setting(UpdateRules.INTERSECT)
    words: List[str] = Setting(None, schema_only=True)
    selected_words: Set[str] = Setting(set(), schema_only=True)

    def __init__(self):
        """
        Create the models and boxes, build the GUI, restore the saved state
        and arrange for the state to be saved when settings are packed.
        """
        # `super()` already binds the instance; passing `self` explicitly
        # would hand it to the base initializer a second time, as a
        # positional argument
        super().__init__()
        flags = Qt.ItemIsSelectable | Qt.ItemIsEnabled | Qt.ItemIsEditable
        self.library_model = PyListModel([], self, flags=flags)
        self.words_model = PyListModel([], self, flags=flags, enable_dnd=True)

        # The views are created in _setup_library_box and _setup_words_box
        self.library_view: Optional[QListView] = None
        self.words_view: Optional[ListView] = None

        self.__input_words_model = DomainModel(valid_types=(StringVariable, ))
        self.__input_words: Optional[Table] = None

        # Boxes are created without a parent; _setup_gui puts them in a grid
        self.__library_box: QGroupBox = gui.vBox(None, "Library")
        self.__input_box: QGroupBox = gui.vBox(None, "Input")
        self.__words_box: QGroupBox = gui.vBox(None, box=True)
        self.__update_rule_rb: Optional[QRadioButton] = None

        # The actions are created in _setup_words_box
        self.__add_word_action: Optional[QAction] = None
        self.__remove_word_action: Optional[QAction] = None

        self._setup_gui()
        self._restore_state()
        self.settingsAboutToBePacked.connect(self._save_state)

    def _setup_gui(self):
        """
        Fill the three boxes with controls and put them into a grid in the
        control area: library and input in the first column, one below the
        other, and words in the second column.
        """
        layout = QGridLayout()
        gui.widgetBox(self.controlArea, orientation=layout)

        self._setup_library_box()
        self._setup_input_box()
        self._setup_words_box()

        layout.addWidget(self.__library_box, 0, 0)
        layout.addWidget(self.__input_box, 1, 0)
        # NOTE(review): the row span is given as 0 -- confirm that this makes
        # the words box extend over both rows as intended
        layout.addWidget(self.__words_box, 0, 1, 0, 1)

    def _setup_library_box(self):
        """
        Create the view of the library of word lists and, below it, the
        buttons for adding, removing and updating a list, and a menu for
        importing words from a file and saving them to a file.
        """
        self.library_view = QListView(
            editTriggers=QListView.DoubleClicked | QListView.EditKeyPressed,
            minimumWidth=200,
            sizePolicy=QSizePolicy(QSizePolicy.Ignored, QSizePolicy.Expanding),
        )
        self.library_view.setItemDelegate(WordListItemDelegate(self))
        self.library_view.setModel(self.library_model)
        self.library_view.selectionModel().selectionChanged.connect(
            self.__on_library_selection_changed)

        self.__library_box.layout().setSpacing(1)
        self.__library_box.layout().addWidget(self.library_view)

        actions_widget = ModelActionsWidget()
        actions_widget.layout().setSpacing(1)

        # Buttons on the left: add, remove, update
        action = QAction("+", self)
        action.setToolTip("Add a new word list to the library")
        action.triggered.connect(self.__on_add_word_list)
        actions_widget.addAction(action)

        action = QAction("\N{MINUS SIGN}", self)
        action.setToolTip("Remove word list from library")
        action.triggered.connect(self.__on_remove_word_list)
        actions_widget.addAction(action)

        action = QAction("Update", self)
        action.setToolTip("Save changes in the editor to library")
        action.setShortcut(QKeySequence(QKeySequence.Save))
        action.triggered.connect(self.__on_update_word_list)
        actions_widget.addAction(action)

        gui.rubber(actions_widget.layout())

        # A button with a menu for import and export; it is added after
        # the rubber
        action = QAction("More", self, toolTip="More actions")

        new_from_file = QAction("Import Words from File", self)
        new_from_file.triggered.connect(self.__on_import_word_list)

        save_to_file = QAction("Save Words to File", self)
        save_to_file.setShortcut(QKeySequence(QKeySequence.SaveAs))
        save_to_file.triggered.connect(self.__on_save_word_list)

        menu = QMenu(actions_widget)
        menu.addAction(new_from_file)
        menu.addAction(save_to_file)
        action.setMenu(menu)
        button = actions_widget.addAction(action)
        # open the menu immediately when the button is pressed
        button.setPopupMode(QToolButton.InstantPopup)
        self.__library_box.layout().addWidget(actions_widget)

    def __on_library_selection_changed(self, selected: QItemSelection, *_):
        """
        Show the words of the newly selected word list, forget the selected
        words and apply the update rule. Nothing is done if `selected` is
        empty.
        """
        rows = [model_index.row() for model_index in selected.indexes()]
        if not rows:
            return
        current = rows[0]
        word_list: WordList = self.library_model[current]
        self.word_list_index = current
        self.selected_words = set()
        # show a copy of the cached words
        cached = list(word_list.cached_words)
        self.words_model.wrap(cached)
        self._apply_update_rule()

    def __on_add_word_list(self):
        """
        Add a new word list with the currently shown words to the library,
        under a generated name, and select it.
        """
        used_names = [word_list.name for word_list in self.library_model]
        new_name = WordList.generate_word_list_name(used_names)
        current_words = self.words_model[:]
        self.library_model.append(WordList(new_name, current_words))
        last = len(self.library_model) - 1
        self._set_selected_word_list(last)

    def __on_remove_word_list(self):
        """
        Remove the selected word list from the library, select the list
        before it (or the first one) and apply the update rule.
        """
        index = self._get_selected_word_list_index()
        if index is None:
            return
        del self.library_model[index]
        previous = max(index - 1, 0)
        self._set_selected_word_list(previous)
        self._apply_update_rule()

    def __on_update_word_list(self):
        """Mark the word list as modified, with modification type LIBRARY."""
        self._set_word_list_modified(mod_type=self.LIBRARY)

    def __on_import_word_list(self):
        """
        Ask for a file, read a word from each of its lines and add them to
        the library as a new word list named after the file. The new list
        is selected and the update rule is applied.
        """
        filename, _ = QFileDialog.getOpenFileName(
            self, "Open Word List", os.path.expanduser("~/"),
            "Text files (*.txt)\nAll files(*.*)")
        if not filename:
            return  # the dialog was cancelled

        with open(filename, encoding="utf-8") as f:
            words = [line.strip() for line in f]
        name = os.path.basename(filename)
        self.library_model.append(WordList(name, words, filename=filename))
        self._set_selected_word_list(len(self.library_model) - 1)
        self._apply_update_rule()

    def __on_save_word_list(self):
        """
        Ask for a file name and write the currently shown words to that
        file, one per line. The extension .txt is added to names without
        an extension.
        """
        index = self._get_selected_word_list_index()
        if index is None:
            suggested = os.path.expanduser("~/")
        else:
            # start with the file name stored with the selected word list
            suggested = self.library_model[index].filename

        filename, _ = QFileDialog.getSaveFileName(
            self, "Save Word List", suggested,
            "Text files (*.txt)\nAll files(*.*)")
        if not filename:
            return  # the dialog was cancelled

        root, extension = os.path.splitext(filename)
        if not extension:
            filename = root + ".txt"

        with open(filename, "w", encoding="utf-8") as f:
            f.writelines(f"{word}\n" for word in self.words_model)

    def _setup_input_box(self):
        """
        Add a combo box for choosing the word variable and radio buttons
        for choosing the update rule to the input box. The box is created
        disabled.
        """
        gui.comboBox(self.__input_box,
                     self,
                     "words_var",
                     label="Word variable:",
                     orientation=Qt.Vertical,
                     model=self.__input_words_model,
                     callback=self._apply_update_rule)
        gui.radioButtons(self.__input_box,
                         self,
                         "update_rule_index",
                         UpdateRules.ITEMS,
                         label="Update: ",
                         orientation=Qt.Vertical,
                         callback=self.__on_update_rule_changed)
        self.__input_box.setEnabled(False)

    def __on_update_rule_changed(self):
        """Update the state of word actions and apply the new update rule."""
        self._enable_words_actions()
        self._apply_update_rule()

    def _setup_words_box(self):
        """
        Create the view of words and, below it, the buttons for adding and
        removing a word and for sorting the words.
        """
        self.words_view = ListView()
        self.words_view.drop_finished.connect(self.__on_words_data_changed)
        self.words_view.setModel(self.words_model)
        self.words_view.selectionModel().selectionChanged.connect(
            self.__on_words_selection_changed)

        self.words_model.dataChanged.connect(self.__on_words_data_changed)

        self.__words_box.layout().setSpacing(1)
        self.__words_box.layout().addWidget(self.words_view)

        actions_widget = ModelActionsWidget()
        actions_widget.layout().setSpacing(1)

        # The two actions are stored so that they can be accessed later
        action = QAction("+", self.words_view, toolTip="Add a new word")
        action.triggered.connect(self.__on_add_word)
        actions_widget.addAction(action)
        self.__add_word_action = action

        action = QAction("\N{MINUS SIGN}", self, toolTip="Remove word")
        action.triggered.connect(self.__on_remove_word)
        actions_widget.addAction(action)
        self.__remove_word_action = action

        gui.rubber(actions_widget)

        action = QAction("Sort", self)
        action.setToolTip("Sort words alphabetically")
        action.triggered.connect(self.__on_apply_sorting)
        actions_widget.addAction(action)

        self.__words_box.layout().addWidget(actions_widget)

    def __on_words_data_changed(self):
        """
        Mark the word list as modified, with modification type CACHED, and
        commit.
        """
        self._set_word_list_modified(mod_type=self.CACHED)
        self.commit()

    def __on_words_selection_changed(self):
        """Commit after the selection of words changes."""
        self.commit()

    def __on_add_word(self):
        """
        Append an empty word to the list of words and open it for editing.
        """
        # the index of the new row, which is appended after the last one
        row = self.words_model.rowCount()
        if not self.words_model.insertRow(self.words_model.rowCount()):
            return
        # The selection handler is disconnected within this block, so the
        # changes of selection made here do not invoke it
        with disconnected(self.words_view.selectionModel().selectionChanged,
                          self.__on_words_selection_changed):
            self._set_selected_words([0])
            index = self.words_model.index(row, 0)
            self.words_view.setCurrentIndex(index)
            self.words_model.setItemData(index, {Qt.EditRole: ""})
        self.words_view.edit(index)

    def __on_remove_word(self):
        """Remove the selected words and reselect a neighbouring row.

        Does nothing when no row is selected. Otherwise the rows are removed
        with the selection-changed handler disconnected, and the data-changed
        handler is invoked once at the end.
        """
        selected = self.words_view.selectionModel().selectedRows(0)
        if not selected:
            return

        # Highest row first, so that removing a row never shifts the
        # numbers of the rows still waiting to be removed.
        descending = sorted((ix.row() for ix in selected), reverse=True)
        selection_changed = self.words_view.selectionModel().selectionChanged
        with disconnected(selection_changed,
                          self.__on_words_selection_changed):
            for row in descending:
                self.words_model.removeRow(row)
            if self.words_model:
                lowest_removed = descending[-1]
                self._set_selected_words([max(0, lowest_removed - 1)])
        self.__on_words_data_changed()

    def __on_apply_sorting(self):
        """Sort the words and keep the same words selected.

        An empty model is left alone. Without a selection, ``commit`` is
        called explicitly after sorting; with one, the selection is set again
        at the sorted positions.
        """
        if not self.words_model:
            return
        unsorted = self.words_model[:]
        selected = self._get_selected_words_indices()
        was_selected = np.zeros(len(unsorted), dtype=bool)
        if selected:
            was_selected[selected] = True

        order = np.argsort(unsorted)
        self.words_model.wrap([unsorted[pos] for pos in order])
        self._set_word_list_modified(mod_type=self.CACHED)
        if not selected:
            self.commit()
            return
        # Permute the mask the same way as the words to find the new rows.
        self._set_selected_words(list(np.flatnonzero(was_selected[order])))

    @Inputs.words
    def set_words(self, words: Optional[Table]):
        """Handle a new table on the words input.

        The current context is closed before the input is replaced. The
        input is then validated, the controls are re-initialised, a context
        is opened for the (possibly discarded) input, and finally the update
        rule is applied.

        Args:
            words: the incoming table, or ``None``.
        """
        self.closeContext()
        self.__input_words = words
        self._check_input_words()
        self._init_controls()
        self.openContext(self.__input_words)
        self._apply_update_rule()

    def _check_input_words(self):
        """Discard the input when none of its metas is a string variable.

        The ``no_string_vars`` warning is cleared first and raised again only
        when the input is rejected.
        """
        self.Warning.no_string_vars.clear()
        if not self.__input_words:
            return
        has_string_meta = any(
            isinstance(var, StringVariable)
            for var in self.__input_words.domain.metas)
        if not has_string_meta:
            self.Warning.no_string_vars()
            self.__input_words = None

    def _init_controls(self):
        """Refresh the input-related controls for the current input table.

        The variable model receives the input's domain (``None`` without an
        input), ``words_var`` defaults to the model's first item when there is
        one, and the input box is enabled only for a non-empty model.
        """
        table = self.__input_words
        self.__input_words_model.set_domain(
            None if table is None else table.domain)
        has_candidates = len(self.__input_words_model) > 0
        if has_candidates:
            self.words_var = self.__input_words_model[0]
        self.__input_box.setEnabled(bool(self.__input_words_model))
        self._enable_words_actions()

    def _enable_words_actions(self):
        """Allow or forbid manual editing of the word list.

        Editing (in-place edit triggers plus the add and remove actions) is
        switched off while the input variable model is non-empty and the
        update rule is not ``UpdateRules.LIBRARY``; otherwise it is on.
        """
        locked = bool(self.__input_words_model) \
            and self.update_rule_index != UpdateRules.LIBRARY
        if locked:
            triggers = QListView.NoEditTriggers
        else:
            triggers = QListView.DoubleClicked | QListView.EditKeyPressed
        self.words_view.setEditTriggers(triggers)
        for action in (self.__add_word_action, self.__remove_word_action):
            action.setEnabled(not locked)

    def _apply_update_rule(self):
        """Rebuild the words model from the library words and the input.

        Library words are the cached words of the selected library entry, or
        the current model contents when no entry is selected. Input words
        and the chosen update rule are used only when an input table is
        present; otherwise the rule is ``UpdateRules.LIBRARY`` with no input
        words.
        """
        lib_index = self._get_selected_word_list_index()
        lib_words, in_words, update_rule = [], [], UpdateRules.LIBRARY
        if lib_index is not None:
            lib_words = self.library_model[lib_index].cached_words
        else:
            lib_words = self.words_model[:]
        if self.__input_words is not None:
            in_words = self.__input_words.get_column_view(self.words_var)[0]
            in_words = list(in_words)
            update_rule = self.update_rule_index

        UpdateRules.update(self.words_model, lib_words, in_words, update_rule)
        if lib_index is not None:
            # Flag the library entry according to whether the rebuilt model
            # still equals the entry's cached words (read again here, after
            # the update).
            cached = self.library_model[lib_index].cached_words
            modified = WordList.NotModified if cached == self.words_model[:] \
                else WordList.Modified
            self.library_model[lib_index].update_rule_flag = modified
            # mod_type NONE stores nothing; it only emits the data-changed
            # notification for the entry and repaints the library view.
            self._set_word_list_modified(mod_type=self.NONE)
            self.library_view.repaint()

        # Apply selection. selection_changed invokes commit().
        # If there is no selection, call commit explicitly.
        if any(w in self.selected_words for w in self.words_model):
            self.set_selected_words()
            self.words_view.repaint()
        else:
            self.commit()

    def commit(self):
        """Store the selected words and send both outputs.

        With an empty words model both outputs receive ``None``. Otherwise
        ``words`` receives the annotated table of all words, and
        ``selected_words`` receives the selected rows (``None`` when nothing
        is selected).
        """
        chosen_rows = self._get_selected_words_indices()
        self.selected_words = set(np.array(self.words_model)[chosen_rows])

        all_words = chosen_words = None
        if self.words_model:
            variable = StringVariable("Words")
            variable.attributes = {"type": "words"}
            table = Table.from_list(
                Domain([], metas=[variable]),
                [[word] for word in self.words_model])
            table.name = "Words"
            if chosen_rows:
                chosen_words = table[chosen_rows]
            all_words = create_annotated_table(table, chosen_rows)
        self.Outputs.words.send(all_words)
        self.Outputs.selected_words.send(chosen_words)

    def _set_word_list_modified(self, mod_type):
        """Write the current words into the selected library entry.

        Nothing happens when no library entry is selected. Otherwise the
        entry's data-changed notification is emitted and the library view
        repainted after the update.

        Args:
            mod_type: ``self.LIBRARY`` overwrites both ``words`` and
                ``cached_words`` and resets the update-rule flag;
                ``self.CACHED`` overwrites only ``cached_words``;
                ``self.NONE`` stores nothing.

        Raises:
            NotImplementedError: for any other ``mod_type``.
        """
        index = self._get_selected_word_list_index()
        if index is None:
            return

        if mod_type == self.LIBRARY:
            word_list = self.library_model[index]
            word_list.words = self.words_model[:]
            word_list.cached_words = self.words_model[:]
            word_list.update_rule_flag = WordList.NotModified
        elif mod_type == self.CACHED:
            self.library_model[index].cached_words = self.words_model[:]
        elif mod_type != self.NONE:
            raise NotImplementedError
        self.library_model.emitDataChanged(index)
        self.library_view.repaint()

    def _set_selected_word_list(self, index: int):
        """Clear the library view's selection and select row ``index``."""
        sel_model: QItemSelectionModel = self.library_view.selectionModel()
        sel_model.select(self.library_model.index(index, 0),
                         QItemSelectionModel.ClearAndSelect)

    def _get_selected_word_list_index(self) -> Optional[int]:
        """Return the first selected row of the library view, else ``None``."""
        selected = self.library_view.selectionModel().selectedRows()
        if not selected:
            return None
        return selected[0].row()

    def _set_selected_words(self, indices: List[int]):
        """Replace the words view selection with the given rows.

        Args:
            indices: rows of the words model to select.
        """
        picked = QItemSelection()
        for row in indices:
            picked.append(QItemSelectionRange(self.words_model.index(row, 0)))
        self.words_view.selectionModel().select(
            picked, QItemSelectionModel.ClearAndSelect)

    def _get_selected_words_indices(self) -> List[int]:
        """Return the row numbers selected in the words view."""
        selection_model = self.words_view.selectionModel()
        return [index.row() for index in selection_model.selectedRows()]

    def set_selected_words(self):
        """Select every row whose word is in ``self.selected_words``.

        Nothing is done when ``self.selected_words`` is empty.
        """
        if not self.selected_words:
            return
        wanted = self.selected_words
        rows = [
            row for row, word in enumerate(self.words_model)
            if word in wanted
        ]
        self._set_selected_words(rows)

    def _restore_state(self):
        """Rebuild the library, the words and the selection from settings.

        The library model is filled from ``word_list_library`` and the entry
        at ``word_list_index`` is selected. When ``words`` is set, it is
        loaded into the words model and stored as the entry's cached words;
        the previously selected words are then reselected.
        """
        source = [WordList.from_dict(s) for s in self.word_list_library]
        self.library_model.wrap(source)
        # __on_library_selection_changed() (invoked by _set_selected_word_list)
        # clears self.selected_words
        selected_words = self.selected_words
        self._set_selected_word_list(self.word_list_index)

        if self.words is not None:
            self.words_model.wrap(list(self.words))
            self._set_word_list_modified(mod_type=self.CACHED)
            if selected_words:
                self.selected_words = selected_words
                self.set_selected_words()
            # Without a selection to restore, commit explicitly when the
            # stored library entry differs from the stored words.
            elif len(self.word_list_library) > self.word_list_index and \
                self.word_list_library[self.word_list_index] != self.words:
                self.commit()

    def _save_state(self):
        """Copy the library and the current words into widget attributes.

        ``word_list_library`` receives ``as_dict()`` of every library entry
        and ``words`` a full slice of the words model.
        """
        self.word_list_library = [s.as_dict() for s in self.library_model]
        self.words = self.words_model[:]

    def send_report(self):
        """Report the library name, the input settings and the word list.

        The library is reported as "/" when the library model is empty. The
        word variable and update rule are included only when an input table
        is present.
        """
        if self.library_model:
            library = self.library_model[self.word_list_index].name
        else:
            library = "/"
        settings = [("Library", library)]
        if self.__input_words:
            self.report_data("Input Words", self.__input_words)
            settings.extend([
                ("Word variable", self.words_var),
                ("Update", UpdateRules.ITEMS[self.update_rule_index]),
            ])
        self.report_items("Settings", settings)
        self.report_paragraph("Words", ", ".join(self.words_model[:]))
Example #20
0
class DomainContextSettingsHandlerTests(unittest.TestCase):
    """Tests DomainContextHandler through a MockWidget and ``domain``.

    Notes on individual tests are ``#`` comments rather than docstrings so
    that unittest's verbose output keeps printing the test names.
    """

    def setUp(self):
        # Bind a fresh handler to MockWidget, seed the widget's context with
        # the encoded ``domain`` and open a context for that domain.
        self.handler = DomainContextHandler(attributes_in_res=True,
                                            metas_in_res=True)
        self.handler.read_defaults = lambda: None  # Disable reading settings from disk
        self.handler.bind(MockWidget)
        self.widget = MockWidget()
        encoded_attributes, encoded_metas = self.handler.encode_domain(domain)
        self.widget.current_context.attributes = encoded_attributes
        self.widget.current_context.metas = encoded_metas
        self.handler.initialize(self.widget)
        self.handler.initialize(self.widget.subprovider)
        self.handler.open_context(self.widget, domain)

    def test_settings_from_widget(self):
        # Values set on the widget are expected in the context as
        # (value, type) pairs; the list setting is expected unchanged.
        widget = self.widget
        widget.ordinary_setting = VALUE
        widget.string_setting = VALUE
        widget.list_setting = [1, 2, 3]
        widget.dict_setting = {1: 2}
        widget.continuous_setting = CONTINOUS_ATTR
        widget.discrete_setting = DISCRETE_ATTR_ABC
        widget.class_setting = DISCRETE_CLASS_GHI
        widget.excluded_meta_setting = DISCRETE_META_JKL
        widget.meta_setting = DISCRETE_META_JKL

        self.handler.settings_from_widget(widget)

        values = widget.current_context.values
        self.assertEqual((VALUE, UNKNOWN_TYPE), values['ordinary_setting'])
        self.assertEqual((VALUE, UNKNOWN_TYPE), values['string_setting'])
        self.assertEqual([1, 2, 3], values['list_setting'])
        self.assertEqual(({1: 2}, UNKNOWN_TYPE), values['dict_setting'])
        self.assertEqual((CONTINOUS_ATTR, Continuous), values['continuous_setting'])
        self.assertEqual((DISCRETE_ATTR_ABC, Discrete), values['discrete_setting'])
        self.assertEqual((DISCRETE_CLASS_GHI, Discrete), values['class_setting'])
        self.assertEqual((DISCRETE_META_JKL, UNKNOWN_TYPE), values['excluded_meta_setting'])
        self.assertEqual((DISCRETE_META_JKL, Discrete), values['meta_setting'])

    def test_settings_to_widget(self):
        # Stored (value, type) pairs are expected on the widget as bare
        # values, including the items of the tuple list.
        self.widget.current_context.values = dict(
            string_setting=(VALUE, -2),
            continuous_setting=(CONTINOUS_ATTR, Continuous),
            discrete_setting=(DISCRETE_ATTR_ABC, Discrete),
            list_setting=[1, 2, 3],
            attr_list_setting=[DISCRETE_ATTR_ABC, DISCRETE_CLASS_GHI],
            selection1=[0],
            attr_tuple_list_setting=[(DISCRETE_META_JKL, Discrete),
                                     (CONTINUOUS_META, Continuous)],
            selection2=[1],
        )

        self.handler.settings_to_widget(self.widget)

        self.assertEqual(self.widget.string_setting, VALUE)
        self.assertEqual(self.widget.continuous_setting, CONTINOUS_ATTR)
        self.assertEqual(self.widget.discrete_setting, DISCRETE_ATTR_ABC)
        self.assertEqual(self.widget.list_setting, [1, 2, 3])
        self.assertEqual(self.widget.attr_list_setting, [DISCRETE_ATTR_ABC, DISCRETE_CLASS_GHI])
        self.assertEqual(self.widget.attr_tuple_list_setting,
                         [DISCRETE_META_JKL, CONTINUOUS_META])
        self.assertEqual(self.widget.selection1, [0])
        self.assertEqual(self.widget.selection2, [1])

    def test_settings_to_widget_filters_selections(self):
        # The two *_META_* entries are expected to be dropped from the list
        # and selection1 re-indexed to the items that remain.
        self.widget.current_context.values = dict(
            attr_list_setting=[DISCRETE_META_JKL, DISCRETE_ATTR_ABC,
                               CONTINUOUS_META, DISCRETE_CLASS_GHI],
            selection1=[1, 2],
        )

        self.handler.settings_to_widget(self.widget)

        self.assertEqual(self.widget.attr_list_setting, [DISCRETE_ATTR_ABC, DISCRETE_CLASS_GHI])
        self.assertEqual(self.widget.selection1, [0])

    def test_perfect_match_returns_2(self):
        # The context carries exactly the encoded domain.
        attrs, metas = self.handler.encode_domain(domain)
        mock_context = Mock(attributes=attrs, metas=metas, values={})

        self.assertEqual(self.match(mock_context), 2.)

    def test_match_when_nothing_to_match_returns_point_1(self):
        # The context has no stored values; self.match() encodes the domain
        # itself, so nothing needs to be prepared here.
        mock_context = Mock(values={})

        self.assertEqual(self.match(mock_context), 0.1)

    def test_match_if_all_values_match_returns_1(self):
        mock_context = Mock(values=dict(
            discrete_setting=(DISCRETE_ATTR_ABC, Discrete),
            required_setting=(DISCRETE_ATTR_ABC, Discrete),
        ))

        self.assertEqual(self.match(mock_context), 1.)

    def test_match_if_all_list_values_match_returns_1(self):
        # NOTE(review): despite the test name, the stored value is a single
        # pair, not a list, and it uses the literal "df1" instead of one of
        # the name constants - confirm this is intended.
        mock_context = Mock(values=dict(
            discrete_setting=("df1", Discrete)
        ))
        self.assertEqual(self.match(mock_context), 1.)

    def test_match_if_all_required_list_values_match_returns_1(self):
        mock_context = Mock(values=dict(
            required_setting=(DISCRETE_ATTR_ABC, Discrete)
        ))

        self.assertEqual(self.match(mock_context), 1.)

    def test_clone_context(self):
        # Cloning for the same domain is expected to keep the values.
        mock_context = Mock(values=dict(
            required_setting=(DISCRETE_ATTR_ABC, Discrete)
        ))
        attrs, metas = self.handler.encode_domain(domain)
        cloned_context = self.handler.clone_context(mock_context, domain, attrs, metas)
        self.assertEqual(cloned_context.values, mock_context.values)

    def add_setting(self, widget, name, setting):
        """Register ``setting`` as ``name`` and set its default on ``widget``."""
        setting.name = name
        setattr(widget, name, setting.default)
        self.handler.provider.settings[name] = setting

    def match(self, context):
        """Return the handler's match score of ``context`` against ``domain``."""
        attrs, metas = self.handler.encode_domain(domain)
        return self.handler.match(context, None, attrs, metas)

    def test_initialize_sets_current_context(self):
        # initialize() is expected to create the attribute, set to None.
        self.widget = MockWidget()
        del self.widget.current_context
        self.handler.initialize(self.widget)
        self.assertIs(self.widget.current_context, None)
Example #21
0
class DomainContextSettingsHandlerTests(unittest.TestCase):
    """Checks DomainContextHandler encoding, settings transfer and matching.

    Every test runs against the domain built by ``_create_domain``. Notes on
    individual tests are ``#`` comments rather than docstrings so that
    unittest's verbose output keeps printing the test names.
    """

    def setUp(self):
        handler = DomainContextHandler(attributes_in_res=True,
                                       metas_in_res=True)
        handler.read_defaults = lambda: None  # Disable reading from disk
        self.handler = handler
        self.domain = self._create_domain()

    def test_encode_domain_with_match_none(self):
        # Every variable is expected to be encoded by its type only.
        self.handler.match_values = self.handler.MATCH_VALUES_NONE

        attributes, metas = self.handler.encode_domain(self.domain)

        self.assertEqual(attributes, dict(
            cf1=VarTypes.Continuous,
            df1=VarTypes.Discrete,
            df2=VarTypes.Discrete,
            dc1=VarTypes.Discrete,
        ))
        self.assertEqual(metas, dict(
            cm1=VarTypes.Continuous,
            dm1=VarTypes.Discrete,
        ))

    def test_encode_domain_with_match_class(self):
        # Only the class variable is expected to carry its values.
        self.handler.match_values = self.handler.MATCH_VALUES_CLASS

        attributes, metas = self.handler.encode_domain(self.domain)

        self.assertEqual(attributes, dict(
            cf1=VarTypes.Continuous,
            df1=VarTypes.Discrete,
            df2=VarTypes.Discrete,
            dc1=["g", "h", "i"],
        ))
        self.assertEqual(metas, dict(
            cm1=VarTypes.Continuous,
            dm1=VarTypes.Discrete,
        ))

    def test_encode_domain_with_match_all(self):
        # Every discrete variable is expected to carry its values.
        self.handler.match_values = self.handler.MATCH_VALUES_ALL

        attributes, metas = self.handler.encode_domain(self.domain)

        self.assertEqual(attributes, dict(
            cf1=VarTypes.Continuous,
            df1=["a", "b", "c"],
            df2=["d", "e", "f"],
            dc1=["g", "h", "i"],
        ))
        self.assertEqual(metas, dict(
            cm1=VarTypes.Continuous,
            dm1=["j", "k", "l"],
        ))

    def test_encode_domain_with_false_attributes_in_res(self):
        self.handler = handler = DomainContextHandler(
            attributes_in_res=False, metas_in_res=True)

        attributes, metas = handler.encode_domain(self.domain)

        self.assertEqual(attributes, {})
        self.assertEqual(metas, dict(
            cm1=VarTypes.Continuous,
            dm1=VarTypes.Discrete,
        ))

    def test_encode_domain_with_false_metas_in_res(self):
        self.handler = handler = DomainContextHandler(
            attributes_in_res=True, metas_in_res=False)

        attributes, metas = handler.encode_domain(self.domain)

        self.assertEqual(attributes, dict(
            cf1=VarTypes.Continuous,
            df1=VarTypes.Discrete,
            df2=VarTypes.Discrete,
            dc1=VarTypes.Discrete,
        ))
        self.assertEqual(metas, {})

    def test_settings_from_widget(self):
        widget = self._widget_with_encoded_domain()
        registered = [
            ("string_setting", ContextSetting("abc")),
            ("list_setting", ContextSetting([1, 2, 3])),
            ("dict_setting", ContextSetting({1: 2})),
            ("continuous_setting", ContextSetting("cf1")),
            ("discrete_setting", ContextSetting("df1")),
            ("class_setting", ContextSetting("dc1")),
            ("excluded_meta_setting", ContextSetting("dm1")),
            ("meta_setting", ContextSetting("dm1", exclude_metas=False)),
        ]
        for name, setting in registered:
            self.add_setting(widget, name, setting)

        self.handler.settings_from_widget(widget)

        expected = {
            "string_setting": ("abc", -2),
            "list_setting": [1, 2, 3],
            "dict_setting": ({1: 2}, -2),
            "continuous_setting": ("cf1", VarTypes.Continuous),
            "discrete_setting": ("df1", VarTypes.Discrete),
            "class_setting": ("dc1", VarTypes.Discrete),
            "excluded_meta_setting": ("dm1", -2),
            "meta_setting": ("dm1", VarTypes.Discrete),
        }
        self.assertEqual(widget.current_context.values, expected)

    def test_settings_to_widget(self):
        widget = self._widget_with_encoded_domain()
        registered = [
            ("string_setting", ContextSetting("")),
            ("continuous_setting", ContextSetting("")),
            ("discrete_setting", ContextSetting("")),
            ("list_setting", ContextSetting([])),
            ("attr_list_setting",
             ContextSetting([], selected="selection1")),
            ("attr_tuple_list_setting",
             ContextSetting([], selected="selection2", exclude_metas=False)),
        ]
        for name, setting in registered:
            self.add_setting(widget, name, setting)
        widget.current_context.values = {
            "string_setting": ("abc", -2),
            "continuous_setting": ("cf1", VarTypes.Continuous),
            "discrete_setting": ("df1", VarTypes.Discrete),
            "list_setting": [1, 2, 3],
            "attr_list_setting": ["df1", "dc1"],
            "selection1": [0],
            "attr_tuple_list_setting": [("dm1", VarTypes.Discrete),
                                        ("cm1", VarTypes.Continuous)],
            "selection2": [1],
        }

        self.handler.settings_to_widget(widget)

        self.assertEqual(widget.string_setting, "abc")
        self.assertEqual(widget.continuous_setting, "cf1")
        self.assertEqual(widget.discrete_setting, "df1")
        self.assertEqual(widget.list_setting, [1, 2, 3])
        self.assertEqual(widget.attr_list_setting, ["df1", "dc1"])
        self.assertEqual(widget.attr_tuple_list_setting, ["dm1", "cm1"])
        self.assertEqual(widget.selection1, [0])
        self.assertEqual(widget.selection2, [1])

    def test_settings_to_widget_filters_selections(self):
        # "dm1" and "cm1" are expected to be dropped from the list and the
        # selection re-indexed to the items that remain.
        widget = self._widget_with_encoded_domain()
        self.add_setting(widget, "attr_list_setting",
                         ContextSetting([], selected="selection"))
        widget.current_context.values = {
            "string_setting": ("abc", -2),
            "continuous_setting": ("cf1", VarTypes.Continuous),
            "discrete_setting": ("df1", VarTypes.Discrete),
            "list_setting": [1, 2, 3],
            "attr_list_setting": ["dm1", "df1", "cm1", "dc1"],
            "selection": [1, 2],
        }

        self.handler.settings_to_widget(widget)

        self.assertEqual(widget.attr_list_setting, ["df1", "dc1"])
        self.assertEqual(widget.selection, [0])

    def test_perfect_match_returns_2(self):
        attrs, metas = self.handler.encode_domain(self.domain)
        context = Mock(attributes=attrs, metas=metas, values={})

        score = self.handler.match(context, None, attrs, metas)

        self.assertEqual(score, 2.0)

    def test_match_when_nothing_to_match_returns_point_1(self):
        attrs, metas = self.handler.encode_domain(self.domain)
        context = Mock(values={})

        score = self.handler.match(context, None, attrs, metas)

        self.assertEqual(score, 0.1)

    def test_match_if_all_values_match_returns_1(self):
        attrs, metas = self.handler.encode_domain(self.domain)
        context = Mock(values={})
        self.add_setting(context, "setting", ContextSetting(""))
        self.add_setting(context, "required_setting",
                         ContextSetting("", required=ContextSetting.REQUIRED))
        discrete_df1 = ("df1", VarTypes.Discrete)
        context.values["setting"] = discrete_df1
        context.values["required_setting"] = discrete_df1

        score = self.handler.match(context, None, attrs, metas)

        self.assertEqual(score, 1.0)

    def test_match_if_all_list_values_match_returns_1(self):
        attrs, metas = self.handler.encode_domain(self.domain)
        context = Mock(values={})
        self.add_setting(context, "setting", ContextSetting(""))
        context.values["setting"] = [("df1", VarTypes.Discrete)]

        score = self.handler.match(context, None, attrs, metas)

        self.assertEqual(score, 1.0)

    def test_match_if_all_required_list_values_match_returns_1(self):
        attrs, metas = self.handler.encode_domain(self.domain)
        context = Mock(values={})
        self.add_setting(context, "required_setting",
                         ContextSetting("", required=ContextSetting.REQUIRED))
        context.values["required_setting"] = [("df1", VarTypes.Discrete)]

        score = self.handler.match(context, None, attrs, metas)

        self.assertEqual(score, 1.0)

    def add_setting(self, widget, name, setting):
        """Name ``setting``, set its default on ``widget`` and register it."""
        setting.name = name
        setattr(widget, name, setting.default)
        self.handler.settings[name] = setting

    def _widget_with_encoded_domain(self):
        """Return a MockWidget whose context holds the encoded test domain."""
        widget = MockWidget()
        attributes, metas = self.handler.encode_domain(self.domain)
        widget.current_context.attributes = attributes
        widget.current_context.metas = metas
        return widget

    def _create_domain(self):
        """Build the domain used by the tests.

        Features: cf1 (continuous), df1 and df2 (discrete); class variable:
        dc1 (discrete); metas: cm1 (continuous) and dm1 (discrete).
        """
        return Domain(
            [
                ContinuousVariable(name="cf1"),
                DiscreteVariable(name="df1", values=["a", "b", "c"]),
                DiscreteVariable(name="df2", values=["d", "e", "f"]),
            ],
            [DiscreteVariable(name="dc1", values=["g", "h", "i"])],
            [
                ContinuousVariable(name="cm1"),
                DiscreteVariable(name="dm1", values=["j", "k", "l"]),
            ],
        )
Example #22
0
 def setUp(self):
     # Fixture fragment: creates a handler with both *_in_res flags enabled,
     # stubs out read_defaults and builds the domain via _create_domain().
     self.handler = DomainContextHandler(attributes_in_res=True,
                                         metas_in_res=True)
     self.handler.read_defaults = lambda: None  # Disable reading from disk
     self.domain = self._create_domain()
class TestDomainContextHandler(TestCase):
    # Tests for DomainContextHandler: domain encoding, context matching,
    # cloning and opening, and encoding/decoding of individual settings.
    # Notes are kept as comments (not docstrings) so that unittest's verbose
    # output keeps printing the test names.

    def setUp(self):
        self.domain = Domain(
            attributes=[ContinuousVariable('c1'),
                        DiscreteVariable('d1', values='abc'),
                        DiscreteVariable('d2', values='def')],
            class_vars=[DiscreteVariable('d3', values='ghi')],
            metas=[ContinuousVariable('c2'),
                   DiscreteVariable('d4', values='jkl')]
        )
        # (domain, encoded attributes, encoded metas): the positional
        # arguments that the tests below splat into match(), clone_context()
        # and filter_value().
        self.args = (self.domain,
                     {'c1': Continuous - 100, 'd1': Discrete - 100,
                      'd2': Discrete - 100, 'd3': Discrete - 100},
                     {'c2': Continuous - 100, 'd4': Discrete - 100, })
        self.handler = DomainContextHandler()
        # Replace read_defaults with a no-op for the tests.
        self.handler.read_defaults = lambda: None

    def test_encode_domain_with_match_none(self):
        # Every variable is expected to be encoded by its type only.
        handler = DomainContextHandler(
            match_values=DomainContextHandler.MATCH_VALUES_NONE)

        encoded_attributes, encoded_metas = handler.encode_domain(self.domain)

        self.assertEqual(encoded_attributes,
                         {'c1': Continuous - 100, 'd1': Discrete - 100,
                          'd2': Discrete - 100, 'd3': Discrete - 100})
        self.assertEqual(encoded_metas,
                         {'c2': Continuous - 100, 'd4': Discrete - 100, })

    def test_encode_domain_with_match_class(self):
        # Only the class variable d3 is expected to carry its values.
        handler = DomainContextHandler(
            match_values=DomainContextHandler.MATCH_VALUES_CLASS)

        encoded_attributes, encoded_metas = handler.encode_domain(self.domain)

        self.assertEqual(encoded_attributes,
                         {'c1': Continuous - 100, 'd1': Discrete - 100,
                          'd2': Discrete - 100,
                          'd3': list('ghi')})
        self.assertEqual(encoded_metas,
                         {'c2': Continuous - 100, 'd4': Discrete - 100})

    def test_encode_domain_with_match_all(self):
        # Every discrete variable is expected to carry its values.
        handler = DomainContextHandler(
            match_values=DomainContextHandler.MATCH_VALUES_ALL)

        encoded_attributes, encoded_metas = handler.encode_domain(self.domain)

        self.assertEqual(encoded_attributes,
                         {'c1': Continuous - 100, 'd1': list('abc'),
                          'd2': list('def'), 'd3': list('ghi')})
        self.assertEqual(encoded_metas,
                         {'c2': Continuous - 100, 'd4': list('jkl')})

    def test_match_returns_2_on_perfect_match(self):
        # The context holds exactly the encoded attributes and metas.
        context = Mock(
            attributes=self.args[1], metas=self.args[2], values={})
        self.assertEqual(2., self.handler.match(context, *self.args))

    def test_match_returns_1_if_everything_matches(self):
        self.handler.bind(SimpleWidget)

        # Attributes in values
        context = Mock(values=dict(
            with_metas=('d1', Discrete),
            required=('d1', Discrete)))
        self.assertEqual(1., self.handler.match(context, *self.args))

        # Metas in values
        context = Mock(values=dict(
            with_metas=('d4', Discrete),
            required=('d1', Discrete)))
        self.assertEqual(1., self.handler.match(context, *self.args))

        # Attributes in lists
        context = Mock(values=dict(
            with_metas=[("d1", Discrete)]
        ))
        self.assertEqual(1., self.handler.match(context, *self.args))

        # Metas in lists
        context = Mock(values=dict(
            with_metas=[("d4", Discrete)]
        ))
        self.assertEqual(1., self.handler.match(context, *self.args))

    def test_match_returns_point_1_when_nothing_to_match(self):
        self.handler.bind(SimpleWidget)

        context = Mock(values={})
        self.assertEqual(0.1, self.handler.match(context, *self.args))

    def test_match_returns_zero_on_incompatible_context(self):
        self.handler.bind(SimpleWidget)

        # required
        context = Mock(values=dict(required=('u', Discrete),
                                   with_metas=('d1', Discrete)))
        self.assertEqual(0, self.handler.match(context, *self.args))

    def test_clone_context(self):
        # 'text' is expected to be kept, 'with_metas' reduced to the pairs
        # whose type agrees with the domain, and 'required' (variable 'u' is
        # not in the domain) to be absent from the clone.
        self.handler.bind(SimpleWidget)
        context = self.create_context(self.domain, dict(
            text=('u', -2),
            with_metas=[('d1', Discrete), ('d1', Continuous),
                        ('c1', Continuous), ('c1', Discrete)],
            required=('u', Continuous)
        ))

        new_values = self.handler.clone_context(context, *self.args).values

        self.assertEqual(new_values['text'], ('u', -2))
        self.assertEqual([('d1', Discrete), ('c1', Continuous)],
                         new_values['with_metas'])
        self.assertNotIn('required', new_values)

    def test_open_context(self):
        self.handler.bind(SimpleWidget)
        context = self.create_context(self.domain, dict(
            text=('u', -2),
            with_metas=[('d1', Discrete), ('d2', Discrete)]
        ))
        # The prepared context sits between two contexts without values.
        self.handler.global_contexts = \
            [Mock(values={}), context, Mock(values={})]

        widget = SimpleWidget()
        self.handler.initialize(widget)
        old_metas_list = widget.with_metas
        self.handler.open_context(widget, self.args[0])

        context = widget.current_context
        self.assertEqual(context.attributes, self.args[1])
        self.assertEqual(context.metas, self.args[2])
        # The widget must still hold the very same list object.
        self.assertIs(old_metas_list, widget.with_metas)

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas, [('d1', Discrete),
                                             ('d2', Discrete)])

    def test_open_context_with_imperfect_match(self):
        self.handler.bind(SimpleWidget)
        # create_context() substitutes an empty Domain for None.
        context = self.create_context(None, dict(
            text=('u', -2),
            with_metas=[('d1', Discrete), ('d1', Continuous),
                        ('c1', Continuous), ('c1', Discrete)]
        ))
        self.handler.global_contexts = \
            [Mock(values={}), context, Mock(values={})]

        widget = SimpleWidget()
        self.handler.initialize(widget)
        self.handler.open_context(widget, self.args[0])

        context = widget.current_context
        self.assertEqual(context.attributes, self.args[1])
        self.assertEqual(context.metas, self.args[2])

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas, [('d1', Discrete),
                                             ('c1', Continuous)])

    def test_open_context_with_no_match(self):
        # No stored contexts: the widget's values are expected to stay as
        # they are and to be recorded in the newly opened context.
        self.handler.bind(SimpleWidget)
        widget = SimpleWidget()
        self.handler.initialize(widget)
        widget.text = 'u'

        self.handler.open_context(widget, self.args[0])

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas, [])
        context = widget.current_context
        self.assertEqual(context.attributes, self.args[1])
        self.assertEqual(context.metas, self.args[2])
        self.assertEqual(context.values['text'], ('u', -2))

    def test_filter_value(self):
        setting = ContextSetting([])
        setting.name = "value"

        def test_filter(before_value, after_value):
            # Run filter_value on a one-item data dict and compare what is
            # left under "value" with the expectation.
            data = dict(value=before_value)
            self.handler.filter_value(setting, data, *self.args)
            self.assertEqual(data.get("value", None), after_value)

        # filter list values
        test_filter([], [])
        # When list contains attributes as a tuple of (name, type),
        # Attributes not present in domain should be filtered out
        test_filter([("d1", Discrete), ("d1", Continuous),
                     ("c1", Continuous), ("c1", Discrete)],
                    [("d1", Discrete), ("c1", Continuous)])
        # All other values in list should remain
        test_filter([0, [1, 2, 3], "abcd", 5.4], [0, [1, 2, 3], "abcd", 5.4])

    def test_encode_setting(self):
        setting = ContextSetting(None)

        var = self.domain[0]
        val = self.handler.encode_setting(None, setting, var)
        self.assertEqual(val, (var.name, 100 + vartype(var)))

        # Should not crash on anonymous variables
        var.name = ""
        val = self.handler.encode_setting(None, setting, var)
        self.assertEqual(val, (var.name, 100 + vartype(var)))

    def test_encode_list_settings(self):
        setting = ContextSetting(None)

        # A list containing variables is expected as (encoded list, -3).
        var1, var2 = self.domain[:2]
        val = self.handler.encode_setting(None, setting, [None, var1, var2])
        self.assertEqual(
            val,
            ([None,
              (var1.name, 100 + vartype(var1)),
              (var2.name, 100 + vartype(var2))], -3))

        # Lists without variables are expected back as equal lists that are
        # new objects.
        a_list = [1, 2, 3]
        val = self.handler.encode_setting(None, setting, a_list)
        self.assertEqual(val, [1, 2, 3])
        self.assertIsNot(val, a_list)

        a_list = []
        val = self.handler.encode_setting(None, setting, a_list)
        self.assertEqual(val, [])
        self.assertIsNot(val, a_list)

        a_list = [None, None]
        val = self.handler.encode_setting(None, setting, a_list)
        self.assertEqual(val, [None, None])
        self.assertIsNot(val, a_list)

    def test_decode_setting(self):
        setting = ContextSetting(None)

        var = self.domain[0]
        val = self.handler.decode_setting(setting, (var.name, 100 + vartype(var)),
                                          self.domain)
        self.assertIs(val, var)

        # The same variable must also be found when it is a meta.
        all_metas_domain = Domain([], metas=[var])
        val = self.handler.decode_setting(setting, (var.name, 100 + vartype(var)),
                                          all_metas_domain)
        self.assertIs(val, var)

    def test_decode_list_setting(self):
        setting = ContextSetting(None)

        var1, var2 = self.domain[:2]
        val = self.handler.decode_setting(
            setting,
            ([None,
              (var1.name, 100 + vartype(var1)),
              (var2.name, 100 + vartype(var2))], -3),
            self.domain)
        self.assertEqual(val, [None, var1, var2])

        val = self.handler.decode_setting(setting, [1, 2, 3], self.domain)
        self.assertEqual(val, [1, 2, 3])

    def test_backward_compatible_params(self):
        # Passing metas_in_res is expected to emit OrangeDeprecationWarning.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            DomainContextHandler(metas_in_res=True)
            self.assertIn(OrangeDeprecationWarning,
                          [x.category for x in w])

    def test_deprecated_str_as_var(self):
        # Fails on purpose from Orange 3.26 on, as a reminder to remove the
        # deprecated behaviour together with this test.
        if LooseVersion(Orange.__version__) >= LooseVersion("3.26"):
            # pragma: no cover
            self.fail("Remove support for variables stored as string settings "
                      "and this test.")

        context = Mock()
        context.attributes = {"foo": 2}
        context.metas = {}
        setting = ContextSetting("")
        setting.name = "setting_name"
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            DomainContextHandler.encode_setting(context, setting, "foo")
            # The first recorded warning is expected to name the setting.
            self.assertIn("setting_name", w[0].message.args[0])


    def create_context(self, domain, values):
        """Return a new handler context for ``domain`` holding ``values``.

        ``None`` as ``domain`` is replaced by an empty ``Domain``.
        """
        if domain is None:
            domain = Domain([])

        context = self.handler.new_context(domain,
                                           *self.handler.encode_domain(domain))
        context.values = values
        return context
class OWClusterAnalysis(widget.OWWidget):
    """Widget that runs a cluster analysis on the input table and displays the
    result as a cluster-by-gene table from which cells can be selected.
    """
    # Widget metadata.
    name = "Cluster Analysis"
    description = "Perform cluster analysis."
    icon = "icons/ClusterAnalysis.svg"
    priority = 2010

    class Inputs:
        data = Input("Data", Table, default=True)
        genes = Input("Genes", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        contingency = Output("Contingency Table", Table)

    # Upper limits of the two "Top N" spin boxes.
    N_GENES_PER_CLUSTER_MAX = 10
    N_MOST_ENRICHED_MAX = 50
    # Cell sizes offered by the S/M/L buttons; indexed by `cell_size_ix`.
    CELL_SIZES = (14, 22, 30)

    settingsHandler = DomainContextHandler(metas_in_res=True)
    cluster_var = ContextSetting(None)
    selection = ContextSetting(set())
    # Index of the checked gene-selection radio button:
    # 0 = top genes per cluster, 1 = highest enrichments, 2 = user-provided list.
    gene_selection = ContextSetting(0)
    # Index into `_diff_exprs`.
    differential_expression = ContextSetting(0)
    cell_size_ix = ContextSetting(2)
    # Values passed as the `enrichment` argument of the gene-selection call.
    _diff_exprs = ("high", "low", "either")
    n_genes_per_cluster = ContextSetting(3)
    n_most_enriched = ContextSetting(20)
    biclustering = ContextSetting(True)
    auto_apply = Setting(True)

    want_main_area = True

    def __init__(self):
        """Initialise the analysis state and build the control and main areas."""
        super().__init__()

        # Analysis state; all of it is reset again in `set_data`.
        self.ca = None
        self.clusters = None
        self.data = None
        # Only discrete variables can be chosen as the cluster variable.
        self.feature_model = DomainModel(valid_types=DiscreteVariable)
        self.gene_list = None
        self.model = None
        self.pvalues = None

        # Background execution: at most one task is tracked in `_task`.
        self._executor = ThreadExecutor()
        # (current, previous) gene-selection mode.
        self._gene_selection_history = (self.gene_selection,
                                        self.gene_selection)
        self._task = None

        box = gui.vBox(self.controlArea, "Info")
        self.infobox = gui.widgetLabel(box, self._get_info_string())

        box = gui.vBox(self.controlArea, "Cluster Variable")
        gui.comboBox(box,
                     self,
                     "cluster_var",
                     sendSelectedValue=True,
                     model=self.feature_model,
                     callback=self._run_cluster_analysis)

        # "Gene Selection" box: three radio buttons laid out in a grid, each
        # with its own row of accompanying controls.
        layout = QGridLayout()
        self.gene_selection_radio_group = gui.radioButtonsInBox(
            self.controlArea,
            self,
            "gene_selection",
            orientation=layout,
            box="Gene Selection",
            callback=self._gene_selection_changed)

        # Build a spin-box callback that only re-runs the gene selection when
        # the spin box belongs to the currently checked mode.
        # NOTE(review): the parameter name `id` shadows the builtin.
        def conditional_set_gene_selection(id):
            def f():
                if self.gene_selection == id:
                    return self._set_gene_selection()

            return f

        # Row 1 (mode 0): "Top <n> genes per cluster".
        layout.addWidget(
            gui.appendRadioButton(self.gene_selection_radio_group,
                                  "",
                                  addToLayout=False), 1, 1)
        cb = gui.hBox(None, margin=0)
        gui.widgetLabel(cb, "Top")
        self.n_genes_per_cluster_spin = gui.spin(
            cb,
            self,
            "n_genes_per_cluster",
            minv=1,
            maxv=self.N_GENES_PER_CLUSTER_MAX,
            controlWidth=60,
            alignment=Qt.AlignRight,
            callback=conditional_set_gene_selection(0))
        gui.widgetLabel(cb, "genes per cluster")
        gui.rubber(cb)
        layout.addWidget(cb, 1, 2, Qt.AlignLeft)

        # Row 2 (mode 1): "Top <n> highest enrichments".
        layout.addWidget(
            gui.appendRadioButton(self.gene_selection_radio_group,
                                  "",
                                  addToLayout=False), 2, 1)
        mb = gui.hBox(None, margin=0)
        gui.widgetLabel(mb, "Top")
        self.n_most_enriched_spin = gui.spin(
            mb,
            self,
            "n_most_enriched",
            minv=1,
            maxv=self.N_MOST_ENRICHED_MAX,
            controlWidth=60,
            alignment=Qt.AlignRight,
            callback=conditional_set_gene_selection(1))
        gui.widgetLabel(mb, "highest enrichments")
        gui.rubber(mb)
        layout.addWidget(mb, 2, 2, Qt.AlignLeft)

        # Row 3 (mode 2): user-provided gene list. Created disabled; it is
        # enabled in `set_genes` once a usable gene list arrives.
        layout.addWidget(
            gui.appendRadioButton(self.gene_selection_radio_group,
                                  "",
                                  addToLayout=False,
                                  disabled=True), 3, 1)
        sb = gui.hBox(None, margin=0)
        gui.widgetLabel(sb, "User-provided list of genes")
        gui.rubber(sb)
        layout.addWidget(sb, 3, 2)

        # "Differential Expression" box; button order matches `_diff_exprs`.
        layout = QGridLayout()
        self.differential_expression_radio_group = gui.radioButtonsInBox(
            self.controlArea,
            self,
            "differential_expression",
            orientation=layout,
            box="Differential Expression",
            callback=self._set_gene_selection)

        layout.addWidget(
            gui.appendRadioButton(self.differential_expression_radio_group,
                                  "Overexpressed in cluster",
                                  addToLayout=False), 1, 1)
        layout.addWidget(
            gui.appendRadioButton(self.differential_expression_radio_group,
                                  "Underexpressed in cluster",
                                  addToLayout=False), 2, 1)
        layout.addWidget(
            gui.appendRadioButton(self.differential_expression_radio_group,
                                  "Either",
                                  addToLayout=False), 3, 1)

        box = gui.vBox(self.controlArea, "Sorting and Zoom")
        gui.checkBox(box,
                     self,
                     "biclustering",
                     "Biclustering of analysis results",
                     callback=self._set_gene_selection)
        # S/M/L buttons map to the entries of CELL_SIZES.
        gui.radioButtons(box,
                         self,
                         "cell_size_ix",
                         btnLabels=("S", "M", "L"),
                         callback=lambda: self.tableview.set_cell_size(
                             self.CELL_SIZES[self.cell_size_ix]),
                         orientation=Qt.Horizontal)

        gui.rubber(self.controlArea)

        self.apply_button = gui.auto_commit(self.controlArea,
                                            self,
                                            "auto_apply",
                                            "&Apply",
                                            box=False)

        # Main area: the table that displays the analysis result.
        self.tableview = ContingencyTable(self)
        self.mainArea.layout().addWidget(self.tableview)

    def _get_current_gene_selection(self):
        """Return the first entry of the gene-selection history (the current mode)."""
        return self._gene_selection_history[0]

    def _get_previous_gene_selection(self):
        """Return the second entry of the gene-selection history (the previous mode)."""
        return self._gene_selection_history[1]

    def _progress_gene_selection_history(self, new_gene_selection):
        self._gene_selection_history = (new_gene_selection,
                                        self._gene_selection_history[0])

    def _get_info_string(self):
        formatstr = "Cells: {0}\nGenes: {1}\nClusters: {2}"
        if self.data:
            return formatstr.format(len(self.data),
                                    len(self.data.domain.attributes),
                                    len(self.cluster_var.values))
        else:
            return formatstr.format(*["No input data"] * 3)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Handle a new table (or None) on the "Data" input.

        Closes the previous context, resets all analysis state and the spin
        box limits, and then, if the data contains at least one discrete
        variable, restores the context and starts the cluster analysis.
        Otherwise the table view is cleared.

        Parameters
        ----------
        data : Table or None
            The new input; a falsy value is treated as "no data".
        """
        if self.feature_model:
            self.closeContext()
        self.data = data
        self.feature_model.set_domain(None)
        # Drop everything derived from the previous data.
        # NOTE(review): this also discards `gene_list`, which is otherwise set
        # from the "Genes" input in `set_genes` - confirm this is intended.
        self.ca = None
        self.cluster_var = None
        self.columns = None
        self.clusters = None
        self.gene_list = None
        self.model = None
        self.pvalues = None
        self.n_genes_per_cluster_spin.setMaximum(self.N_GENES_PER_CLUSTER_MAX)
        self.n_most_enriched_spin.setMaximum(self.N_MOST_ENRICHED_MAX)

        if not self.data:
            # Without this the info label kept showing the counts of the
            # previous table after the input was removed.
            self.infobox.setText(self._get_info_string())
            self.tableview.clear()
            return

        self.feature_model.set_domain(self.data.domain)
        if not self.feature_model:
            # No discrete variable to cluster by. The info label is not
            # refreshed here because `_get_info_string` reads
            # `self.cluster_var.values`, and `cluster_var` is None now.
            self.tableview.clear()
            return

        self.openContext(self.data)
        if self.cluster_var is None:
            # No variable restored from a context: fall back to the first one.
            self.cluster_var = self.feature_model[0]
        self._run_cluster_analysis()
    @Inputs.genes
    def set_genes(self, data):
        """Handle a new table (or None) on the "Genes" input.

        A usable table must carry the GENE_AS_ATTRIBUTE_NAME table attribute
        plus GENE_ID_ATTRIBUTE (when that flag is truthy) or GENE_ID_COLUMN
        (when it is falsy). With a usable table the gene list is extracted and
        the third gene-selection radio button is enabled; otherwise the list
        is dropped and that button is disabled.
        """
        self.Error.clear()
        gene_list_radio = self.gene_selection_radio_group.group.buttons()[2]

        # Which table attribute is required depends on the value of the
        # GENE_AS_ATTRIBUTE_NAME flag.
        annotations_ok = False
        if data is not None and GENE_AS_ATTRIBUTE_NAME in data.attributes:
            if data.attributes[GENE_AS_ATTRIBUTE_NAME]:
                required_key = GENE_ID_ATTRIBUTE
            else:
                required_key = GENE_ID_COLUMN
            annotations_ok = required_key in data.attributes

        if not annotations_ok:
            if data is not None:
                self.error(
                    "Gene annotations missing in the input data. Use Gene Name Matching widget."
                )
            self.gene_list = None
            gene_list_radio.setDisabled(True)
            if self.gene_selection == 2:
                # The user-list mode is no longer available: switch back to
                # the previously used mode.
                buttons = self.gene_selection_radio_group.group.buttons()
                buttons[self._get_previous_gene_selection()].click()
            return

        if data.attributes[GENE_AS_ATTRIBUTE_NAME]:
            # Gene ids are read from the attributes of the table's variables.
            id_key = data.attributes.get(GENE_ID_ATTRIBUTE)
            self.gene_list = tuple(
                str(var.attributes[id_key])
                for var in data.domain.attributes
                if id_key in var.attributes
                and var.attributes[id_key] != "?")
        else:
            # Gene ids are read from a column of the table.
            id_column = data.attributes.get(GENE_ID_COLUMN)
            column_values = data.get_column_view(id_column)[0]
            self.gene_list = tuple(
                str(value) for value in column_values
                if value not in ("", "?"))

        gene_list_radio.setDisabled(False)
        if self.gene_selection == 2:
            self._set_gene_selection()
        else:
            gene_list_radio.click()

    def _run_cluster_analysis(self):
        """Refresh the info label and spin-box limits, then start the analysis.

        The maxima of the two "Top N" spin boxes are tightened to what the
        current data can provide, but never below 1, the minimum both spin
        boxes were created with. The analysis itself runs as the "init" task.
        """
        self.infobox.setText(self._get_info_string())
        gene_count = len(self.data.domain.attributes)
        cluster_count = len(self.cluster_var.values)
        # A cluster variable without values would otherwise divide by zero.
        genes_per_cluster = gene_count // cluster_count if cluster_count else 0
        # Clamp to >= 1: setting a maximum below the spin box minimum makes Qt
        # lower the minimum too, and resetting the maximum later (as
        # `set_data` does) would not bring the minimum back.
        self.n_genes_per_cluster_spin.setMaximum(
            max(1, min(self.N_GENES_PER_CLUSTER_MAX, genes_per_cluster)))
        self.n_most_enriched_spin.setMaximum(
            max(1, min(self.N_MOST_ENRICHED_MAX, gene_count)))
        # TODO: what happens if error occurs? If CA fails, widget should properly handle it.
        self._start_task_init(
            partial(ClusterAnalysis, self.data, self.cluster_var.name))

    def _start_task_init(self, f):
        """Submit `f` to the executor as the "init" task.

        Any task still tracked in `self._task` is cancelled first. `f` is
        called with an extra `callback` keyword argument; progress reported
        through it is multiplied by 50 for the progress bar.
        `_init_task_finished` is connected to the watcher's `done` signal.
        """
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self._task = Task("init")

        def report_progress(finished):
            # Raising inside the running job is how a cancelled task stops.
            if self._task.cancelled:
                raise KeyboardInterrupt()
            self.progressBarSet(finished * 50)

        job = partial(f, callback=report_progress)

        self.progressBarInit()
        self._task.future = self._executor.submit(job)
        self._task.watcher = FutureWatcher(self._task.future)
        self._task.watcher.done.connect(self._init_task_finished)

    def _start_task_gene_selection(self, f):
        """Submit `f` to the executor as the "gene_selection" task.

        Any task still tracked in `self._task` is cancelled first. `f` is
        called with an extra `callback` keyword argument; the progress bar
        starts at 50 and reported progress is mapped onto the upper half
        (50 + progress * 50). `_gene_selection_task_finished` is connected to
        the watcher's `done` signal.
        """
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self._task = Task("gene_selection")

        def report_progress(finished):
            # Raising inside the running job is how a cancelled task stops.
            if self._task.cancelled:
                raise KeyboardInterrupt()
            self.progressBarSet(50 + finished * 50)

        job = partial(f, callback=report_progress)

        self.progressBarInit()
        self.progressBarSet(50)
        self._task.future = self._executor.submit(job)
        self._task.watcher = FutureWatcher(self._task.future)
        self._task.watcher.done.connect(self._gene_selection_task_finished)

    @Slot(concurrent.futures.Future)
    def _init_task_finished(self, f):
        """Store the result of the finished "init" task and select genes.

        Parameters
        ----------
        f : concurrent.futures.Future
            The finished future; it must be the one of the tracked task.
        """
        # Sanity checks: called on the widget's thread, for the tracked task.
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        # The task is forgotten and the progress bar closed before the result
        # is read.
        self._task = None
        self.progressBarFinished()

        # NOTE(review): `Future.result()` re-raises an exception raised by the
        # job; nothing here catches it.
        self.ca = f.result()
        self._set_gene_selection()

    @Slot(concurrent.futures.Future)
    def _gene_selection_task_finished(self, f):
        """Show the result of the finished "gene_selection" task in the table.

        Parameters
        ----------
        f : concurrent.futures.Future
            The finished future; its result is unpacked into
            (clusters, genes, model, pvalues).
        """
        # Sanity checks: called on the widget's thread, for the tracked task.
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.progressBarFinished()

        self.clusters, genes, self.model, self.pvalues = f.result()
        genes = [str(gene) for gene in genes]
        # The selected genes become the values of a discrete "Gene" variable
        # whose values serve as column headers.
        self.columns = DiscreteVariable("Gene", genes, ordered=True)
        self.tableview.set_headers(
            self.clusters,
            self.columns.values,
            circles=True,
            cell_size=self.CELL_SIZES[self.cell_size_ix],
            bold_headers=False)

        # Tooltip for cell (i, j): i indexes clusters, j indexes genes.
        # The backslash continuation is inside the string literal, so the
        # next line's leading spaces are part of the tooltip text.
        def tooltip(i, j):
            return (
                "<b>cluster</b>: {}<br /><b>gene</b>: {}<br /><b>fraction expressing</b>: {:.2f}<br />\
                                <b>p-value</b>: {:.2e}".format(
                    self.clusters[i], self.columns.values[j], self.model[i, j],
                    self.pvalues[i, j]))

        self.tableview.update_table(self.model, tooltip=tooltip)
        self._invalidate()

    def cancel(self):
        """
        Cancel the current task (if any) and disconnect its finished slot.
        """
        task = self._task
        if task is None:
            return

        task.cancel()
        assert task.future.done()
        # Disconnect whichever `*_task_finished` slot matches the task type.
        if task.type == "init":
            finished_slot = self._init_task_finished
        else:
            finished_slot = self._gene_selection_task_finished
        task.watcher.done.disconnect(finished_slot)
        self._task = None

    def onDeleteWidget(self):
        """Cancel any running task, then defer to the base class."""
        self.cancel()
        super().onDeleteWidget()

    def _gene_selection_changed(self):
        if self.gene_selection != self._get_current_gene_selection():
            self._progress_gene_selection_history(self.gene_selection)
            self.differential_expression_radio_group.setDisabled(
                self.gene_selection == 2)
            self._set_gene_selection()

    def _set_gene_selection(self):
        """Start the gene-selection task for the currently checked mode.

        When no analysis result is available, or an "init" task is still
        tracked, only the outputs are invalidated. For mode 2 with data that
        lacks GENE_ID_ATTRIBUTE an error is shown and the previously used
        mode's radio button is clicked instead.
        """
        self.Warning.clear()
        if self.ca is None or (self._task is not None
                               and self._task.type == "init"):
            self._invalidate()
            return

        if self.gene_selection == 0:
            selector = partial(self.ca.enriched_genes_per_cluster,
                               self.n_genes_per_cluster)
        elif self.gene_selection == 1:
            selector = partial(self.ca.enriched_genes_data,
                               self.n_most_enriched)
        else:
            annotations_missing = (
                self.data is not None
                and GENE_ID_ATTRIBUTE not in self.data.attributes)
            if annotations_missing:
                self.error(
                    "Gene annotations missing in the input data. Use Gene Name Matching widget."
                )
                if self.gene_selection == 2:
                    buttons = self.gene_selection_radio_group.group.buttons()
                    buttons[self._get_previous_gene_selection()].click()
                return

            relevant_genes = tuple(self.ca.intersection(self.gene_list))
            limit = self.N_MOST_ENRICHED_MAX
            if len(relevant_genes) > limit:
                self.warning(
                    "Only first {} reference genes shown.".format(limit))
            selector = partial(self.ca.enriched_genes, relevant_genes[:limit])

        # Options shared by all three modes.
        selector = partial(
            selector,
            enrichment=self._diff_exprs[self.differential_expression],
            biclustering=self.biclustering)
        self._start_task_gene_selection(selector)

    def handleNewSignals(self):
        """Refresh the stored selection and the outputs (via `_invalidate`)."""
        self._invalidate()

    def commit(self):
        """Send the selected data, the annotated data and the contingency table.

        With a non-empty selection of (row, column) cells, the selected data
        holds the instances of the selected clusters restricted to the
        selected gene columns. The contingency table is only sent when an
        analysis result exists and no task is tracked; otherwise None is sent.
        """
        if len(self.selection):
            cluster_ids = {row for row, _ in self.selection}
            column_ids = {col for _, col in self.selection}

            # Domain restricted to the selected genes; class variables and
            # metas are kept.
            domain = self.data.domain
            selected_vars = [domain[self.columns.values[col]]
                             for col in column_ids]
            new_domain = Domain(selected_vars, domain.class_vars, domain.metas)

            # Keep instances that belong to any of the selected clusters.
            cluster_filters = [
                FilterDiscrete(self.cluster_var, [self.clusters[row]])
                for row in cluster_ids
            ]
            in_selected_cluster = Values(cluster_filters, conjunction=False)
            selected_data = in_selected_cluster(self.data)
            selected_data = selected_data.transform(new_domain)

            projected = self.data.transform(new_domain)
            is_selected = np.in1d(self.data.ids, selected_data.ids, True)
            annotated_data = create_annotated_table(
                projected, np.where(is_selected))
        else:
            selected_data = None
            annotated_data = create_annotated_table(self.data, [])

        table = None
        if self.ca is not None and self._task is None:
            table = self.ca.create_contingency_table()

        self.Outputs.selected_data.send(selected_data)
        self.Outputs.annotated_data.send(annotated_data)
        self.Outputs.contingency.send(table)

    def _invalidate(self):
        self.selection = self.tableview.get_selection()
        self.commit()

    def send_report(self):
        """Report the row (cluster) and column variables.

        Each variable is replaced by the data domain's own entry when the
        domain contains it; without data both items are None.
        """
        rows = columns = None
        if self.data is not None:
            domain = self.data.domain
            rows = self.cluster_var
            if rows in domain:
                rows = domain[rows]
            columns = self.columns
            if columns in domain:
                columns = domain[columns]

        items = (("Rows", rows), ("Columns", columns))
        self.report_items(items)
Example #25
0
class OWDataTable(widget.OWWidget):
    """Widget that shows each input data set in its own tab as a table and
    outputs the selected part of the data.
    """
    # Widget metadata.
    name = "Data Table"
    description = "View the data set in a spreadsheet."
    icon = "icons/Table.svg"
    priority = 10

    buttons_area_orientation = Qt.Vertical

    # Signals are declared as tuples here; the "Data" input is handled by
    # `set_dataset` and is flagged `widget.Multiple`.
    inputs = [("Data", Table, "set_dataset", widget.Multiple)]
    outputs = [("Selected Data", Table, widget.Default),
               (ANNOTATED_DATA_SIGNAL_NAME, Table)]

    # Persistent (non-context) settings backing the controls built in __init__.
    show_distributions = Setting(False)
    dist_color_RGB = Setting((220, 220, 220, 255))
    show_attribute_labels = Setting(True)
    select_rows = Setting(True)
    auto_commit = Setting(True)

    color_by_class = Setting(True)
    # Context settings: the stored selection as row and column indices.
    settingsHandler = DomainContextHandler(
        match_values=DomainContextHandler.MATCH_VALUES_ALL)
    selected_rows = ContextSetting([])
    selected_cols = ContextSetting([])

    def __init__(self):
        """Build the info box, the option check boxes, the buttons and the tabs."""
        super().__init__()

        # Input slots keyed by the signal id (`tid`), in insertion order.
        self._inputs = OrderedDict()

        self.dist_color = QColor(*self.dist_color_RGB)

        # "Info" box: four word-wrapped labels filled in by `set_info`.
        info_box = gui.vBox(self.controlArea, "Info")
        self.info_ex = gui.widgetLabel(info_box, 'No data on input.', )
        self.info_ex.setWordWrap(True)
        self.info_attr = gui.widgetLabel(info_box, ' ')
        self.info_attr.setWordWrap(True)
        self.info_class = gui.widgetLabel(info_box, ' ')
        self.info_class.setWordWrap(True)
        self.info_meta = gui.widgetLabel(info_box, ' ')
        self.info_meta.setWordWrap(True)
        info_box.setMinimumWidth(200)
        gui.separator(self.controlArea)

        box = gui.vBox(self.controlArea, "Variables")
        self.c_show_attribute_labels = gui.checkBox(
            box, self, "show_attribute_labels",
            "Show variable labels (if present)",
            callback=self._on_show_variable_labels_changed)

        # Both check boxes below share one callback, which rebuilds the item
        # delegates of all tabs.
        gui.checkBox(box, self, "show_distributions",
                     'Visualize continuous values',
                     callback=self._on_distribution_color_changed)
        gui.checkBox(box, self, "color_by_class", 'Color by instance classes',
                     callback=self._on_distribution_color_changed)

        box = gui.vBox(self.controlArea, "Selection")

        gui.checkBox(box, self, "select_rows", "Select full rows",
                     callback=self._on_select_rows_changed)

        gui.rubber(self.controlArea)

        # The reset button is created without a parent and inserted at the
        # top of the buttons area.
        reset = gui.button(
            None, self, "Restore Original Order", callback=self.restore_order,
            tooltip="Show rows in the original order", autoDefault=False)
        self.buttonsArea.layout().insertWidget(0, reset)
        gui.auto_commit(self.buttonsArea, self, "auto_commit",
                        "Send Selected Rows", "Send Automatically")

        # GUI with tabs
        self.tabs = gui.tabWidget(self.mainArea)
        self.tabs.currentChanged.connect(self._on_current_tab_changed)

        # Bind the platform's standard "Copy" shortcut to `self.copy`.
        copy = QAction("Copy", self, shortcut=QKeySequence.Copy,
                             triggered=self.copy)
        self.addAction(copy)

    def sizeHint(self):
        """Return the size hint of the widget: 800 x 500."""
        return QSize(800, 500)

    def set_dataset(self, data, tid=None):
        """Set the input dataset.

        Parameters
        ----------
        data : Table or None
            The data for the input identified by `tid`; None removes that
            input's tab (if one exists).
        tid : hashable, optional
            Key under which the input's slot is stored in `self._inputs`.
        """
        self.closeContext()
        if data is not None:
            if tid in self._inputs:
                # update existing input slot
                slot = self._inputs[tid]
                view = slot.view
                # reset the (header) view state.
                view.setModel(None)
                view.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)
            else:
                # First data for this id: create and configure a new view.
                view = QTableView()
                view.setSortingEnabled(True)
                view.setHorizontalScrollMode(QTableView.ScrollPerPixel)

                if self.select_rows:
                    view.setSelectionBehavior(QTableView.SelectRows)

                header = view.horizontalHeader()
                header.setSectionsMovable(True)
                header.setSectionsClickable(True)
                header.setSortIndicatorShown(True)
                header.setSortIndicator(-1, Qt.AscendingOrder)

                # QHeaderView does not 'reset' the model sort column,
                # because there is no guaranty (requirement) that the
                # models understand the -1 sort column.
                def sort_reset(index, order):
                    if view.model() is not None and index == -1:
                        view.model().sort(index, order)

                header.sortIndicatorChanged.connect(sort_reset)

            view.dataset = data
            # NOTE(review): `addTab` is also reached for a view that already
            # has a tab (existing-slot branch above) - confirm this is intended.
            self.tabs.addTab(view, getattr(data, "name", "Data"))

            self._setup_table_view(view, data)
            slot = TableSlot(tid, data, table_summary(data), view)
            view._input_slot = slot
            self._inputs[tid] = slot

            self.tabs.setCurrentIndex(self.tabs.indexOf(view))

            self.set_info(slot.summary)

            # The summary's length may still be pending (a Future); refresh
            # the info box once it completes. The callback queues the update
            # through Qt instead of calling `_update_info` directly.
            if isinstance(slot.summary.len, concurrent.futures.Future):
                def update(f):
                    QMetaObject.invokeMethod(
                        self, "_update_info", Qt.QueuedConnection)

                slot.summary.len.add_done_callback(update)

        elif tid in self._inputs:
            # Input removed: drop its slot and its tab.
            slot = self._inputs.pop(tid)
            view = slot.view
            view.hide()
            view.deleteLater()
            self.tabs.removeTab(self.tabs.indexOf(view))

            current = self.tabs.currentWidget()
            if current is not None:
                self.set_info(current._input_slot.summary)

        # The tab bar is only shown when there is more than one tab.
        self.tabs.tabBar().setVisible(self.tabs.count() > 1)
        # Reset the stored selection before the context for the new data is
        # opened, then apply whatever selection is stored and send outputs.
        self.selected_rows = []
        self.selected_cols = []
        self.openContext(data)
        self.set_selection()
        self.commit()

    def _setup_table_view(self, view, data):
        """Setup the `view` (QTableView) with `data` (Orange.data.Table)

        Installs the model (wrapped in a slice proxy when the data has more
        rows than the view can display), the item delegate, the header
        configuration and a fresh selection model. With `data` None the
        view's model is simply removed.
        """
        if data is None:
            view.setModel(None)
            return

        datamodel = TableModel(data)
        datamodel = RichTableDecorator(datamodel)

        rowcount = data.approx_len()

        # Class colors are only used with a discrete class and when the
        # "color by class" option is on.
        if self.color_by_class and data.domain.has_discrete_class:
            color_schema = [
                QColor(*c) for c in data.domain.class_var.colors]
        else:
            color_schema = None
        if self.show_distributions:
            view.setItemDelegate(
                gui.TableBarItem(
                    self, color=self.dist_color, color_schema=color_schema)
            )
        else:
            view.setItemDelegate(QStyledItemDelegate(self))

        # Enable/disable view sorting based on data's type
        view.setSortingEnabled(is_sortable(data))
        header = view.horizontalHeader()
        header.setSectionsClickable(is_sortable(data))
        header.setSortIndicatorShown(is_sortable(data))

        view.setModel(datamodel)

        # Fixed row height derived from the style's size for a 20x20 item.
        vheader = view.verticalHeader()
        option = view.viewOptions()
        size = view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option,
            QSize(20, 20), view)

        vheader.setDefaultSectionSize(size.height() + 2)
        vheader.setMinimumSectionSize(5)
        vheader.setSectionResizeMode(QHeaderView.Fixed)

        # Limit the number of rows displayed in the QTableView
        # (workaround for QTBUG-18490 / QTBUG-28631)
        maxrows = (2 ** 31 - 1) // (vheader.defaultSectionSize() + 2)
        if rowcount > maxrows:
            sliceproxy = TableSliceProxy(
                parent=view, rowSlice=slice(0, maxrows))
            sliceproxy.setSourceModel(datamodel)
            # First reset the view (without this the header view retains
            # it's state - at this point invalid/broken)
            view.setModel(None)
            view.setModel(sliceproxy)

        assert view.model().rowCount() <= maxrows
        assert vheader.sectionSize(0) > 1 or datamodel.rowCount() == 0

        # update the header (attribute names)
        self._update_variable_labels(view)

        # Block selection is used unless full-row selection is enabled.
        selmodel = BlockSelectionModel(
            view.model(), parent=view, selectBlocks=not self.select_rows)
        view.setSelectionModel(selmodel)
        view.selectionModel().selectionChanged.connect(self.update_selection)

    #noinspection PyBroadException
    def set_corner_text(self, table, text):
        """Set table corner text.

        On first use for a `table`, an event filter that paints the corner
        button as a header section is installed on the first QAbstractButton
        found under the table. The outcome is remembered on the table itself
        (`table.btn` on success, `table.btnfailed` on failure), so the
        installation is attempted only once per table.
        """
        # As this is an ugly hack, do everything in
        # try - except blocks, as it may stop working in newer Qt.

        if not hasattr(table, "btn") and not hasattr(table, "btnfailed"):
            try:
                btn = table.findChild(QAbstractButton)

                class efc(QObject):
                    def eventFilter(self, o, e):
                        if (isinstance(o, QAbstractButton) and
                                e.type() == QEvent.Paint):
                            # paint by hand (borrowed from QTableCornerButton)
                            btn = o
                            opt = QStyleOptionHeader()
                            opt.initFrom(btn)
                            state = QStyle.State_None
                            if btn.isEnabled():
                                state |= QStyle.State_Enabled
                            if btn.isActiveWindow():
                                state |= QStyle.State_Active
                            if btn.isDown():
                                state |= QStyle.State_Sunken
                            opt.state = state
                            opt.rect = btn.rect()
                            opt.text = btn.text()
                            opt.position = QStyleOptionHeader.OnlyOneSection
                            painter = QStylePainter(btn)
                            painter.drawControl(QStyle.CE_Header, opt)
                            return True     # eat event
                        return False
                # The filter instance is stored on the table.
                table.efc = efc()
                btn.installEventFilter(table.efc)
                table.btn = btn

                if sys.platform == "darwin":
                    btn.setAttribute(Qt.WA_MacSmallSize)

            except Exception:
                # Deliberately broad: remember the failure and never retry.
                table.btnfailed = True

        if hasattr(table, "btn"):
            try:
                btn = table.btn
                btn.setText(text)
                # Make the vertical header at least as wide as the text.
                opt = QStyleOptionHeader()
                opt.text = btn.text()
                s = btn.style().sizeFromContents(
                    QStyle.CT_HeaderSection,
                    opt, QSize(),
                    btn).expandedTo(QApplication.globalStrut())
                if s.isValid():
                    table.verticalHeader().setMinimumWidth(s.width())
            except Exception:
                # Best effort only; a failure here is silently ignored.
                pass

    def _on_current_tab_changed(self, index):
        """Update the info box on current tab change"""
        view = self.tabs.widget(index)
        if view is not None and view.model() is not None:
            self.set_info(view._input_slot.summary)
        else:
            self.set_info(None)

    def _update_variable_labels(self, view):
        """Update the variable labels visibility for `view`.

        With labels enabled, the header shows labels and names, and the
        corner text lists the sorted label keys found on the domain's
        variables (keys starting with "_" are left out). Otherwise only
        names are shown and the corner text is emptied.
        """
        model = view.model()
        if isinstance(model, TableSliceProxy):
            model = model.sourceModel()

        if not self.show_attribute_labels:
            model.setRichHeaderFlags(RichTableDecorator.Name)
            self.set_corner_text(view, "")
            return

        model.setRichHeaderFlags(
            RichTableDecorator.Labels | RichTableDecorator.Name)

        label_keys = set()
        for var in model.source.domain:
            label_keys.update(var.attributes.keys())
        visible_keys = sorted(
            key for key in label_keys if not key.startswith("_"))
        # The leading empty entry puts a blank first line in the corner text.
        self.set_corner_text(view, "\n".join([""] + visible_keys))

    def _on_show_variable_labels_changed(self):
        """The variable labels (var.attribues) visibility was changed."""
        for slot in self._inputs.values():
            self._update_variable_labels(slot.view)

    def _on_distribution_color_changed(self):
        """Give every tab's view a new item delegate matching the current
        "show distributions" / "color by class" settings, then reset the
        current tab's view."""
        for tab_index in range(self.tabs.count()):
            view = self.tabs.widget(tab_index)

            # Unwrap proxy models to reach the model that holds the data.
            model = view.model()
            while isinstance(model, QAbstractProxyModel):
                model = model.sourceModel()
            class_var = model.source.domain.class_var

            color_schema = None
            if self.color_by_class and class_var and class_var.is_discrete:
                color_schema = [QColor(*rgb) for rgb in class_var.colors]

            if self.show_distributions:
                delegate = gui.TableBarItem(self, color=self.dist_color,
                                            color_schema=color_schema)
            else:
                delegate = QStyledItemDelegate(self)
            view.setItemDelegate(delegate)

        current = self.tabs.currentWidget()
        if current:
            current.reset()

    def _on_select_rows_changed(self):
        """Apply the "select full rows" setting to the views of all inputs."""
        for slot in self._inputs.values():
            view = slot.view
            selection_model = view.selectionModel()
            selection_model.setSelectBlocks(not self.select_rows)

            if not self.select_rows:
                view.setSelectionBehavior(QTableView.SelectItems)
                continue

            view.setSelectionBehavior(QTableView.SelectRows)
            # Expand the current selection to full row selection.
            expand_flags = QItemSelectionModel.Select | QItemSelectionModel.Rows
            selection_model.select(selection_model.selection(), expand_flags)

    def restore_order(self):
        """Restore the original data order of the current view."""
        view = self.tabs.currentWidget()
        if view is None:
            return
        # A sort indicator of -1 means "no sort column".
        header = view.horizontalHeader()
        header.setSortIndicator(-1, Qt.AscendingOrder)

    def set_info(self, summary):
        """Fill the four info labels from `summary`.

        With `summary` None the first label reads "No data on input." and
        the other three are emptied; otherwise the texts come from
        `format_summary(summary)`.
        """
        if summary is None:
            texts = ("No data on input.", "", "", "")
        else:
            info_len, info_attr, info_class, info_meta = \
                format_summary(summary)
            texts = (info_len, info_attr, info_class, info_meta)

        labels = (self.info_ex, self.info_attr, self.info_class,
                  self.info_meta)
        for label, text in zip(labels, texts):
            label.setText(text)

    @Slot()
    def _update_info(self):
        """Refresh the info box from the current tab, if it has a model."""
        view = self.tabs.currentWidget()
        if view is None or view.model() is None:
            return
        self.set_info(view._input_slot.summary)

    def update_selection(self, *_):
        """Commit the current selection; any positional arguments are ignored."""
        self.commit()

    def set_selection(self):
        """
        Re-apply the stored row/column selection to the current view.

        Nothing is done when no selection is stored, when there is no current
        view (or the view has no model), or when the stored indices do not
        fit inside the current model.
        """
        if not (len(self.selected_rows) and len(self.selected_cols)):
            return

        view = self.tabs.currentWidget()
        model = view.model() if view is not None else None
        if model is None:
            # Nothing to select in; previously this raised AttributeError.
            return

        # Assumes the stored indices are sorted ascending (get_selection
        # returns them sorted), so the last entry is the largest one.
        if model.rowCount() <= self.selected_rows[-1] or \
                model.columnCount() <= self.selected_cols[-1]:
            return

        selection = QItemSelection()
        rowranges = list(ranges(self.selected_rows))
        colranges = list(ranges(self.selected_cols))

        # One selection range per (row range, column range) pair; the range
        # ends are exclusive, hence the "- 1" for the bottom-right index.
        for rowstart, rowend in rowranges:
            for colstart, colend in colranges:
                selection.append(
                    QItemSelectionRange(
                        model.index(rowstart, colstart),
                        model.index(rowend - 1, colend - 1)
                    )
                )
        view.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def get_selection(self, view):
        """
        Return ``(rows, cols)``, the sorted selected row and column indices
        of the selection in *view*.
        """
        model = view.model()
        selection = view.selectionModel().selection()
        # Peel off proxy models one at a time, translating the selection
        # into the coordinates of each underlying model.
        while isinstance(model, QAbstractProxyModel):
            selection = model.mapSelectionToSource(selection)
            model = model.sourceModel()

        assert isinstance(model, TableModel)

        selected = selection.indexes()

        unique_rows = list({index.row() for index in selected})
        # Rows additionally pass through the model's sorting map (if any).
        rows = sorted(model.mapToTableRows(unique_rows))
        cols = sorted({index.column() for index in selected})
        return rows, cols

    @staticmethod
    def _get_model(view):
        """Return the model of *view* with every proxy model layer removed."""
        current = view.model()
        while True:
            if not isinstance(current, QAbstractProxyModel):
                return current
            current = current.sourceModel()

    def commit(self):
        """
        Commit/send the current selected row/column selection.

        The selected subset (or None when no rows are selected or there is no
        current view/model) is sent on "Selected Data"; the result of
        ``create_annotated_table(table, rowsel)`` is sent on
        ``ANNOTATED_DATA_SIGNAL_NAME``. For ``SqlTable`` inputs both outputs
        are sent as None. The row/column selection is also stored in
        ``selected_rows`` / ``selected_cols``.
        """
        selected_data = table = rowsel = None
        view = self.tabs.currentWidget()
        if view and view.model() is not None:
            model = self._get_model(view)
            table = model.source  # The input data table

            # Selections of individual instances are not implemented
            # for SqlTables
            if isinstance(table, SqlTable):
                self.send("Selected Data", selected_data)
                self.send(ANNOTATED_DATA_SIGNAL_NAME, None)
                return

            rowsel, colsel = self.get_selection(view)
            self.selected_rows, self.selected_cols = rowsel, colsel

            def select(data, rows, domain):
                """
                Select the data subset with specified rows and domain subsets.

                If either rows or domain is None they mean select all.
                """
                if rows is not None and domain is not None:
                    return data.from_table(domain, data, rows)
                elif rows is not None:
                    # The source table must be passed explicitly; the rows
                    # are the third argument, not the second.
                    return data.from_table(data.domain, data, rows)
                elif domain is not None:
                    return data.from_table(domain, data)
                else:
                    return data

            domain = table.domain

            if len(colsel) < len(domain) + len(domain.metas):
                # only a subset of the columns is selected
                # Column c of the model is looked up as allvars[c].
                allvars = domain.class_vars + domain.metas + domain.attributes
                columns = [(c, model.headerData(c, Qt.Horizontal,
                                                TableModel.DomainRole))
                           for c in colsel]
                assert all(role is not None for _, role in columns)

                def select_vars(role):
                    """select variables for role (TableModel.DomainRole)"""
                    return [allvars[c] for c, r in columns if r == role]

                attrs = select_vars(TableModel.Attribute)
                if attrs and issparse(table.X):
                    # for sparse data you can only select all attributes
                    attrs = table.domain.attributes
                class_vars = select_vars(TableModel.ClassVar)
                metas = select_vars(TableModel.Meta)
                domain = Orange.data.Domain(attrs, class_vars, metas)

            # Avoid a copy if all/none rows are selected.
            if not rowsel:
                selected_data = None
            elif len(rowsel) == len(table):
                selected_data = select(table, None, domain)
            else:
                selected_data = select(table, rowsel, domain)

        self.send("Selected Data", selected_data)
        self.send(ANNOTATED_DATA_SIGNAL_NAME,
                  create_annotated_table(table, rowsel))

    def copy(self):
        """
        Put the selection of the current table view on the clipboard.
        """
        current_view = self.tabs.currentWidget()
        if current_view is None:
            return
        mime_data = table_selection_to_mime_data(current_view)
        clipboard = QApplication.clipboard()
        clipboard.setMimeData(mime_data, QClipboard.Clipboard)

    def send_report(self):
        """Report a brief description of the data and the current table view."""
        view = self.tabs.currentWidget()
        if view and view.model():
            source_model = self._get_model(view)
            self.report_data_brief(source_model.source)
            self.report_table(view)
Example #26
0
class OWRank(OWWidget):
    """Widget that ranks data features by score and outputs a selection."""
    # Widget metadata.
    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102

    buttons_area_orientation = Qt.Vertical

    # Input/output signal declarations; the third element of each input
    # names the handler method ("Scorer" accepts multiple connections).
    inputs = [("Data", Table, "setData"),
              ("Scorer", score.Scorer, "set_learner", widget.Multiple)]
    outputs = [("Reduced Data", Table, widget.Default), ("Scores", Table)]

    # Selection method identifiers; also used as the ids of the radio
    # buttons in the selection button group.
    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    # Names of the scores checked in classification / regression mode.
    cls_default_selected = Setting({"Gain Ratio", "Gini Decrease"})
    reg_default_selected = Setting(
        {"Univariate Linear Regression", "RReliefF"})
    # Active selection method and the N used by "Best ranked".
    selectMethod = Setting(SelectNBest)
    nSelected = Setting(5)
    auto_apply = Setting(True)

    # Header state for discrete/continuous/no_class scores
    headerState = Setting([None, None, None])

    settingsHandler = DomainContextHandler()
    selected_rows = ContextSetting([])

    # Class-level attributes the score check boxes are bound to by name;
    # _score_vars lists those names in the order they are zipped with SCORES
    # when the check boxes are created in __init__.
    gain = inf_gain = gini = anova = chi2 = ulr = relief = rrelief = fcbc = True
    _score_vars = [
        "gain", "inf_gain", "gini", "anova", "chi2", "relief", "fcbc", "ulr",
        "rrelief"
    ]

    class Warning(OWWidget.Warning):
        no_target_var = Msg("Data does not have a target variable")

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("{}")

    def __init__(self):
        """
        Initialise widget state and build the GUI.

        Creates the score check boxes, the attribute selection controls and
        three stacked rank views (discrete, continuous and no-class mode),
        then switches to the discrete mode.
        """
        super().__init__()
        self.measure_scores = None
        self.update_scores = True
        self.usefulAttributes = []
        self.learners = {}
        self.labels = []
        self.out_domain_desc = None

        self.all_measures = SCORES

        self.selectedMeasures = {m.name: True for m in self.all_measures}
        # Discrete (0) or continuous (1) class mode
        self.rankMode = 0

        self.data = None

        self.discMeasures = [
            m for m in self.all_measures
            if issubclass(DiscreteVariable, m.score.class_type)
        ]
        self.contMeasures = [
            m for m in self.all_measures
            if issubclass(ContinuousVariable, m.score.class_type)
        ]

        self.score_checks = []
        self.cls_scoring_box = gui.vBox(None, "Scoring for Classification")
        self.reg_scoring_box = gui.vBox(None, "Scoring for Regression")
        # The first seven names in _score_vars go into the classification
        # box, the last two into the regression box.
        boxes = [self.cls_scoring_box] * 7 + [self.reg_scoring_box] * 2
        for _score, var, box in zip(SCORES, self._score_vars, boxes):
            check = gui.checkBox(
                box,
                self,
                var,
                label=_score.name,
                # Bind the score as a default to avoid late-binding closures.
                callback=lambda val=_score: self.measuresSelectionChanged(val))
            self.score_checks.append(check)

        self.score_stack = QtGui.QStackedWidget(self)
        self.score_stack.addWidget(self.cls_scoring_box)
        self.score_stack.addWidget(self.reg_scoring_box)
        self.score_stack.addWidget(QtGui.QWidget())
        self.controlArea.layout().addWidget(self.score_stack)

        gui.rubber(self.controlArea)

        selMethBox = gui.vBox(self.controlArea,
                              "Select Attributes",
                              addSpace=True)

        grid = QtGui.QGridLayout()
        grid.setContentsMargins(6, 0, 6, 0)
        self.selectButtons = QtGui.QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectMethod)

        def button(text, buttonid, toolTip=None):
            b = QtGui.QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

        s = gui.spin(selMethBox,
                     self,
                     "nSelected",
                     1,
                     100,
                     callback=self.nSelectedChanged)

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

        # Discrete, continuous and no_class table views are stacked
        self.ranksViewStack = QtGui.QStackedLayout()
        self.mainArea.layout().addLayout(self.ranksViewStack)

        self.discRanksLabels = ["#"] + [m.shortname for m in self.discMeasures]
        (self.discRanksView, self.discRanksModel,
         self.discRanksProxyModel) = self._create_ranks_view(
             self.discRanksLabels, self.headerState[0])

        self.contRanksLabels = ["#"] + [m.shortname for m in self.contMeasures]
        (self.contRanksView, self.contRanksModel,
         self.contRanksProxyModel) = self._create_ranks_view(
             self.contRanksLabels, self.headerState[1])

        self.noClassRanksLabels = ["#"]
        (self.noClassRanksView, self.noClassRanksModel,
         self.noClassRanksProxyModel) = self._create_ranks_view(
             self.noClassRanksLabels, self.headerState[2])

        # Switch the current view to Discrete
        self.switchRanksMode(0)
        self.resetInternals()
        self.updateDelegates()
        self.updateVisibleScoreColumns()

        self.resize(690, 500)

        self.measure_scores = table((len(self.measures), 0), None)

    def _create_ranks_view(self, labels, header_state):
        """
        Create one rank table view and add it to ``ranksViewStack``.

        labels: horizontal header labels for the source model.
        header_state: saved header state to restore, or None to skip.

        Returns the tuple ``(view, source_model, proxy_model)``.
        """
        view = QtGui.QTableView()
        self.ranksViewStack.addWidget(view)
        view.setSelectionBehavior(QtGui.QTableView.SelectRows)
        view.setSelectionMode(QtGui.QTableView.MultiSelection)
        view.setSortingEnabled(True)

        model = QtGui.QStandardItemModel(self)
        model.setHorizontalHeaderLabels(labels)

        # The view shows the source model through a sorting proxy.
        proxy = MySortProxyModel(self)
        proxy.setSourceModel(model)
        view.setModel(proxy)

        view.setColumnWidth(0, 20)
        view.selectionModel().selectionChanged.connect(self.commit)
        view.pressed.connect(self.onSelectItem)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)
        view.verticalHeader().sectionClicked.connect(self.onSelectItem)

        if header_state is not None:
            view.horizontalHeader().restoreState(header_state)
        return view, model, proxy

    def switchRanksMode(self, index):
        """
        Make the discrete (0), continuous (1) or no_class (any other index)
        view, model, measures and score check boxes the active ones.
        """
        self.rankMode = index
        self.ranksViewStack.setCurrentIndex(index)

        size_policy = QtGui.QSizePolicy
        ignored = (size_policy.Ignored, size_policy.Ignored)
        expanding = (size_policy.Expanding, size_policy.Expanding)

        if index == 0:
            self.ranksView = self.discRanksView
            self.ranksModel = self.discRanksModel
            self.ranksProxyModel = self.discRanksProxyModel
            self.measures = self.discMeasures
            self.selected_checks = self.cls_default_selected
            self.reg_scoring_box.setSizePolicy(*ignored)
            self.cls_scoring_box.setSizePolicy(*expanding)
        elif index == 1:
            self.ranksView = self.contRanksView
            self.ranksModel = self.contRanksModel
            self.ranksProxyModel = self.contRanksProxyModel
            self.measures = self.contMeasures
            self.selected_checks = self.reg_default_selected
            self.cls_scoring_box.setSizePolicy(*ignored)
            self.reg_scoring_box.setSizePolicy(*expanding)
        else:
            self.ranksView = self.noClassRanksView
            self.ranksModel = self.noClassRanksModel
            self.ranksProxyModel = self.noClassRanksProxyModel
            self.measures = []
            self.selected_checks = set()
            self.reg_scoring_box.setSizePolicy(*ignored)
            self.cls_scoring_box.setSizePolicy(*ignored)

        # One (empty) score row per built-in measure and connected scorer.
        n_score_rows = len(self.measures) + len(self.learners)
        self.measure_scores = table((n_score_rows, 0), None)

        # Suppress score recomputation while the check boxes are synced.
        self.update_scores = False
        for checkbox, measure in zip(self.score_checks, SCORES):
            checkbox.setChecked(measure.name in self.selected_checks)
        self.update_scores = True

        self.score_stack.setCurrentIndex(index)
        self.updateVisibleScoreColumns()

    @check_sql_input
    def setData(self, data):
        """
        Handle the "Data" input: pick the rank mode from the class variable,
        fill the first model column and row headers, and recompute scores.
        """
        self.closeContext()
        self.clear_messages()
        self.resetInternals()

        self.data = data
        self.switchRanksMode(0)
        if self.data is None:
            self.send("Scores", None)
        else:
            domain = self.data.domain
            attributes = domain.attributes
            self.usefulAttributes = [
                var for var in attributes
                if var.is_discrete or var.is_continuous
            ]

            if domain.has_continuous_class:
                self.switchRanksMode(1)
            elif not domain.class_var:
                self.Warning.no_target_var()
                self.switchRanksMode(2)
            elif not domain.has_discrete_class:
                self.Error.invalid_type(type(domain.class_var).__name__)

            if issparse(self.data.X):
                # keep only measures supporting sparse data
                self.measures = [
                    measure for measure in self.measures
                    if measure.score.supports_sparse_data
                ]

            self.ranksModel.setRowCount(len(attributes))
            for row, var in enumerate(attributes):
                # Column 0: number of values for discrete variables, else "C".
                first_cell = len(var.values) if var.is_discrete else "C"
                cell_item = ScoreValueItem()
                cell_item.setData(first_cell, Qt.DisplayRole)
                self.ranksModel.setItem(row, 0, cell_item)

                header_item = QtGui.QStandardItem(var.name)
                header_item.setData(gui.attributeIconDict[var],
                                    Qt.DecorationRole)
                self.ranksModel.setVerticalHeaderItem(row, header_item)

            n_score_rows = len(self.measures) + len(self.learners)
            self.measure_scores = table((n_score_rows, len(attributes)), None)
            self.updateScores()

        self.selected_rows = []
        self.openContext(data)
        self.selectMethodChanged()
        self.commit()

    def get_selection(self):
        """Return the distinct row numbers of the current view's selection."""
        selected = self.ranksView.selectionModel().selection().indexes()
        return list({index.row() for index in selected})

    def set_learner(self, learner, lid=None):
        """
        Handle the "Scorer" input.

        learner: the scorer to store under key *lid*, or None to remove the
            one stored under that key.
        lid: key identifying the scorer in ``self.learners``.

        Afterwards the scorer rows of ``measure_scores`` are reset, the
        scorer columns are recomputed and the output is committed.
        """
        if learner is None:
            if lid is not None:
                # pop() instead of del: None may arrive for a key that never
                # stored a scorer, which used to raise KeyError.
                self.learners.pop(lid, None)
        else:
            self.learners[lid] = score_meta(learner.name, learner.name,
                                            learner)
        attrs_len = 0 if not self.data else len(self.data.domain.attributes)
        shape = (len(self.learners), attrs_len)
        # Keep the built-in measure rows, replace all scorer rows by blanks.
        self.measure_scores = self.measure_scores[:len(self.measures)]
        self.measure_scores += table(shape, None)
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels)
        # Recompute only the scorer columns, not the built-in measures.
        measures_mask = [False] * len(self.measures)
        measures_mask += [True] * len(self.learners)
        self.updateScores(measures_mask)
        self.commit()

    def updateScores(self, measuresMask=None):
        """
        Update the current computed scores.

        If `measuresMask` is given it must be an list of bool values
        indicating what measures should be recomputed; its entries correspond
        to the built-in measures followed by the connected scorers. When it
        is None, the checked built-in measures and all connected scorers are
        recomputed.

        Does nothing when there is no data. Otherwise the rank models are
        refreshed and the scores table is sent on "Scores".
        """
        if not self.data:
            return
        if self.data.has_missing():
            self.information("Missing values have been imputed.")

        measures = self.measures + list(self.learners.values())
        if measuresMask is None:
            # Update all selected measures
            measuresMask = [
                self.selectedMeasures.get(m.name) for m in self.measures
            ]
            # Connected scorers are always recomputed. Plain True is used
            # (not the scorer name) so an empty name cannot disable one.
            measuresMask = measuresMask + [True] * len(self.learners)

        data = self.data
        learner_col = len(self.measures)
        # Scorer labels are rebuilt when the mask has no scorer entries or
        # the first scorer entry is set.
        if len(measuresMask) <= len(self.measures) or \
                measuresMask[len(self.measures)]:
            self.labels = []
            self.Error.inadequate_learner.clear()

        self.setStatusMessage("Running")
        with self.progressBar():
            # Count exactly the entries the loop below does not skip.
            n_measure_update = sum(1 for x in measuresMask if x)
            count = 0
            for index, (meas, mask) in enumerate(zip(measures, measuresMask)):
                if not mask:
                    continue
                self.progressBarSet(90 * count / n_measure_update)
                count += 1
                if index < len(self.measures):
                    estimator = meas.score()
                    try:
                        self.measure_scores[index] = estimator(data)
                    except ValueError:
                        # Scoring the whole table failed; fall back to
                        # scoring attribute by attribute, None on failure.
                        self.measure_scores[index] = []
                        for attr in data.domain.attributes:
                            try:
                                self.measure_scores[index].append(
                                    estimator(data, attr))
                            except ValueError:
                                self.measure_scores[index].append(None)
                else:
                    learner = meas.score
                    if isinstance(learner, Learner) and \
                            not learner.check_learner_adequacy(self.data.domain):
                        self.Error.inadequate_learner(
                            learner.learner_adequacy_err_msg)
                        scores = table((1, len(data.domain.attributes)))
                    else:
                        scores = meas.score.score_data(data)
                    # A scorer may yield several rows; each becomes its own
                    # labelled column after the built-in measures.
                    for i, row in enumerate(scores):
                        self.labels.append(meas.shortname + str(i + 1))
                        if len(self.measure_scores) > learner_col:
                            self.measure_scores[learner_col] = row
                        else:
                            self.measure_scores.append(row)
                        learner_col += 1
            self.progressBarSet(90)
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels +
                                                      self.labels)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels +
                                                      self.labels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels + self.labels)
        self.updateRankModel(measuresMask)
        self.ranksProxyModel.invalidate()
        self.selectMethodChanged()
        self.send("Scores", self.create_scores_table(self.labels))
        self.setStatusMessage("")

    def updateRankModel(self, measuresMask):
        """
        Update the rankModel.

        measuresMask: per-score-row flags; rows whose flag is false and that
            already have an item in the model are left untouched. The list
            passed in is not modified.
        """
        values = []
        diff = len(self.measure_scores) - len(measuresMask)
        # Work on a copy so the caller's list is not extended in place.
        mask = list(measuresMask)
        if mask:
            # Pad with the last flag so every score row has a mask entry.
            mask += [mask[-1]] * diff
        # Drop model columns beyond the available score rows.
        for i in range(self.ranksModel.columnCount() - 1,
                       len(self.measure_scores), -1):
            self.ranksModel.removeColumn(i)

        # Score row i is shown in model column i + 1 (column 0 is "#").
        for i, (scores, m) in enumerate(zip(self.measure_scores, mask)):
            if not m and self.ranksModel.item(0, i + 1):
                values.append([])
                continue
            values_one = []
            for j, _score in enumerate(scores):
                values_one.append(_score)
                item = self.ranksModel.item(j, i + 1)
                if not item:
                    item = ScoreValueItem()
                    self.ranksModel.setItem(j, i + 1, item)
                item.setData(_score, Qt.DisplayRole)
            values.append(values_one)
        for i, (vals, m) in enumerate(zip(values, mask)):
            if not m:
                continue
            valid_vals = [v for v in vals if v is not None]
            if valid_vals:
                vmin, vmax = min(valid_vals), max(valid_vals)
                for j, v in enumerate(vals):
                    if v is not None:
                        # Set the bar ratio role for i-th measure.
                        # "or 1" avoids dividing by zero when all are equal.
                        ratio = float((v - vmin) / ((vmax - vmin) or 1))
                        item = self.ranksModel.item(j, i + 1)
                        item.setData(ratio, gui.BarRatioRole)

        self.ranksView.setColumnWidth(0, 20)
        self.ranksView.resizeRowsToContents()

    def resetInternals(self):
        """Forget the current data and remove all rows of the active model."""
        # The data is dropped before the model rows are removed.
        self.data, self.usefulAttributes = None, []
        self.ranksModel.setRowCount(0)

    def onSelectItem(self, index):
        """
        React to the user selecting/unselecting an item in the table view:
        switch to the manual selection method and commit.
        """
        manual = OWRank.SelectManual
        self.selectMethod = manual
        self.selectButtons.button(manual).setChecked(True)
        self.commit()

    def setSelectMethod(self, method):
        """Activate selection *method*; a no-op when it is already active."""
        if self.selectMethod == method:
            return
        self.selectMethod = method
        self.selectButtons.button(method).setChecked(True)
        self.selectMethodChanged()

    def selectMethodChanged(self):
        """Re-run the automatic selection and give focus to the ranks view."""
        self.autoSelection()
        self.ranksView.setFocus()

    def nSelectedChanged(self):
        """Switch to "best ranked" selection after the N spin box changes."""
        n_best = OWRank.SelectNBest
        self.selectMethod = n_best
        self.selectButtons.button(n_best).setChecked(True)
        self.selectMethodChanged()

    def autoSelection(self):
        """
        Replace the view selection according to ``selectMethod``: nothing,
        all rows, the first ``nSelected`` rows, or the stored
        ``selected_rows``.
        """
        selModel = self.ranksView.selectionModel()
        n_rows = self.ranksModel.rowCount()
        last_col = self.ranksModel.columnCount() - 1
        proxy = self.ranksProxyModel
        method = self.selectMethod

        if method == OWRank.SelectAll:
            selection = QtGui.QItemSelection(
                proxy.index(0, 0), proxy.index(n_rows - 1, last_col))
        elif method == OWRank.SelectNBest:
            n_best = min(self.nSelected, n_rows)
            selection = QtGui.QItemSelection(
                proxy.index(0, 0), proxy.index(n_best - 1, last_col))
        else:
            # SelectNone keeps the selection empty; any other method
            # re-applies the stored rows, one full-width range per row.
            selection = QtGui.QItemSelection()
            if method != OWRank.SelectNone:
                for row in self.selected_rows:
                    selection.append(
                        QtGui.QItemSelectionRange(
                            proxy.index(row, 0),
                            proxy.index(row, last_col)))

        selModel.select(selection, QtGui.QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """
        Handle a click on horizontal header section *index*: refresh the
        "best ranked" selection and save the header state of all views.
        """
        if self.selectMethod == OWRank.SelectNBest and index >= 1:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states
        views = (self.discRanksView, self.contRanksView,
                 self.noClassRanksView)
        self.headerState = [
            bytes(view.horizontalHeader().saveState()) for view in views
        ]

    def measuresSelectionChanged(self, measure):
        """
        Toggle the checked state of *measure*, compute its scores if they
        are still missing, and update the column visibility.
        """
        name = measure.name
        was_checked = self.selectedMeasures[name]
        self.selectedMeasures[name] = not was_checked
        if was_checked:
            self.selected_checks.discard(name)
        else:
            self.selected_checks.add(name)

        measures_mask = [False] * (len(self.measures) + len(self.learners))
        # Update scores for shown column if they are not yet computed.
        if measure in self.measures and self.measure_scores:
            position = self.measures.index(measure)
            if all(s is None for s in self.measure_scores[position]):
                measures_mask[position] = True
        if self.update_scores:
            self.updateScores(measures_mask)
        self.updateVisibleScoreColumns()

    def updateVisibleScoreColumns(self):
        """
        Show/hide the score columns of the current view to match the checked
        measures, and pick a default sort column when no header state is
        stored for the current mode.
        """
        for column, measure in enumerate(self.measures, start=1):
            visible = self.selectedMeasures.get(measure.name)
            self.ranksView.setColumnHidden(column, not visible)
            self.ranksView.setColumnWidth(column, 100)

        sort_section = self.ranksView.horizontalHeader().sortIndicatorSection()
        if self.ranksView.isColumnHidden(sort_section):
            # The sorted column is hidden; drop the stored header state.
            self.headerState[self.rankMode] = None

        if self.headerState[self.rankMode] is not None:
            return

        # Sort by the first measure listed in selected_checks, or by the
        # column right after the built-in measures when there is none.
        fallback = len(self.measures) + 1
        sort_col = next(
            (column for column, measure in enumerate(self.measures, start=1)
             if measure.name in self.selected_checks),
            fallback)
        self.ranksView.sortByColumn(sort_col, Qt.DescendingOrder)
        self.autoSelection()

    def updateDelegates(self):
        """Give each of the three rank views its own colored-bar delegate."""
        views = (self.contRanksView, self.discRanksView,
                 self.noClassRanksView)
        for view in views:
            view.setItemDelegate(gui.ColoredBarItemDelegate(self))

    def send_report(self):
        """Report the input domain, the ranks table and the output domain."""
        if self.data:
            self.report_domain("Input", self.data.domain)
            self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
            if self.out_domain_desc is not None:
                self.report_items("Output", self.out_domain_desc)

    def commit(self):
        """
        Store the current selection and send the data reduced to the
        selected attributes on "Reduced Data" (None when nothing applies).
        """
        self.selected_rows = self.get_selection()
        if self.data and \
                len(self.data.domain.attributes) == len(self.selected_rows):
            # Every attribute is selected: reflect it as "All".
            self.selectMethod = OWRank.SelectAll
            self.selectButtons.button(self.selectMethod).setChecked(True)

        selected = self.selectedAttrs()
        if self.data and selected:
            in_domain = self.data.domain
            out_domain = Domain(selected, in_domain.class_var, in_domain.metas)
            reduced = Table(out_domain, self.data)
            self.send("Reduced Data", reduced)
            self.out_domain_desc = report.describe_domain(reduced.domain)
        else:
            self.send("Reduced Data", None)
            self.out_domain_desc = None

    def selectedAttrs(self):
        """Return the attributes of the selected rows ([] without data)."""
        if not self.data:
            return []
        proxy_rows = self.ranksView.selectionModel().selectedRows(0)
        to_source = self.ranksProxyModel.mapToSource
        # Selected indexes are mapped from the proxy to the source model.
        source_rows = [to_source(index).row() for index in proxy_rows]
        attributes = self.data.domain.attributes
        return [attributes[row] for row in source_rows]

    def create_scores_table(self, labels):
        """
        Build the "Feature Scores" table: one row per data attribute, one
        column per checked measure plus one per entry of *labels*.

        Returns None when there would be no score columns.
        """
        shown = [
            (position, measure.name)
            for position, measure in enumerate(self.measures)
            if self.selectedMeasures.get(measure.name, False)
        ]
        indices = [position for position, _ in shown]
        measures = [name for _, name in shown]
        measures.extend(labels)
        if not measures:
            return None

        features = [ContinuousVariable(title) for title in measures]
        metas = [StringVariable("Feature name")]
        domain = Domain(features, metas=metas)

        # Keep rows of checked measures and every row after the built-in
        # measures, then transpose so that attributes become table rows.
        n_builtin = len(self.measures)
        kept_rows = [
            row for position, row in enumerate(self.measure_scores)
            if position in indices or position >= n_builtin
        ]
        scores = np.array(kept_rows).T

        feature_names = np.array([a.name for a in self.data.domain.attributes])
        # Reshape to 2d array as Table does not like 1d arrays
        feature_names = feature_names[:, None]

        new_table = Table(domain, scores, metas=feature_names)
        new_table.name = "Feature Scores"
        return new_table
Example #27
0
class OWKeywords(OWWidget, ConcurrentWidgetMixin):
    """Widget that scores the words of a corpus and outputs the selected ones.

    Scores are computed by ``run``, launched through ``self.start`` in
    ``update_scores``, and delivered to ``on_done``.  They are shown in a
    sortable, filterable table; rows are selected according to
    ``sel_method`` (see ``_select_rows``) and the selected words are sent to
    the "Words" output by ``commit``.
    """
    name = "Extract Keywords"
    description = "Infers characteristic words from the input corpus."
    icon = "icons/Keywords.svg"
    priority = 1100
    keywords = ["characteristic", "term"]

    # (column, order) used until the user clicks a header section.
    DEFAULT_SORTING = (1, Qt.DescendingOrder)

    settingsHandler = DomainContextHandler()
    # Names of the scoring methods whose check boxes are ticked.
    selected_scoring_methods: Set[str] = Setting({ScoringMethods.TF_IDF})
    # Indices into YAKE_LANGUAGES / RAKE_LANGUAGES chosen in the combo boxes.
    yake_lang_index: int = Setting(YAKE_LANGUAGES.index("English"))
    rake_lang_index: int = Setting(RAKE_LANGUAGES.index("English"))
    agg_method: int = Setting(AggregationMethods.MEAN)
    # Row selection policy and the row count used by the N_BEST policy.
    sel_method: int = ContextSetting(SelectionMethods.N_BEST)
    n_selected: int = ContextSetting(3)
    sort_column_order: Tuple[int, int] = Setting(DEFAULT_SORTING)
    # Words (first table column) of the currently selected rows.
    selected_words = ContextSetting([], schema_only=True)
    auto_apply: bool = Setting(True)

    class Inputs:
        corpus = Input("Corpus", Corpus)
        words = Input("Words", Table)

    class Outputs:
        words = Output("Words", Corpus)

    class Warning(OWWidget.Warning):
        no_words_column = Msg("Input is missing 'Words' column.")

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self.corpus: Optional[Corpus] = None
        self.words: Optional[List] = None
        # Keyword results keyed by scoring method; passed to ``run`` and
        # replaced by ``results.all_keywords`` in ``on_done``.
        self.__cached_keywords = {}
        self.model = KeywordsTableModel(parent=self)
        self._setup_gui()

    def _setup_gui(self):
        """Create the controls, the filter line edit and the keywords view."""
        grid = QGridLayout()
        box = gui.widgetBox(self.controlArea, "Scoring Methods", grid)

        # The language combos are created first and moved into the grid,
        # next to the check box of their scoring method, in the loop below.
        yake_cb = gui.comboBox(self.controlArea,
                               self,
                               "yake_lang_index",
                               items=YAKE_LANGUAGES,
                               callback=self.__on_yake_lang_changed)
        rake_cb = gui.comboBox(self.controlArea,
                               self,
                               "rake_lang_index",
                               items=RAKE_LANGUAGES,
                               callback=self.__on_rake_lang_changed)

        for i, (method_name, _) in enumerate(ScoringMethods.ITEMS):
            check_box = QCheckBox(method_name, self)
            check_box.setChecked(method_name in self.selected_scoring_methods)
            # ``name=method_name`` binds the current loop value to the lambda.
            check_box.stateChanged.connect(
                lambda state, name=method_name:
                self.__on_scoring_method_state_changed(state, name))
            box.layout().addWidget(check_box, i, 0)
            if method_name == ScoringMethods.YAKE:
                box.layout().addWidget(yake_cb, i, 1)
            if method_name == ScoringMethods.RAKE:
                box.layout().addWidget(rake_cb, i, 1)

        box = gui.vBox(self.controlArea, "Aggregation")
        gui.comboBox(box,
                     self,
                     "agg_method",
                     items=AggregationMethods.ITEMS,
                     callback=self.update_scores)

        box = gui.vBox(self.controlArea, "Select Words")
        grid = QGridLayout()
        grid.setContentsMargins(0, 0, 0, 0)
        box.layout().addLayout(grid)

        # One radio button per selection method; the button id is the
        # method's position in SelectionMethods.ITEMS.
        self.__sel_method_buttons = QButtonGroup()
        for method, label in enumerate(SelectionMethods.ITEMS):
            button = QRadioButton(label)
            button.setChecked(method == self.sel_method)
            grid.addWidget(button, method, 0)
            self.__sel_method_buttons.addButton(button, method)
        self.__sel_method_buttons.buttonClicked[int].connect(
            self._set_selection_method)

        # Changing the spin value switches the selection method to N_BEST.
        spin = gui.spin(box,
                        self,
                        "n_selected",
                        1,
                        999,
                        addToLayout=False,
                        callback=lambda: self._set_selection_method(
                            SelectionMethods.N_BEST))
        grid.addWidget(spin, 3, 1)

        gui.rubber(self.controlArea)
        gui.auto_send(self.buttonsArea, self, "auto_apply")

        self.__filter_line_edit = QLineEdit(
            textChanged=self.__on_filter_changed, placeholderText="Filter...")
        self.mainArea.layout().addWidget(self.__filter_line_edit)

        def select_manual():
            self._set_selection_method(SelectionMethods.MANUAL)

        self.view = KeywordsTableView()
        self.view.pressedAny.connect(select_manual)
        self.view.horizontalHeader().setSortIndicator(*self.DEFAULT_SORTING)
        self.view.horizontalHeader().sectionClicked.connect(
            self.__on_horizontal_header_clicked)
        self.mainArea.layout().addWidget(self.view)

        # The view shows the keywords model through a proxy that filters on
        # the first column.
        proxy = SortFilterProxyModel()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterCaseSensitivity(False)
        self.view.setModel(proxy)
        self.view.model().setSourceModel(self.model)
        self.view.selectionModel().selectionChanged.connect(
            self.__on_selection_changed)

    def __on_scoring_method_state_changed(self, state: int, method_name: str):
        """Add or remove *method_name* from the selection and rescore."""
        if state == Qt.Checked:
            self.selected_scoring_methods.add(method_name)
        else:
            self.selected_scoring_methods.discard(method_name)
        self.update_scores()

    def __on_language_changed(self, method: str):
        """Drop the cached keywords of *method* and rescore, if it is in use."""
        if method in self.selected_scoring_methods:
            self.__cached_keywords.pop(method, None)
            self.update_scores()

    def __on_yake_lang_changed(self):
        self.__on_language_changed(ScoringMethods.YAKE)

    def __on_rake_lang_changed(self):
        self.__on_language_changed(ScoringMethods.RAKE)

    def __on_filter_changed(self):
        """Apply the stripped filter text to the proxy and reselect rows."""
        model = self.view.model()
        model.setFilterFixedString(self.__filter_line_edit.text().strip())
        self._select_rows()

    def __on_horizontal_header_clicked(self, index: int):
        """Remember the new sorting and refresh the selection and output."""
        header = self.view.horizontalHeader()
        self.sort_column_order = (index, header.sortIndicatorOrder())
        self._select_rows()
        # explicitly call commit, because __on_selection_changed will not be
        # invoked, since selection is actually the same, only order is not
        if (self.sel_method == SelectionMethods.MANUAL
                and self.selected_words) \
                or self.sel_method == SelectionMethods.ALL:
            self.commit()

    def __on_selection_changed(self):
        """Store the words of the selected rows and send them out."""
        selected_rows = self.view.selectionModel().selectedRows(0)
        model = self.view.model()
        self.selected_words = [
            model.data(model.index(i.row(), 0)) for i in selected_rows
        ]
        self.commit()

    @Inputs.corpus
    def set_corpus(self, corpus: Optional[Corpus]):
        """Store the new corpus and reopen the settings context for it."""
        self.closeContext()
        self._clear()
        self.corpus = corpus
        self.openContext(self.corpus)
        # openContext may have changed sel_method; sync the radio buttons.
        self.__sel_method_buttons.button(self.sel_method).setChecked(True)

    def _clear(self):
        """Cancel the running task and reset messages, table and cache."""
        self.clear_messages()
        self.cancel()
        self.selected_words = []
        self.model.clear()
        self.__cached_keywords = {}

    @Inputs.words
    def set_words(self, words: Optional[Table]):
        """Read the list of words from the ``WORDS_COLUMN_NAME`` column.

        The column is accepted only when its ``attributes`` mark it with
        ``"type": "words"``; otherwise a warning is shown and ``self.words``
        stays ``None``.
        """
        self.words = None
        self.Warning.no_words_column.clear()
        if words:
            if WORDS_COLUMN_NAME in words.domain and words.domain[
                    WORDS_COLUMN_NAME].attributes.get("type") == "words":
                self.words = list(words.get_column_view(WORDS_COLUMN_NAME)[0])
            else:
                self.Warning.no_words_column()

    def handleNewSignals(self):
        self.update_scores()

    def update_scores(self):
        """Start the scoring task with the current inputs and settings."""
        # Both language-dependent methods get the same length limit.
        max_len = self.corpus.ngram_range[1] if self.corpus else 1
        kwargs = {
            ScoringMethods.YAKE: {
                "language": YAKE_LANGUAGES[self.yake_lang_index],
                "max_len": max_len
            },
            ScoringMethods.RAKE: {
                "language": RAKE_LANGUAGES[self.rake_lang_index],
                "max_len": max_len
            },
        }
        self.start(run, self.corpus, self.words, self.__cached_keywords,
                   self.selected_scoring_methods, kwargs, self.agg_method)

    def _set_selection_method(self, method: int):
        """Switch to selection *method*, check its button and reselect."""
        self.sel_method = method
        self.__sel_method_buttons.button(method).setChecked(True)
        self._select_rows()

    def _select_rows(self):
        """Select the view rows that ``sel_method`` asks for.

        Raises:
            NotImplementedError: for an unknown selection method.
        """
        model = self.view.model()
        n_rows, n_columns = model.rowCount(), model.columnCount()
        if self.sel_method == SelectionMethods.NONE:
            selection = QItemSelection()
        elif self.sel_method == SelectionMethods.ALL:
            selection = QItemSelection(model.index(0, 0),
                                       model.index(n_rows - 1, n_columns - 1))
        elif self.sel_method == SelectionMethods.MANUAL:
            selection = QItemSelection()
            # Build the lookup once instead of scanning the list per row.
            wanted = set(self.selected_words)
            for i in range(n_rows):
                word = model.data(model.index(i, 0))
                if word in wanted:
                    _selection = QItemSelection(model.index(i, 0),
                                                model.index(i, n_columns - 1))
                    selection.merge(_selection, QItemSelectionModel.Select)
        elif self.sel_method == SelectionMethods.N_BEST:
            n_sel = min(self.n_selected, n_rows)
            selection = QItemSelection(model.index(0, 0),
                                       model.index(n_sel - 1, n_columns - 1))
        else:
            raise NotImplementedError

        self.view.selectionModel().select(selection,
                                          QItemSelectionModel.ClearAndSelect)

    def on_exception(self, ex: Exception):
        raise ex

    def on_partial_result(self, _: Any):
        pass

    # pylint: disable=arguments-differ
    def on_done(self, results: Results):
        """Show the finished scores and restore sorting and selection."""
        self.__cached_keywords = results.all_keywords
        self.model.wrap(results.scores)
        self.model.setHorizontalHeaderLabels(["Word"] + results.labels)
        self._apply_sorting()
        if self.model.rowCount() > 0:
            self._select_rows()
        else:
            # Nothing to select: update selected_words and the output directly.
            self.__on_selection_changed()

    def _apply_sorting(self):
        """Make the header's sort indicator match ``sort_column_order``."""
        # Fall back to the default when the stored column no longer exists.
        if self.model.columnCount() <= self.sort_column_order[0]:
            self.sort_column_order = self.DEFAULT_SORTING

        header = self.view.horizontalHeader()
        current_sorting = (header.sortIndicatorSection(),
                           header.sortIndicatorOrder())
        if current_sorting != self.sort_column_order:
            header.setSortIndicator(*self.sort_column_order)
            # needed to sort nans; 1. column has strings
            # if self.sort_column_order[0] > 0:
            #     self.model.sort(*self.sort_column_order)

    def onDeleteWidget(self):
        self.shutdown()
        super().onDeleteWidget()

    def commit(self):
        """Send the selected words with their scores, or ``None`` if none.

        The output rows follow ``sort_column_order``; the word itself is
        moved from the first position of each model row to the end, where
        it fills the string meta column.
        """
        words = None
        if self.selected_words:
            words_var = StringVariable(WORDS_COLUMN_NAME)
            words_var.attributes = {"type": "words"}
            model = self.model
            attrs = [
                ContinuousVariable(model.headerData(i + 1, Qt.Horizontal))
                for i in range(len(self.selected_scoring_methods))
            ]
            domain = Domain(attrs, metas=[words_var])

            sort_column, reverse = self.sort_column_order
            # Build the lookup once instead of scanning the list per row.
            wanted = set(self.selected_words)
            data = sorted(model, key=lambda a: a[sort_column], reverse=reverse)
            data = [s[1:] + s[:1] for s in data if s[0] in wanted]
            words = Table.from_list(domain, data)
            words.name = "Words"

        self.Outputs.words.send(words)

    def send_report(self):
        """Report the corpus, the input words (if any) and the table."""
        if not self.corpus:
            return
        self.report_data("Corpus", self.corpus)
        if self.words is not None:
            self.report_paragraph("Words", ", ".join(self.words))
        self.report_table("Keywords", self.view, num_format="{:.3f}")
Example #28
0
class OWTranspose(OWWidget):
    """Widget that outputs the transpose of the input table.

    Names for the new features are either generic or read from a string
    meta attribute picked in the combo box.
    """
    name = "Transpose"
    description = "Transpose data table."
    icon = "icons/Transpose.svg"
    priority = 2000

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table, dynamic=False)

    resizing_enabled = False
    want_main_area = False

    settingsHandler = DomainContextHandler()
    # 0: generic names, 1: names taken from ``feature_names_column``.
    feature_type = ContextSetting(0)
    feature_names_column = ContextSetting(None)
    auto_apply = Setting(True)

    class Error(OWWidget.Error):
        value_error = Msg("{}")

    def __init__(self):
        super().__init__()
        self.data = None

        # GUI
        names_box = gui.vBox(self.controlArea, "Feature names")
        # The lambda looks ``apply`` up when the callback fires rather than
        # binding the method that exists at this point.
        self.feature_radio = gui.radioButtonsInBox(
            names_box,
            self,
            "feature_type",
            callback=lambda: self.apply(),
            btnLabels=["Generic", "From meta attribute:"])

        # Only string metas are offered as a source of feature names.
        self.feature_model = DomainModel(order=DomainModel.METAS,
                                         valid_types=StringVariable,
                                         alphabetical=True)
        first_button = self.feature_radio.buttons[0]
        combo_parent = gui.indentedBox(
            names_box, gui.checkButtonOffsetHint(first_button))
        self.feature_combo = gui.comboBox(
            combo_parent,
            self,
            "feature_names_column",
            callback=self._feature_combo_changed,
            model=self.feature_model)

        self.apply_button = gui.auto_commit(
            self.controlArea,
            self,
            "auto_apply",
            "&Apply",
            box=False,
            commit=self.apply)

    def _feature_combo_changed(self):
        """Picking a column implies the "from meta attribute" option."""
        self.feature_type = 1
        self.apply()

    @Inputs.data
    def set_data(self, data):
        """Accept a new input table, refresh the controls and the output."""
        # Skip the context if the combo is empty: a context with
        # feature_model == None would then match all domains
        if self.feature_model:
            self.closeContext()

        self.data = data
        self.update_controls()

        if self.data is not None and self.feature_model:
            self.openContext(data)

        self.apply()

    def update_controls(self):
        """Refill the combo model and enable the meta option if usable."""
        model = self.feature_model
        model.set_domain(None)
        if self.data:
            model.set_domain(self.data.domain)
            if model:
                self.feature_names_column = model[0]

        has_candidates = bool(model)
        self.feature_radio.buttons[1].setEnabled(has_candidates)
        self.feature_combo.setEnabled(has_candidates)
        self.feature_type = int(has_candidates)

    def apply(self):
        """Transpose the data and send it; show ``ValueError`` as an error."""
        self.clear_messages()
        result = None
        if self.data:
            try:
                result = Table.transpose(
                    self.data,
                    self.feature_type and self.feature_names_column,
                )
            except ValueError as err:
                self.Error.value_error(err)
        self.Outputs.data.send(result)

    def send_report(self):
        """Report how feature names were chosen and, if present, the data."""
        if self.feature_type:
            names_text = "from meta attribute: {}".format(
                self.feature_names_column)
        else:
            names_text = "generic"
        self.report_items("", [("Feature names", names_text)])
        if self.data:
            self.report_data("Data", self.data)
Example #29
0
class OWScatterPlot(OWWidget):
    """Scatter plot widget drawing two attributes of the input data.

    The plot itself lives in ``self.graph`` (an ``OWScatterPlotGraph``);
    this class builds the controls, keeps the combo boxes in step with the
    data domain and sends the selected / unselected rows to the outputs.
    """
    name = 'Scatter plot'
    description = 'Scatter plot visualization'

    # (signal name, type, name of the handler method[, flags])
    inputs = [("Data", Table, "set_data", Default),
              ("Data Subset", Table, "set_subset_data"),
              ("Features", AttributeList, "set_shown_attributes")]

    outputs = [("Selected Data", Table), ("Other Data", Table)]

    settingsHandler = DomainContextHandler()

    auto_send_selection = Setting(True)
    toolbar_selection = Setting(0)
    color_settings = Setting(None)
    selected_schema_index = Setting(0)

    # Names of the attributes shown on the two axes.
    attr_x = ContextSetting("")
    attr_y = ContextSetting("")

    graph = SettingProvider(OWScatterPlotGraph)
    zoom_select_toolbar = SettingProvider(ZoomSelectToolbar)

    # Values offered by the jittering slider.
    jitter_sizes = [0, 0.1, 0.5, 1, 2, 3, 4, 5, 7, 10]

    def __init__(self):
        super().__init__()

        box = gui.widgetBox(self.mainArea, True, margin=0)
        self.graph = OWScatterPlotGraph(self, box, "ScatterPlot")
        box.layout().addWidget(self.graph.plot_widget)

        self.data = None  # Orange.data.Table
        self.subset_data = None  # Orange.data.Table
        self.attribute_selection_list = None  # list of Orange.data.Variable
        self.selection_dirty = False

        # Options shared by all attribute combo boxes below.
        common_options = {
            "labelWidth": 50,
            "orientation": "horizontal",
            "sendSelectedValue": True,
            "valueType": str
        }
        box = gui.widgetBox(self.controlArea, "Axis Data")
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self.major_graph_update,
                                      **common_options)
        self.cb_attr_y = gui.comboBox(box,
                                      self,
                                      "attr_y",
                                      label="Axis y:",
                                      callback=self.major_graph_update,
                                      **common_options)
        # Label is "None" for 0, one decimal below 1 % and an integer above.
        gui.valueSlider(box,
                        self,
                        value='graph.jitter_size',
                        label='Jittering: ',
                        values=self.jitter_sizes,
                        callback=self.reset_graph_data,
                        labelFormat=lambda x: "None"
                        if x == 0 else ("%.1f %%" if x < 1 else "%d %%") % x)
        gui.checkBox(gui.indentedBox(box),
                     self,
                     'graph.jitter_continuous',
                     'Jitter continuous values',
                     callback=self.reset_graph_data)

        box = gui.widgetBox(self.controlArea, "Points")
        self.cb_attr_color = gui.comboBox(box,
                                          self,
                                          "graph.attr_color",
                                          label="Color:",
                                          emptyString="(Same color)",
                                          callback=self.graph.update_colors,
                                          **common_options)
        self.cb_attr_label = gui.comboBox(box,
                                          self,
                                          "graph.attr_label",
                                          label="Label:",
                                          emptyString="(No labels)",
                                          callback=self.graph.update_labels,
                                          **common_options)
        self.cb_attr_shape = gui.comboBox(box,
                                          self,
                                          "graph.attr_shape",
                                          label="Shape:",
                                          emptyString="(Same shape)",
                                          callback=self.graph.update_shapes,
                                          **common_options)
        self.cb_attr_size = gui.comboBox(box,
                                         self,
                                         "graph.attr_size",
                                         label="Size:",
                                         emptyString="(Same size)",
                                         callback=self.graph.update_sizes,
                                         **common_options)

        g = self.graph.gui
        box2 = g.point_properties_box(self.controlArea, box)
        gui.button(box2, self, "Set Colors", self.set_colors)

        box = gui.widgetBox(self.controlArea, "Plot Properties")
        g.add_widgets([g.ShowLegend, g.ShowGridLines], box)
        gui.checkBox(box,
                     self,
                     value='graph.tooltip_shows_all',
                     label='Show all data on mouse hover')

        gui.separator(self.controlArea, 8, 8)
        self.zoom_select_toolbar = g.zoom_select_toolbar(
            self.controlArea,
            nomargin=True,
            buttons=[
                g.StateButtonsBegin, g.SimpleSelect, g.Pan, g.Zoom,
                g.StateButtonsEnd, g.ZoomReset, g.Spacing, g.SendSelection
            ])
        buttons = self.zoom_select_toolbar.buttons
        buttons[g.SendSelection].clicked.connect(self.send_selection)
        buttons[g.Zoom].clicked.connect(self.graph.zoom_button_clicked)
        buttons[g.Pan].clicked.connect(self.graph.pan_button_clicked)
        buttons[g.SimpleSelect].clicked.connect(
            self.graph.select_button_clicked)
        buttons[g.ZoomReset].clicked.connect(self.graph.reset_button_clicked)
        cb_auto_send = gui.checkBox(box, self, 'auto_send_selection',
                                    'Send selection on change')
        gui.setStopper(self, buttons[g.SendSelection], cb_auto_send,
                       "selection_dirty", self.send_selection)
        self.controlArea.layout().addStretch(100)
        self.icons = gui.attributeIconDict

        # The dialog is created only to read the stored palettes from it.
        dlg = self.create_color_dialog()
        self.graph.continuous_palette = dlg.getContinuousPalette("contPalette")
        self.graph.discrete_palette = dlg.getDiscretePalette("discPalette")
        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        self.zoom_select_toolbar.buttons[OWPlotGUI.SendSelection].setEnabled(
            not self.auto_send_selection)

        self.mainArea.setMinimumWidth(700)
        self.mainArea.setMinimumHeight(550)

        # self.vizrank = OWVizRank(self, self.signalManager, self.graph,
        #                          orngVizRank.SCATTERPLOT, "ScatterPlot")
        # self.optimizationDlg = self.vizrank

    # def settingsFromWidgetCallback(self, handler, context):
    #     context.selectionPolygons = []
    #     for curve in self.graph.selectionCurveList:
    #         xs = [curve.x(i) for i in range(curve.dataSize())]
    #         ys = [curve.y(i) for i in range(curve.dataSize())]
    #         context.selectionPolygons.append((xs, ys))

    # def settingsToWidgetCallback(self, handler, context):
    #     selections = getattr(context, "selectionPolygons", [])
    #     for (xs, ys) in selections:
    #         c = SelectionCurve("")
    #         c.setData(xs,ys)
    #         c.attach(self.graph)
    #         self.graph.selectionCurveList.append(c)

    def reset_graph_data(self, *_):
        """Rescale the graph data and redraw (jittering callbacks)."""
        self.graph.rescale_data()
        self.major_graph_update()

    def set_data(self, data: Orange.data.Table):
        """Handler of the "Data" input.

        Tables with no rows or an empty domain are treated as ``None``.
        Nothing happens when the checksum equals that of the current data.
        The combo boxes are rebuilt only when the domain checksum changed.
        """
        if data is not None and (len(data) == 0 or len(data.domain) == 0):
            data = None
        if self.data and data and self.data.checksum() == data.checksum():
            return

        self.closeContext()
        same_domain = \
            self.data and data and \
            data.domain.checksum() == self.data.domain.checksum()
        self.data = data

        # TODO: adapt scatter plot to work on SqlTables (avoid use of X and Y)
        if isinstance(self.data, SqlTable):
            # Fill X and Y row by row from the table.
            self.data.X = np.empty(
                (len(self.data), len(self.data.domain.attributes)))
            self.data.Y = np.empty(
                (len(self.data), len(self.data.domain.class_vars)))
            for i, row in enumerate(data):
                self.data.X[i] = [
                    row[attr] for attr in self.data.domain.attributes
                ]
                if self.data.domain.class_vars:
                    self.data.Y[i] = [
                        row[cv] for cv in self.data.domain.class_vars
                    ]

        # self.vizrank.clearResults()
        if not same_domain:
            self.init_attr_values()
        self.openContext(self.data)

    def set_subset_data(self, subset_data):
        """Handler of the "Data Subset" input."""
        self.subset_data = subset_data
        # self.vizrank.clearArguments()

    # called when all signals are received, so the graph is updated only once
    def handleNewSignals(self):
        self.graph.set_data(self.data, self.subset_data)
        # self.vizrank.resetDialog()
        # Apply the attributes from the "Features" input if the graph
        # knows all of them.
        if self.attribute_selection_list and \
                all(attr in self.graph.attribute_name_index
                    for attr in self.attribute_selection_list):
            self.attr_x = self.attribute_selection_list[0]
            self.attr_y = self.attribute_selection_list[1]
        self.attribute_selection_list = None
        self.update_graph()
        self.send_selection()

    def set_shown_attributes(self, attributes):
        """Handler of the "Features" input; keeps the first two attributes."""
        if attributes and len(attributes) >= 2:
            self.attribute_selection_list = attributes[:2]
        else:
            self.attribute_selection_list = None

    # Callback from VizRank dialog
    def show_selected_attributes(self):
        # NOTE(review): ``self.vizrank`` is not created in ``__init__`` (its
        # construction is commented out there), so calling this method as
        # the class stands raises AttributeError.
        val = self.vizrank.get_selected_projection()
        if not val:
            return
        if self.data.domain.class_var:
            self.graph.attr_color = self.data.domain.class_var.name
        self.major_graph_update(val[3])

    def get_shown_attributes(self):
        """Return the names of the x and y attributes as a pair."""
        return self.attr_x, self.attr_y

    def init_attr_values(self):
        """Refill the attribute combo boxes and reset to default choices.

        Metas are offered only as labels.  Every domain variable is offered
        for both axes, color and label; discrete ones additionally for
        shape and the others for size.
        """
        self.cb_attr_x.clear()
        self.cb_attr_y.clear()
        self.cb_attr_color.clear()
        self.cb_attr_color.addItem("(Same color)")
        self.cb_attr_label.clear()
        self.cb_attr_label.addItem("(No labels)")
        self.cb_attr_shape.clear()
        self.cb_attr_shape.addItem("(Same shape)")
        self.cb_attr_size.clear()
        self.cb_attr_size.addItem("(Same size)")
        if not self.data:
            return

        for var in self.data.domain.metas:
            self.cb_attr_label.addItem(self.icons[var], var.name)
        for attr in self.data.domain.variables:
            self.cb_attr_x.addItem(self.icons[attr], attr.name)
            self.cb_attr_y.addItem(self.icons[attr], attr.name)
            self.cb_attr_color.addItem(self.icons[attr], attr.name)
            if isinstance(attr, DiscreteVariable):
                self.cb_attr_shape.addItem(self.icons[attr], attr.name)
            else:
                self.cb_attr_size.addItem(self.icons[attr], attr.name)
            self.cb_attr_label.addItem(self.icons[attr], attr.name)

        # Default axes: the first variable on x, the second (if any) on y.
        self.attr_x = self.cb_attr_x.itemText(0)
        if self.cb_attr_y.count() > 1:
            self.attr_y = self.cb_attr_y.itemText(1)
        else:
            self.attr_y = self.cb_attr_y.itemText(0)

        if self.data.domain.class_var:
            self.graph.attr_color = self.data.domain.class_var.name
        else:
            self.graph.attr_color = ""
        self.graph.attr_shape = ""
        self.graph.attr_size = ""
        self.graph.attr_label = ""

    def major_graph_update(self, attributes=None, inside_colors=None, **args):
        self.update_graph(attributes, inside_colors, **args)

    def update_graph(self, attributes=None, inside_colors=None, **_):
        """Redraw the plot, optionally switching to a new pair of attributes.

        ``inside_colors`` and extra keyword arguments are accepted but not
        used here.
        """
        self.graph.zoomStack = []
        if not self.graph.have_data:
            return
        if attributes and len(attributes) == 2:
            self.attr_x, self.attr_y = attributes
        self.graph.update_data(self.attr_x, self.attr_y)

    def saveSettings(self):
        OWWidget.saveSettings(self)
        # self.vizrank.saveSettings()

    # def auto_selection_changed(self):
    #     self.zoom_select_toolbar.buttons[OWPlotGUI.SendSelection].setEnabled(
    #         not self.auto_send_selection)
    #     if self.auto_send_selection:
    #         self.send_selection()

    def selection_changed(self):
        """Send the selection now, or mark it dirty when auto-send is off."""
        if self.auto_send_selection:
            self.send_selection()
        else:
            self.selection_dirty = True

    def send_selection(self):
        """Send selected rows to "Selected Data" and the rest to "Other Data".

        ``None`` is sent on both outputs when there is no data; this method
        is called from ``handleNewSignals`` even after the input has been
        removed, so ``self.data`` must not be indexed unconditionally.
        """
        self.selection_dirty = False
        selected = unselected = None
        if self.data is not None:
            selection = self.graph.get_selection()
            selected = self.data[selection]
            # Complement of the selection as a boolean row mask.
            unselection = np.full(len(self.data), True, dtype=bool)
            unselection[selection] = False
            unselected = self.data[unselection]
        self.send("Selected Data", selected)
        self.send("Other Data", unselected)

    def set_colors(self):
        """Open the palette dialog and apply its settings when accepted."""
        dlg = self.create_color_dialog()
        if dlg.exec_():
            self.color_settings = dlg.getColorSchemas()
            self.selected_schema_index = dlg.selectedSchemaIndex
            self.graph.continuous_palette = dlg.getContinuousPalette(
                "contPalette")
            self.graph.discrete_palette = dlg.getDiscretePalette("discPalette")
            self.update_graph()

    def create_color_dialog(self):
        """Return a palette dialog initialized with the stored schemas."""
        c = ColorPaletteDlg(self, "Color Palette")
        c.createDiscretePalette("discPalette", "Discrete Palette")
        c.createContinuousPalette("contPalette", "Continuous Palette")
        c.setColorSchemas(self.color_settings, self.selected_schema_index)
        return c

    def closeEvent(self, ce):
        # self.vizrank.close()
        super().closeEvent(ce)

    def sendReport(self):
        """Report the visualized attributes, the settings and the plot."""
        self.startReport("%s [%s - %s]" %
                         (self.windowTitle(), self.attr_x, self.attr_y))
        # Unset point properties contribute their empty value instead of a
        # (label, value) pair.
        self.reportSettings(
            "Visualized attributes",
            [("X", self.attr_x), ("Y", self.attr_y), self.graph.attr_color and
             ("Color", self.graph.attr_color), self.graph.attr_label and
             ("Label", self.graph.attr_label), self.graph.attr_shape and
             ("Shape", self.graph.attr_shape), self.graph.attr_size and
             ("Size", self.graph.attr_size)])
        self.reportSettings("Settings",
                            [("Symbol size", self.graph.point_width),
                             ("Opacity", self.graph.alpha_value),
                             ("Jittering", self.graph.jitter_size),
                             ("Jitter continuous attributes",
                              gui.YesNo[self.graph.jitter_continuous])])
        self.reportSection("Graph")
        self.reportImage(self.graph.save_to_file, QSize(400, 400))
Example #30
0
class OWTranspose(OWWidget):
    """Widget that outputs the transpose of the input table.

    Names of the new features are either generated from a prefix
    (``GENERIC``) or read from a string meta attribute of the input
    (``FROM_META_ATTR``).
    """
    name = "矩阵转置"
    description = "转置数据表。"
    icon = "icons/Transpose.svg"
    priority = 2000
    keywords = []

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table, dynamic=False)

    # Possible values of the `feature_type` setting.
    GENERIC, FROM_META_ATTR = range(2)

    resizing_enabled = False
    want_main_area = False

    # Prefix used when the user leaves the prefix edit empty.
    DEFAULT_PREFIX = "Feature"

    settingsHandler = DomainContextHandler()
    feature_type = ContextSetting(GENERIC)
    feature_name = ContextSetting("")
    feature_names_column = ContextSetting(None)
    auto_apply = Setting(True)

    class Error(OWWidget.Error):
        value_error = Msg("{}")

    def __init__(self):
        """Build the control area: two radio options and the apply button."""
        super().__init__()
        self.data = None

        radio_box = gui.radioButtons(
            self.controlArea, self, "feature_type", box="特征名称",
            # A lambda, so `self.apply` is looked up when the callback fires.
            callback=lambda: self.apply())

        # Option 1: generic names built from a user-supplied prefix.
        generic_button = gui.appendRadioButton(radio_box, "通用")
        prefix_holder = gui.indentedBox(
            radio_box, gui.checkButtonOffsetHint(generic_button))
        prefix_edit = gui.lineEdit(
            prefix_holder, self, "feature_name",
            placeholderText="Type a prefix ...",
            toolTip="自定义特征名称")
        prefix_edit.editingFinished.connect(self._apply_editing)

        # Option 2: names taken from a string meta attribute.
        self.meta_button = gui.appendRadioButton(radio_box, "来自元属性:")
        self.feature_model = DomainModel(
            order=DomainModel.METAS,
            valid_types=StringVariable,
            alphabetical=True)
        combo_holder = gui.indentedBox(
            radio_box, gui.checkButtonOffsetHint(generic_button))
        self.feature_combo = gui.comboBox(
            combo_holder, self, "feature_names_column",
            contentsLength=12,
            callback=self._feature_combo_changed,
            model=self.feature_model)

        self.apply_button = gui.auto_commit(
            self.controlArea, self, "auto_apply", "应用",
            box=False, commit=self.apply)
        self.apply_button.button.setAutoDefault(False)

        self.set_controls()

    def _apply_editing(self):
        """Switch to generic naming with the stripped prefix and apply."""
        self.feature_type = self.GENERIC
        stripped = self.feature_name.strip()
        self.feature_name = stripped
        self.apply()

    def _feature_combo_changed(self):
        """Switch to meta-attribute naming and apply."""
        self.feature_type = self.FROM_META_ATTR
        self.apply()

    @Inputs.data
    def set_data(self, data):
        """Store the new input, refresh the controls and apply."""
        # Skip the context if the combo is empty: a context with
        # feature_model == None would then match all domains
        had_candidates = bool(self.feature_model)
        if had_candidates:
            self.closeContext()

        self.data = data
        self.set_controls()

        # Re-evaluated on purpose: set_controls() has just given the model
        # the new domain.
        has_candidates = bool(self.feature_model)
        if has_candidates:
            self.openContext(data)
        self.apply()

    def set_controls(self):
        """Sync the meta-attribute model and defaults with ``self.data``."""
        domain = self.data and self.data.domain
        self.feature_model.set_domain(domain)

        has_candidates = bool(self.feature_model)
        self.meta_button.setEnabled(has_candidates)
        if not has_candidates:
            self.feature_names_column = None
            return

        # Default to the first candidate in the model.
        self.feature_names_column = self.feature_model[0]
        self.feature_type = self.FROM_META_ATTR

    def apply(self):
        """Send the transposed table, or ``None`` without data or on error."""
        self.clear_messages()
        result = None
        if self.data:
            try:
                # Falsy unless names are to come from a meta attribute.
                names_column = (self.feature_type == self.FROM_META_ATTR
                                and self.feature_names_column)
                prefix = self.feature_name or self.DEFAULT_PREFIX
                result = Table.transpose(
                    self.data, names_column, feature_name=prefix)
            except ValueError as err:
                self.Error.value_error(err)
        self.Outputs.data.send(result)

    def send_report(self):
        """Report how feature names are chosen and, if present, the data."""
        if self.feature_type == self.GENERIC:
            names = self.feature_name or self.DEFAULT_PREFIX
        else:
            names = "from meta attribute"
            column = self.feature_names_column
            if column:
                names = names + "  '{}'".format(column.name)

        self.report_items("", [("Feature names", names)])
        if self.data:
            self.report_data("Data", self.data)
Example #31
0
class OWFeatureStatistics(widget.OWWidget):
    """Widget showing a table of statistics for the variables of a table.

    Rows selected in the table drive both outputs: the input data reduced
    to the selected variables, and a table with their statistics.
    """
    name = 'Feature Statistics'
    description = 'Show basic statistics for data features.'
    icon = 'icons/FeatureStatistics.svg'

    class Inputs:
        data = Input('Data', Table, default=True)

    class Outputs:
        reduced_data = Output('Reduced Data', Table, default=True)
        statistics = Output('Statistics', Table)

    want_main_area = True
    buttons_area_orientation = Qt.Vertical

    settingsHandler = DomainContextHandler()

    auto_commit = ContextSetting(True)
    color_var = ContextSetting(None)  # type: Optional[Variable]
    # filter_string = ContextSetting('')

    # (sort column, sort order) of the statistics table
    sorting = ContextSetting((0, Qt.DescendingOrder))
    # Source-model rows currently selected in the table view
    selected_rows = ContextSetting([])

    def __init__(self):
        """Build the info panel, the color combo and the statistics table."""
        super().__init__()

        self.data = None  # type: Optional[Table]

        # Information panel
        info_box = gui.vBox(self.controlArea, 'Info')
        info_box.setMinimumWidth(200)
        self.info_summary = gui.widgetLabel(info_box, wordWrap=True)
        self.info_attr = gui.widgetLabel(info_box, wordWrap=True)
        self.info_class = gui.widgetLabel(info_box, wordWrap=True)
        self.info_meta = gui.widgetLabel(info_box, wordWrap=True)
        self.set_info()

        # TODO: Implement filtering on the model
        # filter_box = gui.vBox(self.controlArea, 'Filter')
        # self.filter_text = gui.lineEdit(
        #     filter_box, self, value='filter_string',
        #     placeholderText='Filter variables by name',
        #     callback=self._filter_table_variables, callbackOnType=True,
        # )
        # shortcut = QShortcut(QKeySequence('Ctrl+f'), self, self.filter_text.setFocus)
        # shortcut.setWhatsThis('Filter variables by name')

        self.color_var_model = DomainModel(
            valid_types=(ContinuousVariable, DiscreteVariable),
            placeholder='None',
        )
        box = gui.vBox(self.controlArea, 'Histogram')
        self.cb_color_var = gui.comboBox(
            box, master=self, value='color_var', model=self.color_var_model,
            label='Color:', orientation=Qt.Horizontal,
        )
        self.cb_color_var.activated.connect(self.__color_var_changed)

        gui.rubber(self.controlArea)
        gui.auto_commit(
            self.buttonsArea, self, 'auto_commit', 'Send Selected Rows',
            'Send Automatically',
        )

        # Main area
        self.model = FeatureStatisticsTableModel(parent=self)
        self.table_view = FeatureStatisticsTableView(self.model, parent=self)
        self.table_view.selectionModel().selectionChanged.connect(self.on_select)
        self.table_view.horizontalHeader().sectionClicked.connect(self.on_header_click)

        self.mainArea.layout().addWidget(self.table_view)

    def sizeHint(self):
        """Return the preferred widget size."""
        return QSize(1050, 500)

    def _filter_table_variables(self):
        """Build the filter regex (unfinished; see the TODO in ``__init__``).

        NOTE(review): this reads ``self.filter_string``, whose setting is
        commented out in the class body, and the regex is not applied to
        anything yet. The only call site visible in this class is commented
        out as well.
        """
        regex = QRegExp(self.filter_string)
        # If the user explicitly types different cases, we assume they know
        # what they are searching for and account for letter case in filter
        different_case = (
            any(c.islower() for c in self.filter_string) and
            any(c.isupper() for c in self.filter_string)
        )
        if not different_case:
            regex.setCaseSensitivity(Qt.CaseInsensitive)

    @Inputs.data
    def set_data(self, data):
        """Handle a new input table (or ``None``) and refresh all state."""
        # Clear outputs and reset widget state
        self.closeContext()
        self.selected_rows = []
        self.model.resetSorting()
        self.Outputs.reduced_data.send(None)
        self.Outputs.statistics.send(None)

        # Setup widget state for new data and restore settings
        self.data = data

        if data is not None:
            self.color_var_model.set_domain(data.domain)
            # Always reassign: without class variables the choice made for
            # the previous table must not leak into the new domain.
            class_vars = data.domain.class_vars
            self.color_var = class_vars[0] if class_vars else None
        else:
            self.color_var_model.set_domain(None)
            self.color_var = None
        self.model.set_data(data)

        self.openContext(self.data)
        self.__restore_selection()
        self.__restore_sorting()
        # self._filter_table_variables()
        self.__color_var_changed()

        self.set_info()
        self.commit()

    def __restore_selection(self):
        """Restore the selection on the table view from saved settings."""
        selection_model = self.table_view.selectionModel()
        selection = QItemSelection()
        if len(self.selected_rows):
            for row in self.model.mapFromSourceRows(self.selected_rows):
                # Select the whole row, first to last column.
                selection.append(QItemSelectionRange(
                    self.model.index(row, 0),
                    self.model.index(row, self.model.columnCount() - 1)
                ))
        selection_model.select(selection, QItemSelectionModel.ClearAndSelect)

    def __restore_sorting(self):
        """Restore the sort column and order from saved settings."""
        sort_column, sort_order = self.sorting
        # Ignore a stored column that the current model does not have.
        if sort_column < self.model.columnCount():
            self.model.sort(sort_column, sort_order)
            self.table_view.horizontalHeader().setSortIndicator(sort_column, sort_order)

    @pyqtSlot(int)
    def on_header_click(self, *_):
        """Remember the model's current sort column and order."""
        # Store the header states
        sort_order = self.model.sortOrder()
        sort_column = self.model.sortColumn()
        self.sorting = sort_column, sort_order

    @pyqtSlot(int)
    def __color_var_changed(self, *_):
        """Pass the selected color variable on to the table model."""
        if self.model is not None:
            self.model.set_target_var(self.color_var)

    def _format_variables_string(self, variables):
        """Return a text summary of `variables`, counted by exact type.

        Returns ``'No variables'`` when none of the variables is of a
        listed type.
        """
        agg = []
        for var_type_name, var_type in [
                ('categorical', DiscreteVariable),
                ('numeric', ContinuousVariable),
                ('time', TimeVariable),
                ('string', StringVariable)
        ]:
            # Disable pylint here because a `TimeVariable` is also a
            # `ContinuousVariable`, and should be labelled as such. That is why
            # it is necessary to check the type this way instead of using
            # `isinstance`, which would fail in the above case
            var_type_list = [v for v in variables if type(v) is var_type]  # pylint: disable=unidiomatic-typecheck
            if var_type_list:
                hidden = var_type in self.model.HIDDEN_VAR_TYPES
                suffix = ' (not shown)' if hidden else ''
                agg.append((
                    '%d %s%s' % (len(var_type_list), var_type_name, suffix),
                    len(var_type_list)
                ))

        if not agg:
            return 'No variables'

        attrs, counts = list(zip(*agg))
        if len(attrs) > 1:
            var_string = ', '.join(attrs[:-1]) + ' and ' + attrs[-1]
        else:
            var_string = attrs[0]
        return plural('%s variable{s}' % var_string, sum(counts))

    def set_info(self):
        """Fill the info labels from the current data, or clear them."""
        if self.data is not None:
            self.info_summary.setText('<b>%s</b> contains %s with %s' % (
                self.data.name,
                plural('{number} instance{s}', self.model.n_instances),
                plural('{number} feature{s}', self.model.n_attributes)
            ))

            self.info_attr.setText(
                '<b>Attributes:</b><br>%s' %
                self._format_variables_string(self.data.domain.attributes)
            )
            self.info_class.setText(
                '<b>Class variables:</b><br>%s' %
                self._format_variables_string(self.data.domain.class_vars)
            )
            self.info_meta.setText(
                '<b>Metas:</b><br>%s' %
                self._format_variables_string(self.data.domain.metas)
            )
        else:
            self.info_summary.setText('No data on input.')
            self.info_attr.setText('')
            self.info_class.setText('')
            self.info_meta.setText('')

    def on_select(self):
        """Store the selected rows as source-model rows and commit."""
        self.selected_rows = self.model.mapToSourceRows([
            i.row() for i in self.table_view.selectionModel().selectedRows()
        ])
        self.commit()

    def commit(self):
        """Send both outputs for the selected rows, or ``None`` if none."""
        if not len(self.selected_rows):
            self.Outputs.reduced_data.send(None)
            self.Outputs.statistics.send(None)
            return

        # Send a table with only selected columns to output
        variables = self.model.variables[self.selected_rows]
        self.Outputs.reduced_data.send(self.data[:, variables])

        # Send the statistics of the selected variables to output
        labels, data = self.model.get_statistics_matrix(variables, return_labels=True)
        # One variable name per row, as a column for the meta attribute.
        var_names = np.atleast_2d([var.name for var in variables]).T
        domain = Domain(
            attributes=[ContinuousVariable(name) for name in labels],
            metas=[StringVariable('Feature')]
        )
        statistics = Table(domain, data, metas=var_names)
        statistics.name = '%s (Feature Statistics)' % self.data.name
        self.Outputs.statistics.send(statistics)

    def send_report(self):
        """Reporting is not implemented for this widget."""
        pass
Example #32
0
class OWScatterPlot(OWWidget):
    """Scatterplot visualization with explorative analysis and intelligent
    data visualization enhancements."""

    name = 'Scatter Plot'
    description = "Interactive scatter plot visualization with " \
                  "intelligent data visualization enhancements."
    icon = "icons/ScatterPlot.svg"
    priority = 140

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)
        features = Input("Features", AttributeList)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        features = Output("Features", AttributeList, dynamic=False)

    settings_version = 2
    settingsHandler = DomainContextHandler()

    auto_send_selection = Setting(True)
    auto_sample = Setting(True)
    toolbar_selection = Setting(0)

    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    # List of (row index, group) pairs, or None; see send_data().
    selection_group = Setting(None, schema_only=True)

    graph = SettingProvider(OWScatterPlotGraph)

    jitter_sizes = [0, 0.1, 0.5, 1, 2, 3, 4, 5, 7, 10]

    graph_name = "graph.plot_widget.plotItem"

    class Information(OWWidget.Information):
        sampled_sql = Msg("Large SQL table; showing a sample.")

    def __init__(self):
        """Create the plot and the boxes of the control area."""
        super().__init__()

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWScatterPlotGraph(self, box, "ScatterPlot")
        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        # Draw both axes in the palette's text color.
        axispen = QPen(self.palette().color(QPalette.Text))
        axis = plot.getAxis("bottom")
        axis.setPen(axispen)

        axis = plot.getAxis("left")
        axis.setPen(axispen)

        self.data = None  # Orange.data.Table
        self.subset_data = None  # Orange.data.Table
        self.data_metas_X = None  # self.data, where primitive metas are moved to X
        self.sql_data = None  # Orange.data.sql.table.SqlTable
        self.attribute_selection_list = None  # list of Orange.data.Variable
        # Timer that calls add_data() while sampling a large SQL table.
        self.__timer = QTimer(self, interval=1200)
        self.__timer.timeout.connect(self.add_data)

        common_options = dict(labelWidth=50,
                              orientation=Qt.Horizontal,
                              sendSelectedValue=True,
                              valueType=str)
        box = gui.vBox(self.controlArea, "Axis Data")
        dmod = DomainModel
        self.xy_model = DomainModel(dmod.MIXED, valid_types=dmod.PRIMITIVE)
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self.update_attr,
                                      model=self.xy_model,
                                      **common_options)
        self.cb_attr_y = gui.comboBox(box,
                                      self,
                                      "attr_y",
                                      label="Axis y:",
                                      callback=self.update_attr,
                                      model=self.xy_model,
                                      **common_options)

        vizrank_box = gui.hBox(box)
        gui.separator(vizrank_box, width=common_options["labelWidth"])
        self.vizrank, self.vizrank_button = ScatterPlotVizRank.add_vizrank(
            vizrank_box, self, "Find Informative Projections", self.set_attr)

        gui.separator(box)

        g = self.graph.gui
        g.add_widgets([g.JitterSizeSlider, g.JitterNumericValues], box)

        self.sampling = gui.auto_commit(self.controlArea,
                                        self,
                                        "auto_sample",
                                        "Sample",
                                        box="Sampling",
                                        callback=self.switch_sampling,
                                        commit=lambda: self.add_data(1))
        # Shown only while a large SQL table is sampled (see set_data).
        self.sampling.setVisible(False)

        g.point_properties_box(self.controlArea)
        self.models = [self.xy_model] + g.points_models

        box_plot_prop = gui.vBox(self.controlArea, "Plot Properties")
        g.add_widgets([
            g.ShowLegend, g.ShowGridLines, g.ToolTipShowsAll, g.ClassDensity,
            g.RegressionLine, g.LabelOnlySelected
        ], box_plot_prop)

        self.graph.box_zoom_select(self.controlArea)

        self.controlArea.layout().addStretch(100)
        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(self.controlArea, self, "auto_send_selection",
                        "Send Selection", "Send Automatically")

        self.graph.zoom_actions(self)

    def keyPressEvent(self, event):
        """Update the graph tooltip with the current key modifiers."""
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        """Update the graph tooltip with the current key modifiers."""
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def reset_graph_data(self, *_):
        """Rescale the graph data and redraw, if there is any data."""
        if self.data is not None:
            self.graph.rescale_data()
            self.update_graph()

    @Inputs.data
    def set_data(self, data):
        """Handle new input data; large SQL tables are sampled."""
        self.clear_messages()
        self.Information.sampled_sql.clear()
        self.__timer.stop()
        self.sampling.setVisible(False)
        self.sql_data = None
        if isinstance(data, SqlTable):
            if data.approx_len() < 4000:
                data = Table(data)
            else:
                # Too large to load fully: show a sample and, if enabled,
                # let the timer keep adding rows.
                self.Information.sampled_sql()
                self.sql_data = data
                data_sample = data.sample_time(0.8, no_cache=True)
                data_sample.download_data(2000, partial=True)
                data = Table(data_sample)
                self.sampling.setVisible(True)
                if self.auto_sample:
                    self.__timer.start()

        # Treat data without rows or without variables as no data.
        if data is not None and (len(data) == 0 or len(data.domain) == 0):
            data = None
        # Nothing to do when the same data arrives again.
        if self.data and data and self.data.checksum() == data.checksum():
            return

        self.closeContext()
        same_domain = (self.data and data and data.domain.checksum()
                       == self.data.domain.checksum())
        self.data = data
        self.data_metas_X = self.move_primitive_metas_to_X(data)

        if not same_domain:
            self.init_attr_values()
        self.vizrank.initialize()
        self.vizrank.attrs = self.data.domain.attributes if self.data is not None else []
        self.vizrank_button.setEnabled(
            self.data is not None and not self.data.is_sparse()
            and self.data.domain.class_var is not None
            and len(self.data.domain.attributes) > 1 and len(self.data) > 1)
        if self.data is not None and self.data.domain.class_var is None \
            and len(self.data.domain.attributes) > 1 and len(self.data) > 1:
            self.vizrank_button.setToolTip(
                "Data with a class variable is required.")
        else:
            self.vizrank_button.setToolTip("")
        self.openContext(self.data)

        def findvar(name, iterable):
            """Find a Orange.data.Variable in `iterable` by name"""
            for el in iterable:
                if isinstance(el, Orange.data.Variable) and el.name == name:
                    return el
            return None

        # handle restored settings from  < 3.3.9 when attr_* were stored
        # by name
        if isinstance(self.attr_x, str):
            self.attr_x = findvar(self.attr_x, self.xy_model)
        if isinstance(self.attr_y, str):
            self.attr_y = findvar(self.attr_y, self.xy_model)
        if isinstance(self.graph.attr_label, str):
            self.graph.attr_label = findvar(self.graph.attr_label,
                                            self.graph.gui.label_model)
        if isinstance(self.graph.attr_color, str):
            self.graph.attr_color = findvar(self.graph.attr_color,
                                            self.graph.gui.color_model)
        if isinstance(self.graph.attr_shape, str):
            self.graph.attr_shape = findvar(self.graph.attr_shape,
                                            self.graph.gui.shape_model)
        if isinstance(self.graph.attr_size, str):
            self.graph.attr_size = findvar(self.graph.attr_size,
                                           self.graph.gui.size_model)

    def add_data(self, time=0.4):
        """Append another sample of the SQL table to the shown data.

        Stops the sampling timer once more than 2000 rows are loaded.
        """
        if self.data and len(self.data) > 2000:
            return self.__timer.stop()
        data_sample = self.sql_data.sample_time(time, no_cache=True)
        if data_sample:
            data_sample.download_data(2000, partial=True)
            data = Table(data_sample)
            self.data = Table.concatenate((self.data, data), axis=0)
            self.data_metas_X = self.move_primitive_metas_to_X(self.data)
            self.handleNewSignals()

    def switch_sampling(self):
        """Restart sampling if it is enabled and SQL data is present."""
        self.__timer.stop()
        if self.auto_sample and self.sql_data:
            self.add_data()
            self.__timer.start()

    def move_primitive_metas_to_X(self, data):
        """Return `data` with primitive metas moved among the attributes.

        ``None`` is passed through unchanged.
        """
        if data is not None:
            new_attrs = [
                a for a in data.domain.attributes + data.domain.metas
                if a.is_primitive()
            ]
            new_metas = [m for m in data.domain.metas if not m.is_primitive()]
            new_domain = Domain(new_attrs, data.domain.class_vars, new_metas)
            data = data.transform(new_domain)
        return data

    @Inputs.data_subset
    def set_subset_data(self, subset_data):
        """Store the subset input; large SQL subsets are rejected."""
        self.warning()
        if isinstance(subset_data, SqlTable):
            if subset_data.approx_len() < AUTO_DL_LIMIT:
                subset_data = Table(subset_data)
            else:
                self.warning("Data subset does not support large Sql tables")
                subset_data = None
        self.subset_data = self.move_primitive_metas_to_X(subset_data)
        self.controls.graph.alpha_value.setEnabled(subset_data is None)

    # called when all signals are received, so the graph is updated only once
    def handleNewSignals(self):
        """Push the data to the graph, then redraw and commit."""
        self.graph.new_data(self.data_metas_X, self.subset_data)
        # Use the attributes from the "Features" input only if the graph
        # domain contains all of them.
        if self.attribute_selection_list and self.graph.domain and \
                all(attr in self.graph.domain
                        for attr in self.attribute_selection_list):
            self.attr_x = self.attribute_selection_list[0]
            self.attr_y = self.attribute_selection_list[1]
        self.attribute_selection_list = None
        self.update_graph()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())
        self.cb_reg_line.setEnabled(self.graph.can_draw_regresssion_line())
        self.apply_selection()
        self.unconditional_commit()

    def apply_selection(self):
        """Apply selection saved in workflow.

        Saved entries whose row index is beyond the current data are
        dropped. If none remain, the graph selection is left empty.
        """
        if self.data is None or self.selection_group is None:
            return

        n_rows = len(self.data)
        self.graph.selection = np.zeros(n_rows, dtype=np.uint8)
        self.selection_group = [
            x for x in self.selection_group if x[0] < n_rows
        ]
        # An empty list gives a 1-d, zero-length array whose [0] raises
        # IndexError, so only index when at least one pair survived.
        if self.selection_group:
            selection_array = np.array(self.selection_group).T
            self.graph.selection[selection_array[0]] = selection_array[1]
        self.graph.update_colors(keep_colors=True)

    @Inputs.features
    def set_shown_attributes(self, attributes):
        """Remember the first two input features for handleNewSignals()."""
        if attributes and len(attributes) >= 2:
            self.attribute_selection_list = attributes[:2]
        else:
            self.attribute_selection_list = None

    def init_attr_values(self):
        """Reset the models and the attribute settings for the new domain."""
        domain = self.data and self.data.domain
        for model in self.models:
            model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None
        # With a single candidate, both axes show the same variable.
        self.attr_y = self.xy_model[1] if len(self.xy_model) >= 2 \
            else self.attr_x
        self.graph.attr_color = self.data.domain.class_var if domain else None
        self.graph.attr_shape = None
        self.graph.attr_size = None
        self.graph.attr_label = None

    def set_attr(self, attr_x, attr_y):
        """Set both axis attributes and update the plot."""
        self.attr_x, self.attr_y = attr_x, attr_y
        self.update_attr()

    def update_attr(self):
        """Redraw after an axis change and send the shown features."""
        self.update_graph()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())
        self.cb_reg_line.setEnabled(self.graph.can_draw_regresssion_line())
        self.send_features()

    def update_colors(self):
        """Enable the class-density checkbox only if density can be drawn."""
        self.cb_class_density.setEnabled(self.graph.can_draw_density())

    def update_density(self):
        """Redraw the graph without resetting the view."""
        self.update_graph(reset_view=False)

    def update_regression_line(self):
        """Redraw the graph without resetting the view."""
        self.update_graph(reset_view=False)

    def update_graph(self, reset_view=True, **_):
        """Clear the zoom stack and redraw if the graph has data."""
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.attr_x, self.attr_y, reset_view)

    def selection_changed(self):
        """Commit the outputs after the selection changes."""
        self.commit()

    def send_data(self):
        """Send the selected and annotated data and store the selection."""
        # TODO: Implement selection for sql data
        def _get_selected():
            if not len(selection):
                return None
            return create_groups_table(data, graph.selection, False, "Group")

        def _get_annotated():
            # A selection value above 1 means more than one group is in use.
            if graph.selection is not None and np.max(graph.selection) > 1:
                return create_groups_table(data, graph.selection)
            else:
                return create_annotated_table(data, selection)

        graph = self.graph
        data = self.data
        selection = graph.get_selection()
        self.Outputs.annotated_data.send(_get_annotated())
        self.Outputs.selected_data.send(_get_selected())

        # Store current selection in a setting that is stored in workflow
        if len(selection):
            self.selection_group = list(
                zip(selection, graph.selection[selection]))
        else:
            self.selection_group = None

    def send_features(self):
        """Send the axis attributes that are set, or ``None`` if neither."""
        features = [attr for attr in [self.attr_x, self.attr_y] if attr]
        self.Outputs.features.send(features or None)

    def commit(self):
        """Send the data outputs and the features output."""
        self.send_data()
        self.send_features()

    def get_widget_name_extension(self):
        """Return "<x> vs <y>" for the axis attributes, or None without data."""
        if self.data is not None:
            return "{} vs {}".format(self.attr_x.name, self.attr_y.name)

    def send_report(self):
        """Report the plot with a caption listing the point properties."""
        if self.data is None:
            return

        def name(var):
            return var and var.name

        caption = report.render_items_vert(
            (("Color", name(self.graph.attr_color)),
             ("Label", name(self.graph.attr_label)),
             ("Shape", name(self.graph.attr_shape)),
             ("Size", name(self.graph.attr_size)),
             ("Jittering", (self.attr_x.is_discrete or self.attr_y.is_discrete
                            or self.graph.jitter_continuous)
              and self.graph.jitter_size)))
        self.report_plot()
        if caption:
            self.report_caption(caption)

    def onDeleteWidget(self):
        """Release the plot's view box and clear the plot."""
        super().onDeleteWidget()
        self.graph.plot_widget.getViewBox().deleteLater()
        self.graph.plot_widget.clear()

    @classmethod
    def migrate_settings(cls, settings, version):
        """Convert a pre-version-2 "selection" list to "selection_group"."""
        if version < 2 and "selection" in settings and settings["selection"]:
            # Each previously selected index is assigned to group 1.
            settings["selection_group"] = [(a, 1)
                                           for a in settings["selection"]]
Example #33
0
class OWHyper(OWWidget):
    name = "Hyperspectra"
    inputs = [("Data", Orange.data.Table, 'set_data', Default)]
    outputs = [("Selection", Orange.data.Table), ("Data", Orange.data.Table)]
    icon = "icons/hyper.svg"

    settings_version = 2
    settingsHandler = DomainContextHandler(metas_in_res=True)

    imageplot = SettingProvider(ImagePlot)
    curveplot = SettingProvider(CurvePlotHyper)

    integration_method = Setting(0)
    integration_methods = [Integrate.Simple, Integrate.Baseline,
                           Integrate.PeakMax, Integrate.PeakBaseline, Integrate.PeakAt]
    value_type = Setting(0)
    attr_value = ContextSetting(None)

    lowlim = Setting(None)
    highlim = Setting(None)
    choose = Setting(None)

    class Warning(OWWidget.Warning):
        # Warning-level messages shown by this widget.
        threshold_error = Msg("Low slider should be less than High")

    class Error(OWWidget.Error):
        # Error-level messages shown by this widget. The group derives from
        # OWWidget.Error (not OWWidget.Warning) so that its messages are
        # registered as errors rather than as warnings.
        image_too_big = Msg("Image for chosen features is too big ({} x {}).")

    @classmethod
    def migrate_settings(cls, settings_, version):
        """Upgrade settings saved before version 2, in place.

        The saved ``attr_value`` of the first stored context is deleted to
        prevent crashes when old settings are loaded. The deletion is
        best-effort: settings without that entry are left untouched.
        """
        if version < 2:
            try:
                del settings_["context_settings"][0].values["attr_value"]
            except (KeyError, IndexError, AttributeError, TypeError):
                # Nothing stored in the expected place; only lookup failures
                # are ignored so KeyboardInterrupt/SystemExit still propagate.
                pass

    def __init__(self):
        """Build the control area, the image/curve plots and the limit lines."""
        super().__init__()

        # --- control area: choice of what the image values are computed from
        dbox = gui.widgetBox(self.controlArea, "Image values")

        rbox = gui.radioButtons(
            dbox, self, "value_type", callback=self._change_integration)

        gui.appendRadioButton(rbox, "From spectra")

        self.box_values_spectra = gui.indentedBox(rbox)

        gui.comboBox(
            self.box_values_spectra, self, "integration_method", valueType=int,
            items=(a.name for a in self.integration_methods),
            callback=self._change_integral_type)
        gui.rubber(self.controlArea)

        gui.appendRadioButton(rbox, "Use feature")

        self.box_values_feature = gui.indentedBox(rbox)

        # model listing meta and class variables of primitive type
        self.feature_value_model = DomainModel(DomainModel.METAS | DomainModel.CLASSES,
                                               valid_types=DomainModel.PRIMITIVE)
        self.feature_value = gui.comboBox(
            self.box_values_feature, self, "attr_value",
            callback=self.update_feature_value, model=self.feature_value_model,
            sendSelectedValue=True, valueType=str)

        # --- main area: image plot above the curve plot in a vertical splitter
        splitter = QSplitter(self)
        splitter.setOrientation(Qt.Vertical)
        self.imageplot = ImagePlot(self, self.image_selection_changed)
        self.curveplot = CurvePlotHyper(self, select=SELECTONE)
        self.curveplot.plot.vb.x_padding = 0.005  # pad view so that lines are not hidden
        splitter.addWidget(self.imageplot)
        splitter.addWidget(self.curveplot)
        self.mainArea.layout().addWidget(splitter)

        # --- movable vertical lines bound to the lowlim/highlim/choose settings
        self.line1 = MovableVlineWD(position=self.lowlim, label="", setvalfn=self.set_lowlim,
                                    confirmfn=self.edited, report=self.curveplot)
        self.line2 = MovableVlineWD(position=self.highlim, label="", setvalfn=self.set_highlim,
                                    confirmfn=self.edited, report=self.curveplot)
        self.line3 = MovableVlineWD(position=self.choose, label="", setvalfn=self.set_choose,
                                    confirmfn=self.edited, report=self.curveplot)
        self.curveplot.add_marking(self.line1)
        self.curveplot.add_marking(self.line2)
        self.curveplot.add_marking(self.line3)
        # all lines start hidden; _update_integration_type shows the relevant ones
        self.line1.hide()
        self.line2.hide()
        self.line3.hide()

        self.data = None

        self.resize(900, 700)
        self.graph_name = "imageplot.plotview"
        self._update_integration_type()

    def image_selection_changed(self, indices):
        """Send the outputs for a new image selection and refresh the curve plot.

        The annotated table is always sent on "Data". "Selection" receives the
        selected rows, or None when there is no data or nothing is selected.
        The curve plot shows the selected rows, falling back to all data when
        the selection is empty.
        """
        self.send("Data", create_annotated_table(self.data, indices))
        if not self.data:
            self.send("Selection", None)
            self.curveplot.set_data(None)
        else:
            subset = self.data[indices]
            self.send("Selection", subset if subset else None)
            self.curveplot.set_data(subset if subset else self.data)
        self.curveplot.update_view()

    def selection_changed(self):
        """Redraw the image data after a selection change."""
        self.redraw_data()

    def init_attr_values(self):
        """Point the feature model at the current domain and pick its first item.

        ``attr_value`` becomes None when the model ends up empty.
        """
        model = self.feature_value_model
        model.set_domain(None if self.data is None else self.data.domain)
        self.attr_value = model[0] if model else None

    def set_lowlim(self, v):
        """Store ``v`` as the ``lowlim`` setting (passed as ``setvalfn`` of line1)."""
        self.lowlim = v

    def set_highlim(self, v):
        """Store ``v`` as the ``highlim`` setting (passed as ``setvalfn`` of line2)."""
        self.highlim = v

    def set_choose(self, v):
        """Store ``v`` as the ``choose`` setting (passed as ``setvalfn`` of line3)."""
        self.choose = v

    def redraw_data(self):
        """Ask the image plot to update its integral limits."""
        self.imageplot.set_integral_limits()

    def update_feature_value(self):
        """Callback of the feature combo box: redraw the image data."""
        self.redraw_data()

    def _update_integration_type(self):
        """Enable the controls and limit lines matching the chosen value type.

        With ``value_type == 0`` the spectra box is enabled and either the two
        range lines or, for ``Integrate.PeakAt``, the single position line is
        shown. With ``value_type == 1`` the feature box is enabled and all
        lines stay hidden.
        """
        for line in (self.line1, self.line2, self.line3):
            line.hide()
        if self.value_type == 0:
            self.box_values_spectra.setDisabled(False)
            self.box_values_feature.setDisabled(True)
            method = self.integration_methods[self.integration_method]
            if method == Integrate.PeakAt:
                self.line3.show()
            else:
                self.line1.show()
                self.line2.show()
        elif self.value_type == 1:
            self.box_values_spectra.setDisabled(True)
            self.box_values_feature.setDisabled(False)
        QTest.qWait(1)  # first update the interface

    def _change_integration(self):
        """Callback of the value-type radio buttons: update controls and redraw."""
        # change what to show on the image
        self._update_integration_type()
        self.redraw_data()

    def edited(self):
        """Redraw the image data (passed as ``confirmfn`` of the movable lines)."""
        self.redraw_data()

    def _change_integral_type(self):
        """Callback of the integration-method combo box."""
        self._change_integration()

    def set_data(self, data):
        """Handle new input data.

        Closes the current context, passes the data to both plots, resets the
        feature choice when the domain changed, keeps the three limit
        settings inside the x range of the curve plot, and reopens the context.
        """
        self.closeContext()
        self.curveplot.set_data(data)
        if data is not None:
            # domains are compared by checksum; with no previous data the
            # expression is falsy and the attribute values are re-initialised
            same_domain = (self.data and
                           data.domain.checksum() == self.data.domain.checksum())
            self.data = data
            if not same_domain:
                self.init_attr_values()
        else:
            self.data = None
        if self.curveplot.data_x is not None and len(self.curveplot.data_x):
            # first and last x value of the curve plot are used as the range
            minx = self.curveplot.data_x[0]
            maxx = self.curveplot.data_x[-1]

            # limits that are unset or out of range are reset to the range ends
            if self.lowlim is None or not minx <= self.lowlim <= maxx:
                self.lowlim = minx
            self.line1.setValue(self.lowlim)

            if self.highlim is None or not minx <= self.highlim <= maxx:
                self.highlim = maxx
            self.line2.setValue(self.highlim)

            # the chosen position defaults to the middle, otherwise it is clamped
            if self.choose is None:
                self.choose = (minx + maxx)/2
            elif self.choose < minx:
                self.choose = minx
            elif self.choose > maxx:
                self.choose = maxx
            self.line3.setValue(self.choose)

        self.imageplot.set_data(data)
        self.openContext(data)
        self.curveplot.update_view()
        self.imageplot.update_view()

    # store selection as a list due to a bug in checking if numpy settings changed
    def storeSpecificSettings(self):
        """Save the image plot selection in the current context as a list (or None)."""
        current = self.imageplot.selection
        self.current_context.selection = \
            None if current is None else list(current)

    def retrieveSpecificSettings(self):
        """Restore the image plot selection from the current context.

        A stored selection is converted to a boolean array; a missing or None
        selection is restored as None.
        """
        stored = getattr(self.current_context, "selection", None)
        self.imageplot.selection = \
            None if stored is None else np.array(stored, dtype="bool")
Example #34
0
class OWChoropleth(OWWidget):
    """
    This is to `OWDataProjectionWidget` what
    `OWChoroplethPlotGraph` is to `OWScatterPlotBase`.
    """

    name = 'Choropleth Map'
    description = 'A thematic map in which areas are shaded in proportion ' \
                  'to the measurement of the statistical variable being displayed.'
    icon = "icons/Choropleth.svg"
    priority = 120

    class Inputs:
        # Input signals of the widget: a single, default "Data" table.
        data = Input("Data", Table, default=True)

    class Outputs:
        # Output signals of the widget.
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        # Added by Jean 2020/06/20: output the aggregated data for future use
        agg_data = Output("Aggregated data", Table)

    settings_version = 2
    settingsHandler = DomainContextHandler()
    selection = Setting(None, schema_only=True)
    auto_commit = Setting(True)

    attr_lat = ContextSetting(None)
    attr_lon = ContextSetting(None)

    agg_attr = ContextSetting(None)

    # Added by Jean 2020/06/16
    palette_key = Setting(next(iter(ContinuousPalettes)))

    agg_func = ContextSetting(DEFAULT_AGG_FUNC)
    admin_level = Setting(0)
    binning_index = Setting(0)

    GRAPH_CLASS = OWChoroplethPlotMapGraph
    graph = SettingProvider(OWChoroplethPlotMapGraph)
    graph_name = "graph.plot_widget.plotItem"

    input_changed = Signal(object)
    output_changed = Signal(object)

    class Error(OWWidget.Error):
        # Error-level messages shown by this widget.
        no_lat_lon_vars = Msg("Data has no latitude and longitude variables.")

    class Warning(OWWidget.Warning):
        # Warning-level messages; the placeholder receives the number of points.
        no_region = Msg("{} points are not in any region.")

    def __init__(self):
        """Initialise the widget state, connect the summary signals, build the GUI."""
        super().__init__()
        self.data = None
        # region id of every data point, filled by get_regions
        self.data_ids = None  # type: Optional[np.ndarray]

        # aggregated values and their region ids, filled by get_agg_data
        self.agg_data = None  # type: Optional[np.ndarray]
        self.region_ids = None  # type: Optional[np.ndarray]

        self.choropleth_regions = []
        self.binnings = []

        # the summaries are refreshed through these signals
        self.input_changed.connect(self.set_input_summary)
        self.output_changed.connect(self.set_output_summary)
        self.setup_gui()

    def setup_gui(self):
        """Create the graph and the controls and reset both summaries."""
        self._add_graph()
        self._add_controls()
        # emitting None sets the "no input" / "no output" summaries
        self.input_changed.emit(None)
        self.output_changed.emit(None)

    def _add_graph(self):
        """Instantiate ``GRAPH_CLASS`` and place its plot widget in the main area."""
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = self.GRAPH_CLASS(self, box)
        box.layout().addWidget(self.graph.plot_widget)

    def _add_controls(self):
        """Build the control area.

        Adds the map/latitude/longitude combos, the aggregation controls, the
        visualization controls, and the zoom/select and auto-send boxes.
        """
        # keyword arguments shared by the combo boxes below
        options = dict(labelWidth=75,
                       orientation=Qt.Horizontal,
                       sendSelectedValue=True,
                       contentsLength=14)

        lat_lon_box = gui.vBox(self.controlArea, True)
        # only continuous variables are offered for latitude/longitude
        self.lat_lon_model = DomainModel(DomainModel.MIXED,
                                         valid_types=(ContinuousVariable, ))

        # Added by Jean 2020/04/25 for support of selecting Tile provider
        gui.comboBox(lat_lon_box,
                     self,
                     'graph.tile_provider_key',
                     label='Map:',
                     items=list(TILE_PROVIDERS.keys()),
                     callback=self.graph.update_tile_provider,
                     **options)

        gui.comboBox(lat_lon_box,
                     self,
                     'attr_lat',
                     label='Latitude:',
                     callback=self.setup_plot,
                     model=self.lat_lon_model,
                     **options)

        gui.comboBox(lat_lon_box,
                     self,
                     'attr_lon',
                     label='Longitude:',
                     callback=self.setup_plot,
                     model=self.lat_lon_model,
                     **options)

        agg_box = gui.vBox(self.controlArea, True)
        # continuous and discrete variables can be aggregated
        self.agg_attr_model = DomainModel(valid_types=(ContinuousVariable,
                                                       DiscreteVariable))
        gui.comboBox(agg_box,
                     self,
                     'agg_attr',
                     label='Attribute:',
                     callback=self.update_agg,
                     model=self.agg_attr_model,
                     **options)

        # the items of this combo are replaced in update_agg
        self.agg_func_combo = gui.comboBox(agg_box,
                                           self,
                                           'agg_func',
                                           label='Agg.:',
                                           items=[DEFAULT_AGG_FUNC],
                                           callback=self.graph.update_colors,
                                           **options)
        # Modified by Jean 2020/05/13; the original note said "set max to 3".
        # NOTE(review): maxValue below is 4 - confirm the intended maximum.
        a_slider = gui.hSlider(agg_box,
                               self,
                               'admin_level',
                               minValue=0,
                               maxValue=4,
                               step=1,
                               label='Detail:',
                               createLabel=False,
                               callback=self.setup_plot)
        a_slider.setFixedWidth(176)

        visualization_box = gui.vBox(self.controlArea, True)
        # the maximum is adjusted later in recompute_binnings
        b_slider = gui.hSlider(visualization_box,
                               self,
                               "binning_index",
                               label="Bin width:",
                               minValue=0,
                               maxValue=max(1,
                                            len(self.binnings) - 1),
                               createLabel=False,
                               callback=self.graph.update_colors)
        b_slider.setFixedWidth(176)

        av_slider = gui.hSlider(visualization_box,
                                self,
                                "graph.alpha_value",
                                minValue=0,
                                maxValue=255,
                                step=10,
                                label="Opacity:",
                                createLabel=False,
                                callback=self.graph.update_colors)
        av_slider.setFixedWidth(176)

        gui.checkBox(visualization_box,
                     self,
                     "graph.show_legend",
                     "Show legend",
                     callback=self.graph.update_legend_visibility)

        # Added by Jean 2020/06/16 for support of selecting color palette
        # NOTE(review): the width of av_slider was already fixed above; this
        # repeated call looks redundant.
        av_slider.setFixedWidth(176)
        gui.comboBox(
            visualization_box,
            self,
            'palette_key',
            label='Palette:',
            items=list(ContinuousPalettes.keys()),
            # items = [palette.friendly_name for palette in ContinuousPalettes.values()],
            callback=self.update_palette,
            **options)

        self.controlArea.layout().addStretch(100)

        plot_gui = OWPlotGUI(self)
        plot_gui.box_zoom_select(self.controlArea)
        gui.auto_send(self.controlArea, self, "auto_commit")

    @property
    def effective_variables(self):
        """Latitude and longitude variables, or an empty list unless both are set."""
        if self.attr_lat and self.attr_lon:
            return [self.attr_lat, self.attr_lon]
        return []

    @property
    def effective_data(self):
        """The data transformed to a domain of the effective variables only."""
        return self.data.transform(Domain(self.effective_variables))

    # Input
    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Handle new input data.

        The context is closed and reopened around the new data. The plot is
        rebuilt unless both the old and the new data exist and their
        effective (latitude/longitude) values are equal. The aggregation,
        the selection and the outputs are refreshed in every case.
        """
        # remember the previous coordinates before self.data is replaced
        data_existed = self.data is not None
        effective_data = self.effective_data if data_existed else None

        self.closeContext()
        self.data = data
        self.Warning.no_region.clear()
        self.Error.no_lat_lon_vars.clear()
        self.agg_func = DEFAULT_AGG_FUNC
        # both calls below may reset self.data to None
        self.check_data()
        self.init_attr_values()
        self.openContext(self.data)

        if not (data_existed and self.data is not None
                and array_equal(effective_data.X, self.effective_data.X)):
            self.clear(cache=True)
            self.input_changed.emit(data)
            self.setup_plot()
        self.update_agg()
        self.apply_selection()
        self.unconditional_commit()

    def check_data(self):
        """Discard the data when it has no rows or an empty domain."""
        data = self.data
        if data is None:
            return
        if len(data) == 0 or len(data.domain) == 0:
            self.data = None

    def init_attr_values(self):
        """Reset the variable models and the default attribute choices.

        Latitude and longitude are looked up with ``find_lat_lon``. When
        either is missing, the error is shown and the data is discarded.
        The aggregated attribute defaults to the class variable.
        """
        lat = lon = None
        if self.data is not None:
            lat, lon = find_lat_lon(self.data, filter_hidden=True)
            if lat is None or lon is None:
                # we either find both or we don't have valid data
                self.Error.no_lat_lon_vars()
                self.data = None
                lat = lon = None

        domain = None if self.data is None else self.data.domain
        self.lat_lon_model.set_domain(domain)
        self.agg_attr_model.set_domain(domain)
        self.agg_attr = None if domain is None else domain.class_var
        self.attr_lat, self.attr_lon = lat, lon

    def set_input_summary(self, data):
        """Show the number of input rows, or the "no input" summary."""
        if data:
            summary = str(len(data))
        else:
            summary = self.info.NoInput
        self.info.set_input_summary(summary)

    def set_output_summary(self, data):
        """Show the number of output rows, or the "no output" summary."""
        if data:
            summary = str(len(data))
        else:
            summary = self.info.NoOutput
        self.info.set_output_summary(summary)

    def update_agg(self):
        """Refill the aggregation combo for the current attribute and recolor.

        The previously chosen function is kept when it is still offered;
        otherwise the default function is selected.
        """
        # read the choice before the combo is cleared
        previous = self.agg_func
        self.agg_func_combo.clear()

        attr = self.agg_attr
        if attr is None:
            available = [DEFAULT_AGG_FUNC]
        elif attr.is_discrete:
            available = [agg for agg in AGG_FUNCS if AGG_FUNCS[agg].disc]
        elif attr.is_time:
            available = [agg for agg in AGG_FUNCS if AGG_FUNCS[agg].time]
        else:
            available = list(AGG_FUNCS)

        self.agg_func_combo.addItems(available)
        self.agg_func = previous if previous in available else DEFAULT_AGG_FUNC

        self.graph.update_colors()

    # Added by Jean 2020/06/16 for support of selecting color palette
    def update_palette(self):
        """Apply the palette chosen in the 'Palette' combo and recolor the map.

        The palette is stored on the aggregated variable. When no variable is
        selected (``agg_attr`` is None) there is nothing to attach it to, so
        only the colors are refreshed instead of raising AttributeError.
        """
        if self.agg_attr is not None:
            self.agg_attr.palette = ContinuousPalettes[self.palette_key]
        self.graph.update_colors()

    def setup_plot(self):
        """Reset the regions and the graph; binning is disabled in mode aggregation."""
        self.controls.binning_index.setEnabled(not self.is_mode())
        self.clear()
        self.graph.reset_graph()

    def apply_selection(self):
        """Restore the stored selection on the graph.

        ``self.selection`` holds ``(index, group)`` pairs. Nothing is done
        when there is no data or when the stored selection is None or empty.
        """
        if self.data is None or self.selection is None:
            return
        if len(self.selection) == 0:
            # An empty list transposes to an empty 1-D array, so indexing its
            # rows below would raise IndexError; there is nothing to restore.
            return
        # row 0: indices into the graph selection, row 1: their group numbers
        index_group = np.array(self.selection).T
        selection = np.zeros(self.graph.n_ids, dtype=np.uint8)
        selection[index_group[0]] = index_group[1]
        self.graph.selection = selection
        self.graph.update_selection_colors()

    def selection_changed(self):
        """Store the graph selection as ``(index, group)`` pairs and commit.

        The stored selection is None for non-empty ``SqlTable`` data or when
        the graph has no selection array.
        """
        if self.data and isinstance(self.data, SqlTable):
            sel = None
        else:
            sel = self.graph.selection
        if sel is None:
            self.selection = None
        else:
            self.selection = [(i, x) for i, x in enumerate(sel) if x]
        self.commit()

    def commit(self):
        """Send the widget outputs."""
        self.send_data()

    def send_data(self):
        """Send the selected, annotated and aggregated data outputs.

        The graph selection is given per region, so it is first mapped to the
        data points through ``region_ids`` and ``data_ids``.
        """
        data = self.data
        graph_sel = self.graph.get_selection()
        selected_data = ann_data = None
        if data:
            group_sel = np.zeros(len(data), dtype=int)

            if not len(graph_sel):
                graph_sel = [0]
            else:
                # we get selection by region ids so we have to map it to points
                for region_id, group in zip(self.region_ids, graph_sel):
                    if group != 0:
                        point_indices = np.where(
                            self.data_ids == region_id)[0]
                        group_sel[point_indices] = group

            if np.sum(graph_sel) > 0:
                selected_data = create_groups_table(data, group_sel, False,
                                                    "Group")

            # group numbers above 1 mean that several groups are selected
            if np.max(graph_sel) > 1:
                ann_data = create_groups_table(data, group_sel)
            else:
                ann_data = create_annotated_table(data,
                                                  group_sel.astype(bool))

        self.output_changed.emit(selected_data)
        self.Outputs.selected_data.send(selected_data)
        self.Outputs.annotated_data.send(ann_data)

        # Added by Jean 2020/06/20, output aggdata for future usage
        agg_table = None
        if self.agg_data is not None:
            # both arrays are reshaped to single-column 2-D arrays
            values = self.agg_data.reshape(self.agg_data.shape[0], 1)
            ids = self.region_ids.reshape(self.region_ids.shape[0], 1)
            agg_table = Table.from_numpy(None, values, None, ids)
        self.Outputs.agg_data.send(agg_table)

    def recompute_binnings(self):
        """Recompute the binnings of the aggregated data and clamp the slider.

        Nothing is done for mode aggregation. Time variables use
        ``time_binnings``, everything else ``decimal_binnings``.
        """
        if self.is_mode():
            return

        make_binnings = time_binnings if self.is_time() else decimal_binnings
        self.binnings = make_binnings(self.agg_data, min_bins=3, max_bins=15)

        last_index = len(self.binnings) - 1
        self.controls.binning_index.setMaximum(last_index)
        self.binning_index = min(last_index, self.binning_index)

    def get_binning(self) -> BinDefinition:
        """Return the binning selected by ``binning_index``."""
        return self.binnings[self.binning_index]

    def get_palette(self):
        """Return the palette that matches the current aggregation.

        Counts use the default continuous palette, mode aggregation a limited
        discrete palette, and everything else the attribute's own palette.
        """
        if self.agg_func in ('Count', 'Count defined'):
            return DefaultContinuousPalette
        if self.is_mode():
            return LimitedDiscretePalette(MAX_COLORS)
        return self.agg_attr.palette

    def get_color_data(self):
        """Return the (possibly merged) aggregated data used for coloring."""
        return self.get_reduced_agg_data()

    def get_color_labels(self):
        """Return the color labels for the current aggregation.

        Mode aggregation yields the (possibly merged) value labels, time
        aggregation the attribute's ``str_val``, anything else None.
        """
        if self.is_mode():
            return self.get_reduced_agg_data(return_labels=True)
        if self.is_time():
            return self.agg_attr.str_val
        return None

    def get_reduced_agg_data(self, return_labels=False):
        """
        Return the agg data or, with `return_labels`, its labels.

        Merging happens only for mode aggregation of an attribute with at
        least `MAX_COLORS` values. In that case the least frequent values are
        merged into one category, coded `MAX_COLORS - 1` and labelled
        "Other", and the remaining values are renumbered from 0 in their
        original order.
        """
        needs_merging = (self.is_mode()
                         and len(self.agg_attr.values) >= MAX_COLORS)
        if not needs_merging:
            return self.agg_attr.values if return_labels else self.agg_data

        values = self.agg_attr.values
        n_values = len(values)
        dist = bincount(self.agg_data, max_val=n_values - 1)[0]
        # everything except the MAX_COLORS - 1 most frequent values is merged
        infrequent = np.zeros(n_values, dtype=bool)
        infrequent[np.argsort(dist)[:-(MAX_COLORS - 1)]] = True

        if return_labels:
            kept = [value for value, rare in zip(values, infrequent)
                    if not rare]
            return kept + ["Other"]

        merged = self.agg_data.copy()
        next_code = 0
        for code, rare in enumerate(infrequent):
            mask = self.agg_data == code
            if rare:
                merged[mask] = MAX_COLORS - 1
            else:
                merged[mask] = next_code
                next_code += 1
        return merged

    def is_mode(self):
        """Tell whether a discrete attribute is aggregated with 'Mode'."""
        attr = self.agg_attr
        return (attr is not None
                and attr.is_discrete
                and self.agg_func == 'Mode')

    def is_time(self):
        """Tell whether a time attribute is aggregated with a non-count function."""
        attr = self.agg_attr
        return (attr is not None
                and attr.is_time
                and self.agg_func not in ('Count', 'Count defined'))

    @memoize_method(3)
    def get_regions(self, lat_attr, lon_attr, admin):
        """
        Map points to regions and get regions information.

        Also stores the per-point ids in `self.data_ids` and shows a warning
        with the number of points that have no region.

        Returns:
            ndarray of ids corresponding to points,
            dict of region ids matched to their additional info,
            dict of region ids matched to their polygon
        """
        lats = self.data.get_column_view(lat_attr)[0]
        lons = self.data.get_column_view(lon_attr)[0]
        region_info = latlon2region(np.c_[lats, lons], admin)
        ids = np.array([region.get('_id') for region in region_info])
        region_info = {info.get('_id'): info for info in region_info}

        self.data_ids = np.array(ids)
        # `== None` is an elementwise comparison over the array
        no_region = np.sum(self.data_ids == None)
        if no_region:
            self.Warning.no_region(no_region)

        unique_ids = list(set(ids) - {None})
        polygons = dict(zip(unique_ids, get_shape(unique_ids)))
        return ids, region_info, polygons

    def get_grouped(self, lat_attr, lon_attr, admin, attr, agg_func):
        """
        Get aggregation value for points grouped by regions.

        Without `attr` every point contributes the value 1.

        Returns:
            Series of aggregated values
        """
        if attr is None:
            data = np.ones(len(self.data))
        else:
            data = self.data.get_column_view(attr)[0]

        ids, _, _ = self.get_regions(lat_attr, lon_attr, admin)
        series = pd.Series(data, dtype=float)
        return series.groupby(ids).agg(AGG_FUNCS[agg_func].transform)

    def get_agg_data(self) -> np.ndarray:
        """Aggregate the data per region and return the aggregated values.

        The values and their region ids are stored sorted by region id. The
        binnings are then recomputed and the outputs are sent.
        """
        grouped = self.get_grouped(self.attr_lat, self.attr_lon,
                                   self.admin_level, self.agg_attr,
                                   self.agg_func)

        values = np.array(grouped.values)
        ids = np.array(grouped.index)
        order = np.argsort(ids)
        self.region_ids = ids[order]
        self.agg_data = values[order]

        self.recompute_binnings()

        # Added by Jean 2020/06/20, output aggregated data
        self.send_data()

        return self.agg_data

    def format_agg_val(self, value):
        """Format an aggregated value for display.

        Counts are formatted as integers; other values use the attribute's
        ``repr_val``.
        """
        if self.agg_func not in ('Count', 'Count defined'):
            return self.agg_attr.repr_val(value)
        return f"{value:d}"

    def get_choropleth_regions(self) -> List[_ChoroplethRegion]:
        """Recalculate the regions, sorted by id, and refresh the agg data."""
        if self.attr_lat is None:
            # if we don't have locations we can't compute regions
            return []

        _, region_info, polygons = self.get_regions(self.attr_lat,
                                                    self.attr_lon,
                                                    self.admin_level)

        regions = []
        for _id, shape in polygons.items():
            # some regions consist of multiple polygons
            if isinstance(shape, MultiPolygon):
                polys = list(shape.geoms)
            else:
                polys = [shape]

            qpolys = [self.poly2qpoly(transform(self.deg2canvas, poly))
                      for poly in polys]
            region = _ChoroplethRegion(id=_id, info=region_info[_id],
                                       qpolys=qpolys)
            regions.append(region)

        self.choropleth_regions = sorted(regions, key=lambda cr: cr.id)
        self.get_agg_data()
        return self.choropleth_regions

    @staticmethod
    def poly2qpoly(poly: Polygon) -> QPolygonF:
        """Build a QPolygonF from the exterior coordinates of `poly`."""
        return QPolygonF([QPointF(x, y) for x, y in poly.exterior.coords])

    @staticmethod
    def deg2canvas(x, y):
        """Convert with ``deg2norm`` and return the result with y flipped (1 - y)."""
        norm_x, norm_y = deg2norm(x, y)
        return norm_x, 1 - norm_y

    def clear(self, cache=False):
        """Forget the computed regions; with `cache` also clear the region cache."""
        self.choropleth_regions = []
        if cache:
            self.get_regions.cache_clear()

    def send_report(self):
        """Add the plot to the report; nothing is reported without data."""
        if self.data is None:
            return
        self.report_plot()

    def sizeHint(self):
        """Return the preferred widget size (1132 x 708)."""
        return QSize(1132, 708)

    def onDeleteWidget(self):
        """Run the base-class cleanup, then release the plot and clear the graph."""
        super().onDeleteWidget()
        # schedule the view box for deletion and clear the plot widget
        self.graph.plot_widget.getViewBox().deleteLater()
        self.graph.plot_widget.clear()
        self.graph.clear()

    def keyPressEvent(self, event):
        """Update the tip about using the modifier keys when selecting"""
        super().keyPressEvent(event)
        # the tooltip is refreshed with the modifiers reported by the event
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        """Update the tip about using the modifier keys when selecting"""
        super().keyReleaseEvent(event)
        # the tooltip is refreshed with the modifiers reported by the event
        self.graph.update_tooltip(event.modifiers())

    def showEvent(self, ev):
        """Reset the map view range when the widget is shown."""
        super().showEvent(ev)
        # reset the map on show event since before that we didn't know the
        # right resolution
        self.graph.update_view_range()

    def resizeEvent(self, ev):
        """Update the map view range on resize, with ``match_data=False``."""
        super().resizeEvent(ev)
        # when resizing we need to constantly reset the map so that new
        # portions are drawn
        self.graph.update_view_range(match_data=False)

    @classmethod
    def migrate_settings(cls, settings, version):
        """Upgrade settings saved before version 2, in place.

        Two settings are renamed, and the opacity and legend settings are
        moved into a new ``"graph"`` sub-dictionary.
        """
        if version < 2:
            settings["graph"] = {}
            rename_setting(settings, "admin", "admin_level")
            rename_setting(settings, "autocommit", "auto_commit")
            # the old opacity is scaled by 2.55 to give the alpha value
            # NOTE(review): presumably opacity was a 0-100 percentage - confirm
            settings["graph"]["alpha_value"] = \
                round(settings["opacity"] * 2.55)
            settings["graph"]["show_legend"] = settings["show_legend"]

    @classmethod
    def migrate_context(cls, context, version):
        """Upgrade a context saved before version 2, in place.

        The attribute settings are migrated with ``migrate_str_to_variable``
        and renamed, the old selection is set aside, and the old names of
        three aggregation functions are replaced by their new names.
        """
        if not version < 2:
            return

        for old_name in ("lat_attr", "lon_attr", "attr"):
            migrate_str_to_variable(context,
                                    names=old_name,
                                    none_placeholder="")

        rename_setting(context, "lat_attr", "attr_lat")
        rename_setting(context, "lon_attr", "attr_lon")
        rename_setting(context, "attr", "agg_attr")
        # old selection will not be ported
        rename_setting(context, "selection", "old_selection")

        renamed_funcs = (("Max", "Maximal"),
                         ("Min", "Minimal"),
                         ("Std", "Std."))
        stored = context.values["agg_func"]
        for old_func, new_func in renamed_funcs:
            if stored[0] == old_func:
                context.values["agg_func"] = (new_func, stored[1])
                break
class TestDomainContextHandler(TestCase):
    """Tests for ``DomainContextHandler``.

    Covers domain encoding under the different ``match_values`` /
    ``*_in_res`` options, context match scores, cloning and opening of
    contexts, value filtering and encoding/decoding of single settings.
    """

    def setUp(self):
        """Build the shared domain, its expected encoding and a handler."""
        attributes = [ContinuousVariable('c1'),
                      DiscreteVariable('d1', values='abc'),
                      DiscreteVariable('d2', values='def')]
        class_vars = [DiscreteVariable('d3', values='ghi')]
        metas = [ContinuousVariable('c2'),
                 DiscreteVariable('d4', values='jkl')]
        self.domain = Domain(attributes=attributes,
                             class_vars=class_vars,
                             metas=metas)

        encoded_attributes = {'c1': Continuous, 'd1': Discrete,
                              'd2': Discrete, 'd3': Discrete}
        encoded_metas = {'c2': Continuous, 'd4': Discrete}
        # (domain, encoded attributes, encoded metas): the positional
        # arguments most handler methods under test expect.
        self.args = (self.domain, encoded_attributes, encoded_metas)

        self.handler = DomainContextHandler(metas_in_res=True)
        self.handler.read_defaults = lambda: None

    # -- helpers ---------------------------------------------------------

    def _encode_domain(self, **handler_options):
        """Encode ``self.domain`` with a handler built from the options."""
        handler = DomainContextHandler(**handler_options)
        return handler.encode_domain(self.domain)

    def _match_values(self, **values):
        """Return the match score of a mock context holding ``values``."""
        return self.handler.match(Mock(values=values), *self.args)

    def _assert_context_describes_domain(self, context):
        """Check that ``context`` stores the encoding of ``self.domain``."""
        _, encoded_attributes, encoded_metas = self.args
        self.assertEqual(context.attributes, encoded_attributes)
        self.assertEqual(context.metas, encoded_metas)
        self.assertSequenceEqual(context.ordered_domain,
                                 (('c1', Continuous), ('d1', Discrete),
                                  ('d2', Discrete), ('d3', Discrete),
                                  ('c2', Continuous), ('d4', Discrete)))

    def create_context(self, domain, values):
        """Return a new handler context for ``domain`` holding ``values``.

        A falsy ``domain`` is replaced by an empty ``Domain``.
        """
        domain = domain or Domain([])
        encoded = self.handler.encode_domain(domain)
        context = self.handler.new_context(domain, *encoded)
        context.values = values
        return context

    # -- encode_domain ---------------------------------------------------

    def test_encode_domain_with_match_none(self):
        attrs, metas = self._encode_domain(
            match_values=DomainContextHandler.MATCH_VALUES_NONE,
            metas_in_res=True)

        self.assertEqual(attrs, {'c1': Continuous, 'd1': Discrete,
                                 'd2': Discrete, 'd3': Discrete})
        self.assertEqual(metas, {'c2': Continuous, 'd4': Discrete})

    def test_encode_domain_with_match_class(self):
        attrs, metas = self._encode_domain(
            match_values=DomainContextHandler.MATCH_VALUES_CLASS,
            metas_in_res=True)

        self.assertEqual(attrs, {'c1': Continuous, 'd1': Discrete,
                                 'd2': Discrete, 'd3': list('ghi')})
        self.assertEqual(metas, {'c2': Continuous, 'd4': Discrete})

    def test_encode_domain_with_match_all(self):
        attrs, metas = self._encode_domain(
            match_values=DomainContextHandler.MATCH_VALUES_ALL,
            metas_in_res=True)

        self.assertEqual(attrs, {'c1': Continuous, 'd1': list('abc'),
                                 'd2': list('def'), 'd3': list('ghi')})
        self.assertEqual(metas, {'c2': Continuous, 'd4': list('jkl')})

    def test_encode_domain_with_false_attributes_in_res(self):
        attrs, metas = self._encode_domain(attributes_in_res=False,
                                           metas_in_res=True)

        self.assertEqual(attrs, {})
        self.assertEqual(metas, {'c2': Continuous, 'd4': Discrete})

    def test_encode_domain_with_false_metas_in_res(self):
        attrs, metas = self._encode_domain(attributes_in_res=True,
                                           metas_in_res=False)

        self.assertEqual(attrs, {'c1': Continuous, 'd1': Discrete,
                                 'd2': Discrete, 'd3': Discrete})
        self.assertEqual(metas, {})

    # -- match -----------------------------------------------------------

    def test_match_returns_2_on_perfect_match(self):
        _, encoded_attributes, encoded_metas = self.args
        context = Mock(attributes=encoded_attributes,
                       metas=encoded_metas,
                       values={})
        self.assertEqual(2., self.handler.match(context, *self.args))

    def test_match_returns_1_if_everything_matches(self):
        self.handler.bind(SimpleWidget)

        # Attributes in values
        self.assertEqual(1., self._match_values(
            with_metas=('d1', Discrete),
            required=('d1', Discrete)))

        # Metas in values
        self.assertEqual(1., self._match_values(
            with_metas=('d4', Discrete),
            required=('d1', Discrete)))

        # Attributes in lists
        self.assertEqual(1., self._match_values(
            with_metas=[("d1", Discrete)]))

        # Metas in lists
        self.assertEqual(1., self._match_values(
            with_metas=[("d4", Discrete)]))

    def test_match_returns_point_1_when_nothing_to_match(self):
        self.handler.bind(SimpleWidget)

        self.assertEqual(0.1, self._match_values())

    def test_match_returns_zero_on_incompatible_context(self):
        self.handler.bind(SimpleWidget)

        # required
        self.assertEqual(0, self._match_values(
            required=('u', Discrete),
            with_metas=('d1', Discrete)))

        # selected if_selected
        self.assertEqual(0, self._match_values(
            with_metas=('d1', Discrete),
            if_selected=[('u', Discrete)],
            selected=[0]))

        # unselected if_selected
        score = self._match_values(
            with_metas=('d1', Discrete),
            if_selected=[('u', Discrete), ('d1', Discrete)],
            selected=[1])
        self.assertAlmostEqual(0.667, score, places=2)

    # -- clone / open ----------------------------------------------------

    def test_clone_context(self):
        self.handler.bind(SimpleWidget)
        stored_values = dict(
            text=('u', -2),
            with_metas=[('d1', Discrete), ('d1', Continuous),
                        ('c1', Continuous), ('c1', Discrete)],
            required=('u', Continuous)
        )
        context = self.create_context(self.domain, stored_values)

        cloned = self.handler.clone_context(context, *self.args)
        new_values = cloned.values

        self.assertEqual(new_values['text'], ('u', -2))
        self.assertEqual([('d1', Discrete), ('c1', Continuous)],
                         new_values['with_metas'])
        self.assertNotIn('required', new_values)

    def test_open_context(self):
        self.handler.bind(SimpleWidget)
        stored = self.create_context(self.domain, dict(
            text=('u', -2),
            with_metas=[('d1', Discrete), ('d2', Discrete)]
        ))
        self.handler.global_contexts = [
            Mock(values={}), stored, Mock(values={})]

        widget = SimpleWidget()
        self.handler.initialize(widget)
        self.handler.open_context(widget, self.args[0])

        self._assert_context_describes_domain(widget.current_context)

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas, [('d1', Discrete),
                                             ('d2', Discrete)])

    def test_open_context_with_imperfect_match(self):
        self.handler.bind(SimpleWidget)
        stored = self.create_context(None, dict(
            text=('u', -2),
            with_metas=[('d1', Discrete), ('d1', Continuous),
                        ('c1', Continuous), ('c1', Discrete)],
            if_selected=[('c1', Discrete), ('c1', Continuous),
                         ('d1', Discrete), ('d1', Continuous)],
            selected=[2],
        ))
        self.handler.global_contexts = [
            Mock(values={}), stored, Mock(values={})]

        widget = SimpleWidget()
        self.handler.initialize(widget)
        self.handler.open_context(widget, self.args[0])

        self._assert_context_describes_domain(widget.current_context)

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas, [('d1', Discrete),
                                             ('c1', Continuous)])
        self.assertEqual(widget.if_selected, [('c1', Continuous),
                                              ('d1', Discrete)])
        self.assertEqual(widget.selected, [1])

    def test_open_context_with_no_match(self):
        self.handler.bind(SimpleWidget)
        widget = SimpleWidget()
        self.handler.initialize(widget)
        widget.text = 'u'

        self.handler.open_context(widget, self.args[0])

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas, [])
        opened = widget.current_context
        self._assert_context_describes_domain(opened)
        self.assertEqual(opened.values['text'], ('u', -2))

    # -- single settings -------------------------------------------------

    def test_filter_value(self):
        setting = ContextSetting([])
        setting.name = "value"

        def check(stored, expected):
            """Filter ``stored`` in place and compare it with ``expected``."""
            data = {"value": stored}
            self.handler.filter_value(setting, data, *self.args)
            self.assertEqual(data.get("value", None), expected)

        # filter list values
        check([], [])
        # When the list contains attributes as tuples of (name, type),
        # attributes not present in the domain should be filtered out
        check([("d1", Discrete), ("d1", Continuous),
               ("c1", Continuous), ("c1", Discrete)],
              [("d1", Discrete), ("c1", Continuous)])
        # All other values in the list should remain
        check([0, [1, 2, 3], "abcd", 5.4], [0, [1, 2, 3], "abcd", 5.4])

    def test_encode_setting(self):
        setting = ContextSetting(None)
        variable = self.domain[0]

        encoded = self.handler.encode_setting(None, setting, variable)

        self.assertEqual(encoded, (variable.name, 100 + vartype(variable)))

    def test_decode_setting(self):
        setting = ContextSetting(None)
        variable = self.domain[0]
        encoded = (variable.name, 100 + vartype(variable))

        decoded = self.handler.decode_setting(setting, encoded, self.domain)

        self.assertIs(decoded, variable)
Example #36
0
class OWScatterPlot(OWWidget):
    """Scatter plot widget.

    Receives a data table, an optional data subset and an optional list of
    features to show; outputs the selected rows, the remaining rows and a
    table naming the two attributes currently on the axes.
    """
    name = 'Scatter Plot'
    description = 'Scatter plot visualization.'
    icon = "icons/ScatterPlot.svg"

    # (signal name, type, handler method name[, flags])
    inputs = [("Data", Table, "set_data", Default),
              ("Data Subset", Table, "set_subset_data"),
              ("Features", AttributeList, "set_shown_attributes")]

    # (signal name, type[, flags])
    outputs = [("Selected Data", Table, Default), ("Other Data", Table),
               ("Features", Table)]

    settingsHandler = DomainContextHandler()

    # Plain (domain-independent) settings.
    auto_send_selection = Setting(True)
    auto_sample = Setting(True)
    toolbar_selection = Setting(0)

    # Names of the attributes shown on the x and y axes (context settings).
    attr_x = ContextSetting("")
    attr_y = ContextSetting("")

    graph = SettingProvider(OWScatterPlotGraph)

    # Values offered by the "Jittering" slider built in __init__.
    jitter_sizes = [0, 0.1, 0.5, 1, 2, 3, 4, 5, 7, 10]

    # NOTE(review): presumably the attribute path of the item that is saved
    # or reported as the widget's graph -- confirm against OWWidget.
    graph_name = "graph.plot_widget.plotItem"

    def __init__(self):
        """Build the widget: the plot area, the control-area boxes (axes,
        jittering, sampling, point properties, plot properties, zoom/select
        toolbar, send-selection) and the zoom keyboard actions."""
        super().__init__()

        # Main area: the scatter plot itself.
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWScatterPlotGraph(self, box, "ScatterPlot")
        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        # Draw both axes with the palette's text colour.
        axispen = QtGui.QPen(self.palette().color(QtGui.QPalette.Text))
        axis = plot.getAxis("bottom")
        axis.setPen(axispen)

        axis = plot.getAxis("left")
        axis.setPen(axispen)

        self.data = None  # Orange.data.Table
        self.subset_data = None  # Orange.data.Table
        self.data_metas_X = None  # self.data, where primitive metas are moved to X
        self.sql_data = None  # Orange.data.sql.table.SqlTable
        self.attribute_selection_list = None  # list of Orange.data.Variable
        # Timer that periodically calls add_data (started in set_data /
        # switch_sampling when auto-sampling an SQL table).
        self.__timer = QTimer(self, interval=1200)
        self.__timer.timeout.connect(self.add_data)

        # Options shared by all attribute combo boxes below.
        common_options = dict(labelWidth=50,
                              orientation=Qt.Horizontal,
                              sendSelectedValue=True,
                              valueType=str)
        box = gui.vBox(self.controlArea, "Axis Data")
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self.update_attr,
                                      **common_options)
        self.cb_attr_y = gui.comboBox(box,
                                      self,
                                      "attr_y",
                                      label="Axis y:",
                                      callback=self.update_attr,
                                      **common_options)

        # Projection-ranking dialog and the button that (re)shows it; the
        # button is enabled from set_data once suitable data is present.
        self.vizrank = self.VizRank(self)
        vizrank_box = gui.hBox(box)
        gui.separator(vizrank_box, width=common_options["labelWidth"])
        self.vizrank_button = gui.button(
            vizrank_box,
            self,
            "Rank projections",
            callback=self.vizrank.reshow,
            tooltip="Find projections with good class separation")
        self.vizrank_button.setEnabled(False)
        gui.separator(box)

        # Jittering controls; the label shows "None" for 0, one decimal for
        # values below 1 and an integer percentage otherwise.
        gui.valueSlider(box,
                        self,
                        value='graph.jitter_size',
                        label='Jittering: ',
                        values=self.jitter_sizes,
                        callback=self.reset_graph_data,
                        labelFormat=lambda x: "None"
                        if x == 0 else ("%.1f %%" if x < 1 else "%d %%") % x)
        gui.checkBox(gui.indentedBox(box),
                     self,
                     'graph.jitter_continuous',
                     'Jitter continuous values',
                     callback=self.reset_graph_data)

        # Sampling box; hidden until set_data receives a large SQL table.
        self.sampling = gui.auto_commit(self.controlArea,
                                        self,
                                        "auto_sample",
                                        "Sample",
                                        box="Sampling",
                                        callback=self.switch_sampling,
                                        commit=lambda: self.add_data(1))
        self.sampling.setVisible(False)

        # Point appearance: colour, label, shape and size attributes.
        box = gui.vBox(self.controlArea, "Points")
        self.cb_attr_color = gui.comboBox(box,
                                          self,
                                          "graph.attr_color",
                                          label="Color:",
                                          emptyString="(Same color)",
                                          callback=self.update_colors,
                                          **common_options)
        self.cb_attr_label = gui.comboBox(box,
                                          self,
                                          "graph.attr_label",
                                          label="Label:",
                                          emptyString="(No labels)",
                                          callback=self.graph.update_labels,
                                          **common_options)
        self.cb_attr_shape = gui.comboBox(box,
                                          self,
                                          "graph.attr_shape",
                                          label="Shape:",
                                          emptyString="(Same shape)",
                                          callback=self.graph.update_shapes,
                                          **common_options)
        self.cb_attr_size = gui.comboBox(box,
                                         self,
                                         "graph.attr_size",
                                         label="Size:",
                                         emptyString="(Same size)",
                                         callback=self.graph.update_sizes,
                                         **common_options)

        # Remaining controls are provided by the graph's own GUI helper.
        g = self.graph.gui
        box2 = g.point_properties_box(self.controlArea, box)

        box = gui.vBox(self.controlArea, "Plot Properties")
        g.add_widgets([g.ShowLegend, g.ShowGridLines], box)
        gui.checkBox(box,
                     self,
                     value='graph.tooltip_shows_all',
                     label='Show all data on mouse hover')
        self.cb_class_density = gui.checkBox(box,
                                             self,
                                             value='graph.class_density',
                                             label='Show class density',
                                             callback=self.update_density)

        # Zoom/select toolbar, wired to the graph's button handlers.
        self.zoom_select_toolbar = g.zoom_select_toolbar(
            gui.vBox(self.controlArea, "Zoom/Select"),
            nomargin=True,
            buttons=[
                g.StateButtonsBegin, g.SimpleSelect, g.Pan, g.Zoom,
                g.StateButtonsEnd, g.ZoomReset
            ])
        buttons = self.zoom_select_toolbar.buttons
        buttons[g.Zoom].clicked.connect(self.graph.zoom_button_clicked)
        buttons[g.Pan].clicked.connect(self.graph.pan_button_clicked)
        buttons[g.SimpleSelect].clicked.connect(
            self.graph.select_button_clicked)
        buttons[g.ZoomReset].clicked.connect(self.graph.reset_button_clicked)
        self.controlArea.layout().addStretch(100)
        # Icon lookup used by init_attr_values when filling the combo boxes.
        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(self.controlArea, self, "auto_send_selection",
                        "Send Selection")

        def zoom(s):
            """Zoom in/out by factor `s`."""
            viewbox = plot.getViewBox()
            # scaleBy scales the view's bounds (the axis range)
            viewbox.scaleBy((1 / s, 1 / s))

        def fit_to_view():
            """Reset the view box to its automatic range."""
            viewbox = plot.getViewBox()
            viewbox.autoRange()

        # Keyboard actions for zooming in, zooming out and fitting the view.
        zoom_in = QtGui.QAction("Zoom in", self, triggered=lambda: zoom(1.25))
        zoom_in.setShortcuts([
            QtGui.QKeySequence(QtGui.QKeySequence.ZoomIn),
            QtGui.QKeySequence(self.tr("Ctrl+="))
        ])
        zoom_out = QtGui.QAction("Zoom out",
                                 self,
                                 shortcut=QtGui.QKeySequence.ZoomOut,
                                 triggered=lambda: zoom(1 / 1.25))
        zoom_fit = QtGui.QAction("Fit in view",
                                 self,
                                 shortcut=QtGui.QKeySequence(Qt.ControlModifier
                                                             | Qt.Key_0),
                                 triggered=fit_to_view)
        self.addActions([zoom_in, zoom_out, zoom_fit])

    # def settingsFromWidgetCallback(self, handler, context):
    #     context.selectionPolygons = []
    #     for curve in self.graph.selectionCurveList:
    #         xs = [curve.x(i) for i in range(curve.dataSize())]
    #         ys = [curve.y(i) for i in range(curve.dataSize())]
    #         context.selectionPolygons.append((xs, ys))

    # def settingsToWidgetCallback(self, handler, context):
    #     selections = getattr(context, "selectionPolygons", [])
    #     for (xs, ys) in selections:
    #         c = SelectionCurve("")
    #         c.setData(xs,ys)
    #         c.attach(self.graph)
    #         self.graph.selectionCurveList.append(c)

    def reset_graph_data(self, *_):
        """Rescale the graph's data and redraw the plot.

        Used as a GUI callback; any positional arguments are ignored.
        """
        self.graph.rescale_data()
        self.update_graph()

    def set_data(self, data):
        """Handler for the "Data" input.

        SQL tables with an approximate length below 4000 are downloaded in
        full; larger ones are sampled, remembered in ``self.sql_data`` and
        the sampling box is shown (auto-sampling starts the timer). Empty
        data is treated as ``None``. Nothing happens when the new data has
        the same checksum as the current data; otherwise the context is
        closed, the data stored, the attribute combos re-initialised if the
        domain changed, the ranking dialog reset and the context reopened.

        :param data: the incoming table (``Table``, ``SqlTable``) or None.
        """
        self.information(1)
        self.__timer.stop()
        self.sampling.setVisible(False)
        self.sql_data = None

        if isinstance(data, SqlTable):
            if data.approx_len() >= 4000:
                self.information(1, "Large SQL table (showing a sample)")
                self.sql_data = data
                sample = data.sample_time(0.8, no_cache=True)
                sample.download_data(2000, partial=True)
                data = Table(sample)
                self.sampling.setVisible(True)
                if self.auto_sample:
                    self.__timer.start()
            else:
                data = Table(data)

        # A table without rows or without any variables counts as no data.
        if data is not None and not (len(data) and len(data.domain)):
            data = None
        if self.data and data and self.data.checksum() == data.checksum():
            return

        self.closeContext()
        previous = self.data
        same_domain = (
            previous and data
            and data.domain.checksum() == previous.domain.checksum())
        self.data = data
        self.data_metas_X = self.move_primitive_metas_to_X(data)

        if not same_domain:
            self.init_attr_values()
        self.vizrank._initialize()

        current = self.data
        rankable = (
            current is not None
            and current.domain.class_var is not None
            and len(current.domain.attributes) > 1
            and len(current) > 1)
        self.vizrank_button.setEnabled(rankable)
        self.openContext(self.data)

    def add_data(self, time=0.4):
        """Fetch another sample from ``self.sql_data`` and append it.

        Once more than 2000 rows are held, the sampling timer is stopped
        instead. A non-empty sample is downloaded (up to 2000 rows),
        concatenated to ``self.data`` and the graph is refreshed through
        ``handleNewSignals``.

        :param time: value passed to ``sample_time`` on the SQL table.
        """
        if self.data and len(self.data) > 2000:
            return self.__timer.stop()

        sample = self.sql_data.sample_time(time, no_cache=True)
        if not sample:
            return None

        sample.download_data(2000, partial=True)
        new_rows = Table(sample)
        self.data = Table.concatenate((self.data, new_rows), axis=0)
        self.data_metas_X = self.move_primitive_metas_to_X(self.data)
        self.handleNewSignals()

    def switch_sampling(self):
        """Callback of the auto-sample control.

        Stops the sampling timer; if auto-sampling is enabled and an SQL
        table is set, fetches one more sample and restarts the timer.
        """
        self.__timer.stop()
        if self.auto_sample and self.sql_data:
            self.add_data()
            self.__timer.start()

    def move_primitive_metas_to_X(self, data):
        """Return ``data`` converted to a domain in which primitive meta
        attributes are appended to the regular attributes.

        Non-primitive metas stay metas and class variables are kept.

        :param data: table to convert, or None.
        :return: the converted table, or None when ``data`` is None.
        """
        if data is None:
            return data

        domain = data.domain
        attributes = [
            var for var in domain.attributes + domain.metas
            if var.is_primitive()
        ]
        remaining_metas = [
            meta for meta in domain.metas if not meta.is_primitive()
        ]
        new_domain = Domain(attributes, domain.class_vars, remaining_metas)
        return Table.from_table(new_domain, data)

    def set_subset_data(self, subset_data):
        """Handler for the "Data Subset" input.

        SQL tables with an approximate length below ``AUTO_DL_LIMIT`` are
        downloaded; larger ones are rejected with a warning and treated as
        no subset. The result is stored with primitive metas moved to X.

        :param subset_data: the incoming subset table or None.
        """
        self.warning(0)
        if isinstance(subset_data, SqlTable):
            if subset_data.approx_len() >= AUTO_DL_LIMIT:
                self.warning(0,
                             "Data subset does not support large Sql tables")
                subset_data = None
            else:
                subset_data = Table(subset_data)
        self.subset_data = self.move_primitive_metas_to_X(subset_data)

    # called when all signals are received, so the graph is updated only once
    def handleNewSignals(self):
        """Push the current data to the graph and refresh everything.

        If a feature pair was requested through the "Features" input and
        both names are known to the graph, they become the x and y axes.
        The pending request is then cleared, the graph redrawn, the class
        density checkbox enabled or disabled, and the outputs sent.
        """
        self.graph.new_data(self.data_metas_X, self.subset_data)

        requested = self.attribute_selection_list
        if requested and all(
                attr.name in self.graph.attribute_name_index
                for attr in requested):
            x_var, y_var = requested[0], requested[1]
            self.attr_x = x_var.name
            self.attr_y = y_var.name
        self.attribute_selection_list = None

        self.update_graph()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())
        self.unconditional_commit()

    def set_shown_attributes(self, attributes):
        """Handler for the "Features" input.

        Remembers the first two attributes as the requested axis pair, or
        clears the request when fewer than two are given.

        :param attributes: sequence of attributes, or None.
        """
        usable = bool(attributes) and len(attributes) >= 2
        self.attribute_selection_list = attributes[:2] if usable else None

    def get_shown_attributes(self):
        """Return the ``(attr_x, attr_y)`` pair currently set for the axes."""
        return self.attr_x, self.attr_y

    def init_attr_values(self):
        """Refill the attribute combo boxes from the current data.

        All combos are cleared (the point-property combos get their
        placeholder entry back) and the axis attributes reset to None. With
        data present, the x/y/colour combos list every variable followed by
        the primitive metas; discrete ones also go to the shape combo and
        the others to the size combo; the label combo lists non-primitive
        metas first, then the same variables. The first two x/y entries
        become the axes, the class variable (if any) the colour, and
        shape, size and label are reset to the empty string.
        """
        self.cb_attr_x.clear()
        self.cb_attr_y.clear()
        self.attr_x = None
        self.attr_y = None
        placeholders = (
            (self.cb_attr_color, "(Same color)"),
            (self.cb_attr_label, "(No labels)"),
            (self.cb_attr_shape, "(Same shape)"),
            (self.cb_attr_size, "(Same size)"),
        )
        for combo, placeholder in placeholders:
            combo.clear()
            combo.addItem(placeholder)
        if not self.data:
            return

        domain = self.data.domain
        icons = self.icons

        # Non-primitive metas can only be used as labels.
        for meta in domain.metas:
            if not meta.is_primitive():
                self.cb_attr_label.addItem(icons[meta], meta.name)

        # Variables first, then primitive metas, each offered everywhere.
        primitive_metas = [m for m in domain.metas if m.is_primitive()]
        for var in list(domain.variables) + primitive_metas:
            icon, title = icons[var], var.name
            self.cb_attr_x.addItem(icon, title)
            self.cb_attr_y.addItem(icon, title)
            self.cb_attr_color.addItem(icon, title)
            by_kind = self.cb_attr_shape if var.is_discrete \
                else self.cb_attr_size
            by_kind.addItem(icon, title)
            self.cb_attr_label.addItem(icon, title)

        self.attr_x = self.cb_attr_x.itemText(0)
        has_second = self.cb_attr_y.count() > 1
        self.attr_y = self.cb_attr_y.itemText(1 if has_second else 0)

        class_var = domain.class_var
        self.graph.attr_color = class_var.name if class_var else ""
        self.graph.attr_shape = ""
        self.graph.attr_size = ""
        self.graph.attr_label = ""

    def update_attr(self, attributes=None):
        """React to a change of the axis attributes.

        Redraws the graph (optionally switching to ``attributes``), updates
        whether the class density checkbox is enabled and sends the
        "Features" output.

        :param attributes: optional pair of attribute names for the axes.
        """
        self.update_graph(attributes=attributes)
        self.cb_class_density.setEnabled(self.graph.can_draw_density())
        self.send_features()

    def update_colors(self):
        """Recolour the points and update whether the class density
        checkbox is enabled."""
        self.graph.update_colors()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())

    def update_density(self):
        """Redraw the graph without resetting the view (callback of the
        "Show class density" checkbox)."""
        self.update_graph(reset_view=False)

    def update_graph(self, attributes=None, reset_view=True, **_):
        """Redraw the plot for the current axis attributes.

        Clears the graph's zoom stack, adopts ``attributes`` as the new
        x/y pair when exactly two are given, and updates the graph data if
        the graph has any.

        :param attributes: optional pair of attribute names for the axes.
        :param reset_view: forwarded to ``graph.update_data``.
        :param _: extra keyword arguments are accepted and ignored.
        """
        self.graph.zoomStack = []
        if attributes and len(attributes) == 2:
            new_x, new_y = attributes
            self.attr_x = new_x
            self.attr_y = new_y
        if self.graph.have_data:
            self.graph.update_data(self.attr_x, self.attr_y, reset_view)

    def selection_changed(self):
        """Send the data outputs after the selection has changed."""
        self.send_data()

    def send_data(self):
        """Send the "Selected Data" and "Other Data" outputs.

        With no data both outputs are None. For an SQL table both outputs
        are the whole table. Otherwise the graph's selection picks the
        selected rows and its complement the other rows.
        """
        data = self.data
        selected = unselected = None
        # TODO: Implement selection for sql data
        if isinstance(data, SqlTable):
            selected = unselected = data
        elif data is not None:
            chosen = self.graph.get_selection()
            selected = data[chosen]
            # Boolean mask that is True for every row not in the selection.
            rest = np.ones(len(data), dtype=bool)
            rest[chosen] = False
            unselected = data[rest]
        self.send("Selected Data", selected)
        self.send("Other Data", unselected)

    def send_features(self):
        """Send the "Features" output.

        When at least one axis attribute is set, the output is a table
        named "Features" with a single string meta column ``feature`` and
        two rows holding ``attr_x`` and ``attr_y``; otherwise None is sent.
        """
        if not (self.attr_x or self.attr_y):
            self.send("Features", None)
            return

        feature_domain = Domain([], metas=(StringVariable(name="feature"), ))
        rows = [[self.attr_x], [self.attr_y]]
        features = Table(feature_domain, rows)
        features.name = "Features"
        self.send("Features", features)

    def commit(self):
        """Send all outputs: the data outputs and the features output."""
        self.send_data()
        self.send_features()

    def closeEvent(self, ce):
        """Close the ranking dialog, then let the base class handle the
        close event ``ce``."""
        self.vizrank.close()
        super().closeEvent(ce)

    def hideEvent(self, he):
        """Hide the ranking dialog, then let the base class handle the
        hide event ``he``."""
        self.vizrank.hide()
        super().hideEvent(he)

    def get_widget_name_extension(self):
        """Return "<x> vs <y>" built from the axis combo values, or None
        when the widget has no data."""
        if self.data is None:
            return None
        x_label = self.combo_value(self.cb_attr_x)
        y_label = self.combo_value(self.cb_attr_y)
        return "{} vs {}".format(x_label, y_label)

    def send_report(self):
        """Add the plot and a caption describing its settings to the report.

        The caption lists the colour, label, shape and size combo values
        and the jittering size; jittering is only reported when continuous
        jittering is on or one of the axis attributes is discrete.
        """
        discrete_axis = False
        if self.data:
            domain = self.data.domain
            discrete_axis = (domain[self.attr_x].is_discrete
                             or domain[self.attr_y].is_discrete)

        combos = (("Color", self.cb_attr_color),
                  ("Label", self.cb_attr_label),
                  ("Shape", self.cb_attr_shape),
                  ("Size", self.cb_attr_size))
        items = [(title, self.combo_value(combo)) for title, combo in combos]
        jittering = ((self.graph.jitter_continuous or discrete_axis)
                     and self.graph.jitter_size)
        items.append(("Jittering", jittering))

        caption = report.render_items_vert(tuple(items))
        self.report_plot()
        if caption:
            self.report_caption(caption)

    def onDeleteWidget(self):
        """Run the base-class clean-up, then schedule the plot's view box
        for deletion and clear the plot widget."""
        super().onDeleteWidget()
        self.graph.plot_widget.getViewBox().deleteLater()
        self.graph.plot_widget.clear()

    class VizRank(OWWidget):
        """Dialog that lists scored pairs of features in a table.

        Selecting a row makes the parent scatter plot show that pair of
        features on its axes.
        """
        name = "Rank projections (Scatter Plot)"

        # NOTE(review): presumably tells OWWidget not to build a control
        # area for this dialog -- confirm against OWWidget.
        want_control_area = False

        def __init__(self, parent_widget):
            """Build the ranking dialog: a sortable, single-row-selection
            table of projections and a start/pause button.

            :param parent_widget: the scatter plot widget whose data is
                ranked and whose axes are updated on selection.
            """
            super().__init__()
            self.parent_widget = parent_widget
            self.running = False
            self.progress = None
            self.k = 10

            table = QTableView()
            self.projectionTable = table
            self.mainArea.layout().addWidget(table)
            table.setSelectionBehavior(QTableView.SelectRows)
            table.setSelectionMode(QTableView.SingleSelection)
            table.setSortingEnabled(True)

            model = QStandardItemModel(self)
            self.projectionTableModel = model
            table.setModel(model)
            table.selectionModel().selectionChanged.connect(
                self.on_selection_changed)

            self.button = gui.button(
                self.mainArea, self, "Start evaluation",
                callback=self.toggle, default=True)
            self.resize(380, 512)
            self._initialize()

        def _initialize(self):
            """Reset the dialog to its idle state and check the parent's data.

            Empties the ranking table and all evaluation bookkeeping, finishes
            a progress bar left from an earlier run, and enables the start
            button only when the parent widget's data has a class variable, at
            least two features and at least two instances. Otherwise an
            information message explains what is missing.
            """
            self.running = False

            # Empty the ranking table, then restore its header and column
            # widths.
            self.projectionTableModel.clear()
            self.projectionTableModel.setHorizontalHeaderLabels(
                ["Score", "Feature 1", "Feature 2"])
            for column, width in enumerate((60, 120, 120)):
                self.projectionTable.setColumnWidth(column, width)

            # The button stays disabled unless the data passes every check
            # below.
            self.button.setText("Start evaluation")
            self.button.setEnabled(False)

            self.pause = False
            self.data = None
            self.attrs = []
            self.scores = []
            self.i, self.j = 0, 0
            if self.progress:
                self.progress.finish()
            self.progress = None

            # Called with the id only; presumably this clears message 0 that
            # an earlier call may have set -- confirm against the widget base.
            self.information(0)
            data = self.parent_widget.data
            if not data:
                return
            domain = data.domain
            if not domain.class_var:
                problem = "Data with a class variable is required."
            elif len(domain.attributes) < 2:
                problem = 'At least 2 unique features are needed.'
            elif len(data) < 2:
                problem = 'At least 2 instances are needed.'
            else:
                self.button.setEnabled(True)
                return
            self.information(0, problem)

        def on_selection_changed(self, selected, deselected):
            """Show the attribute pair of the newly selected row in the parent.

            Connected to the ``selectionChanged`` signal of the ranking view's
            selection model.

            Args:
                selected: selection whose ``indexes()`` holds the cells of the
                    newly selected row; columns 1 and 2 carry the two feature
                    names.
                deselected: the previously selected cells; unused.
            """
            indexes = selected.indexes()
            # The signal can arrive with nothing newly selected (for instance
            # when the selection is removed); indexing would then raise
            # IndexError, so there is nothing to show.
            if len(indexes) < 3:
                return
            a1 = indexes[1].data()
            a2 = indexes[2].data()
            self.parent_widget.update_attr(attributes=(a1, a2))

        def toggle(self):
            """Flip between running and paused in response to the button.

            When switching to running, the button is relabelled "Pause" and
            ``run`` is called. When switching to paused, the button is
            relabelled "Continue" and disabled; ``run`` re-enables it once it
            has noticed the pause.
            """
            self.running ^= 1
            if not self.running:
                self.button.setText("Continue")
                self.button.setEnabled(False)
                return
            self.button.setText("Pause")
            self.run()

        def run(self):
            """Score attribute pairs and insert them into the ranking table.

            Attributes are ordered by ``score_heuristic`` on the first call.
            Every pair ``(attrs[j], attrs[i])`` with ``j < i`` is scored with
            a k-nearest-neighbours criterion on the two scaled columns, and a
            row with the score and both names is inserted into the table so
            that the table stays sorted from the highest to the lowest score.

            The loop can be interrupted: when ``self.running`` is found to be
            false, the current position is stored in ``self.i``/``self.j`` and
            the method returns; the next call resumes from that position.
            """
            graph = self.parent_widget.graph
            y_full = self.parent_widget.data.Y
            # The attribute order is computed once and reused when resuming.
            if not self.attrs:
                self.attrs = self.score_heuristic()
            # One progress step per unordered pair of attributes.
            if not self.progress:
                self.progress = gui.ProgressBar(
                    self,
                    len(self.attrs) * (len(self.attrs) - 1) / 2)
            for i in range(self.i, len(self.attrs)):
                ind1 = graph.attribute_name_index[self.attrs[i]]
                # self.j is non-zero only for the row in which a previous call
                # was interrupted; it is reset after the inner loop.
                for j in range(self.j, i):
                    if not self.running:
                        # Paused: remember where to resume, make sure some row
                        # is selected and let the user continue.
                        # NOTE(review): self.running can only change during
                        # the loop if something inside it processes events,
                        # presumably progress.advance() -- confirm.
                        self.i, self.j = i, j
                        if not self.projectionTable.selectedIndexes():
                            self.projectionTable.selectRow(0)
                        self.button.setEnabled(True)
                        return
                    ind2 = graph.attribute_name_index[self.attrs[j]]
                    # Two rows of the scaled data, restricted to the entries
                    # the graph reports as valid for both attributes, then
                    # transposed so that each row is one point.
                    X = graph.scaled_data[[ind1, ind2], :]
                    valid = graph.get_valid_list([ind1, ind2])
                    X = X[:, valid].T
                    # Too few valid points for k neighbours: skip the pair but
                    # still count it as a progress step.
                    if X.shape[0] < self.k:
                        self.progress.advance()
                        continue
                    y = y_full[valid]
                    n_neighbors = min(self.k, len(X) - 1)
                    knn = NearestNeighbors(n_neighbors=n_neighbors).fit(X)
                    # Called without query points; per the scikit-learn
                    # documentation this returns the neighbours of each fitted
                    # point, not counting the point itself.
                    ind = knn.kneighbors(return_distance=False)
                    if self.parent_widget.data.domain.has_discrete_class:
                        # Number of neighbours sharing the point's class,
                        # divided by the count over all (not only valid) rows.
                        score = np.sum(y[ind] == y.reshape(-1, 1)) / (
                            len(y_full) * n_neighbors)
                    else:
                        # R2 of predicting each value by the mean of its
                        # neighbours, scaled by the share of valid rows.
                        score = r2_score(y, np.mean(
                            y[ind], axis=1)) * (len(y) / len(y_full))
                    # self.scores is kept in ascending order while the table
                    # is shown in descending order, hence the mirrored row
                    # index.
                    pos = bisect_left(self.scores, score)
                    self.projectionTableModel.insertRow(
                        len(self.scores) - pos, [
                            QStandardItem("{:.4f}".format(score)),
                            QStandardItem(self.attrs[j]),
                            QStandardItem(self.attrs[i])
                        ])
                    self.scores.insert(pos, score)
                    self.progress.advance()
                self.j = 0
            # All pairs evaluated.
            self.progress.finish()
            if not self.projectionTable.selectedIndexes():
                self.projectionTable.selectRow(0)
            self.button.setText("Finished")
            self.button.setEnabled(False)

        def score_heuristic(self):
            """Return the parent's attribute names ordered by Relief weight.

            A temporary table is built from the transposed scaled data of the
            parent's graph and the parent's class values. Its columns are
            scored with ReliefF when the class variable is discrete and with
            RReliefF otherwise. The names of the parent data's attributes are
            returned from the highest to the lowest weight.
            """
            parent = self.parent_widget
            columns = parent.graph.scaled_data.T
            target = parent.data.Y
            variables = [ContinuousVariable(str(idx))
                         for idx in range(columns.shape[1])]
            domain = Domain(variables, parent.data.domain.class_vars)
            table = Table(domain, columns, target)
            if isinstance(domain.class_var, DiscreteVariable):
                scorer_cls = ReliefF
            else:
                scorer_cls = RReliefF
            weights = scorer_cls(n_iterations=100, k_nearest=self.k)(table)
            names = (var.name for var in parent.data.domain.attributes)
            # Sorting the (weight, name) pairs breaks ties between equal
            # weights by name.
            ranked = sorted(zip(weights, names), reverse=True)
            return [name for _, name in ranked]
class OWStackAlign(OWWidget):
    """Widget that aligns and crops a stack of images.

    The input table and the selected x/y axis variables are handed to
    ``process_stack``. The stack it returns is sent to the output and the
    shifts it returns are plotted against the frame number in the main area.
    """
    # Widget's name as displayed in the canvas
    name = "Align Stack"

    # Short widget description
    description = ("Aligns and crops a stack of images using various methods.")

    icon = "icons/stackalign.svg"

    # Define inputs and outputs
    class Inputs:
        data = Input("Stack of images", Table, default=True)

    class Outputs:
        newstack = Output("Aligned image stack", Table, default=True)

    class Error(OWWidget.Error):
        nan_in_image = Msg("Unknown values within images: {} unknowns")
        invalid_axis = Msg("Invalid axis: {}")

    autocommit = settings.Setting(True)

    want_main_area = True
    want_control_area = True
    resizing_enabled = False

    settingsHandler = DomainContextHandler()

    sobel_filter = settings.Setting(False)
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    # 1-based number of the reference frame; commit() passes
    # ``ref_frame_num - 1`` to process_stack. _sanitize_ref_frame() raises
    # the stored default of 0 to 1 once data is available.
    ref_frame_num = settings.Setting(0)

    def __init__(self):
        """Build the axis selectors, the parameter controls and the plot."""
        super().__init__()

        box = gui.widgetBox(self.controlArea, "Axes")

        common_options = dict(labelWidth=50,
                              orientation=Qt.Horizontal,
                              sendSelectedValue=True,
                              valueType=str)
        # Both combo boxes share one model restricted to continuous meta and
        # class variables.
        self.xy_model = DomainModel(DomainModel.METAS | DomainModel.CLASSES,
                                    valid_types=ContinuousVariable)
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self._update_attr,
                                      model=self.xy_model,
                                      **common_options)
        self.cb_attr_y = gui.comboBox(box,
                                      self,
                                      "attr_y",
                                      label="Axis y:",
                                      callback=self._update_attr,
                                      model=self.xy_model,
                                      **common_options)

        # The axis model has to be filled before a stored context is applied.
        self.contextAboutToBeOpened.connect(self._init_interface_data)

        box = gui.widgetBox(self.controlArea, "Parameters")

        gui.checkBox(box,
                     self,
                     "sobel_filter",
                     label="Use sobel filter",
                     callback=self._sobel_changed)
        gui.separator(box)
        hbox = gui.hBox(box)
        self.le1 = lineEditIntRange(box,
                                    self,
                                    "ref_frame_num",
                                    bottom=1,
                                    default=1,
                                    callback=self._ref_frame_changed)
        hbox.layout().addWidget(QLabel("Reference frame:", self))
        hbox.layout().addWidget(self.le1)

        gui.rubber(self.controlArea)

        plot_box = gui.widgetBox(self.mainArea, "Shift curves")
        self.plotview = pg.PlotWidget(background="w")
        plot_box.layout().addWidget(self.plotview)
        # TODO:  resize widget to make it a bit smaller

        self.data = None

        gui.auto_commit(self.controlArea, self, "autocommit", "Send Data")

    def _sanitize_ref_frame(self):
        """Clamp the reference frame number to ``1 .. data.X.shape[1]``.

        Does nothing while no data is present, so the line-edit callback can
        call it safely before any input arrives.
        """
        if self.data is None:
            return
        n_frames = self.data.X.shape[1]
        # The lower bound matters because the setting defaults to 0 and
        # commit() subtracts 1, which would otherwise yield index -1.
        clamped = max(1, min(self.ref_frame_num, n_frames))
        if clamped != self.ref_frame_num:
            self.ref_frame_num = clamped

    def _ref_frame_changed(self):
        """Handle an edit of the reference frame: sanitize it and commit."""
        self._sanitize_ref_frame()
        self.commit()

    def _sobel_changed(self):
        """Handle a toggle of the sobel filter check box."""
        self.commit()

    def _init_attr_values(self, data):
        """Fill the axis model from ``data`` and pick default x and y axes.

        The first variable in the model becomes x and the second y; with a
        single variable both axes use it, and with none both are ``None``.
        """
        domain = data.domain if data is not None else None
        self.xy_model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None
        self.attr_y = self.xy_model[1] if len(self.xy_model) >= 2 \
            else self.attr_x

    def _init_interface_data(self, args):
        """Reset the axis choices unless the new data keeps the old domain.

        Connected to ``contextAboutToBeOpened``; ``args[0]`` is the data given
        to ``openContext``. At that point ``self.data`` still holds the
        previous input, because set_data() assigns it afterwards.
        """
        data = args[0]
        same_domain = (self.data and data and data.domain == self.data.domain)
        if not same_domain:
            self._init_attr_values(data)

    def _update_attr(self):
        """Handle a change of either axis combo box."""
        self.commit()

    @Inputs.data
    def set_data(self, dataset):
        """Input signal handler: store the new stack and recompute."""
        self.closeContext()
        self.openContext(dataset)
        self.data = dataset
        self._sanitize_ref_frame()
        self.Error.nan_in_image.clear()
        self.Error.invalid_axis.clear()
        self.commit()

    def commit(self):
        """Align the stack, plot the shifts and send the result.

        ``None`` is sent when there is no data, the data has no attributes,
        an axis is not selected, or ``process_stack`` raises one of the two
        handled exceptions (which are shown as widget errors).
        """
        new_stack = None

        self.Error.nan_in_image.clear()
        self.Error.invalid_axis.clear()

        self.plotview.plotItem.clear()

        if self.data and len(
                self.data.domain.attributes) and self.attr_x and self.attr_y:
            try:
                shifts, new_stack = process_stack(
                    self.data,
                    self.attr_x,
                    self.attr_y,
                    upsample_factor=100,
                    use_sobel=self.sobel_filter,
                    # The widget counts frames from 1.
                    ref_frame_num=self.ref_frame_num - 1)
            except NanInsideHypercube as e:
                self.Error.nan_in_image(e.args[0])
            except InvalidAxisException as e:
                self.Error.invalid_axis(e.args[0])
            else:
                frames = np.linspace(1, shifts.shape[0], shifts.shape[0])
                # One curve per column of ``shifts``, each in its own colour.
                for column, color in ((0, (255, 40, 0)), (1, (0, 139, 139))):
                    self.plotview.plotItem.plot(frames,
                                                shifts[:, column],
                                                pen=pg.mkPen(color=color,
                                                             width=3),
                                                symbol='o',
                                                symbolBrush=color,
                                                symbolPen='w',
                                                symbolSize=7)
                self.plotview.getPlotItem().setLabel('bottom', 'Frame number')
                self.plotview.getPlotItem().setLabel('left', 'Shift / pixel')
                # Mark the reference frame on the frame-number axis.
                self.plotview.getPlotItem().addLine(
                    self.ref_frame_num,
                    pen=pg.mkPen(color=(150, 150, 150),
                                 width=3,
                                 style=Qt.DashDotDotLine))

        self.Outputs.newstack.send(new_stack)

    def send_report(self):
        """Add the sobel filter setting to the report."""
        self.report_items((("Use sobel filter", str(self.sobel_filter)), ))
Example #38
0
class OWCreateClass(widget.OWWidget):
    """Widget that derives a class attribute from substrings of a column.

    The user picks a string or discrete column and enters (name, substring)
    rules. Rules are applied in order; the output data gets a new discrete
    class variable whose values are the rule names, and the former class
    variables are moved to metas.
    """
    name = "Create Class"
    description = "Create class attribute from a string attribute"
    icon = "icons/CreateClass.svg"
    category = "Data"
    keywords = []

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)

    want_main_area = False
    buttons_area_orientation = Qt.Vertical

    settingsHandler = DomainContextHandler()
    attribute = ContextSetting(None)
    class_name = ContextSetting("class")
    # Maps an attribute name to its list of [label, pattern] pairs.
    rules = ContextSetting({})
    match_beginning = ContextSetting(False)
    case_sensitive = ContextSetting(False)

    TRANSFORMERS = {StringVariable: ValueFromStringSubstring,
                    DiscreteVariable: ValueFromDiscreteSubstring}

    # Cached variables are used so that two instances of the widget with the
    # same settings will create the same variable. The usual `make` wouldn't
    # work here because variables with `compute_value` are not reused.
    cached_variables = {}

    class Warning(widget.OWWidget.Warning):
        no_nonnumeric_vars = Msg("Data contains only numeric variables.")

    class Error(widget.OWWidget.Error):
        class_name_duplicated = Msg("Class name duplicated.")
        class_name_empty = Msg("Class name should not be empty.")

    def __init__(self):
        """Build the controls: class name, source column, rules, options."""
        super().__init__()
        self.data = None

        # The following lists are of the same length as self.active_rules

        #: list of pairs with counts of matches for each pattern when the
        #     patterns are applied in order and when applied on the entire set,
        #     disregarding the preceding patterns
        self.match_counts = []

        #: list of list of QLineEdit: line edit pairs for each pattern
        self.line_edits = []
        #: list of QPushButton: list of remove buttons
        self.remove_buttons = []
        #: list of list of QLabel: pairs of labels with counts
        self.counts = []

        gui.lineEdit(
            self.controlArea, self, "class_name",
            orientation=Qt.Horizontal, box="New Class Name")

        variable_select_box = gui.vBox(self.controlArea, "Match by Substring")

        combo = gui.comboBox(
            variable_select_box, self, "attribute", label="From column:",
            orientation=Qt.Horizontal, searchable=True,
            callback=self.update_rules,
            model=DomainModel(valid_types=(StringVariable, DiscreteVariable)))
        # Don't use setSizePolicy keyword argument here: it applies to box,
        # not the combo
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Preferred)

        patternbox = gui.vBox(variable_select_box)
        #: QGridLayout: the layout that holds the remove buttons, line edits
        #    and count labels. The lines are added and removed dynamically.
        self.rules_box = rules_box = QGridLayout()
        rules_box.setSpacing(4)
        rules_box.setContentsMargins(4, 4, 4, 4)
        self.rules_box.setColumnMinimumWidth(1, 70)
        self.rules_box.setColumnMinimumWidth(0, 10)
        self.rules_box.setColumnStretch(0, 1)
        self.rules_box.setColumnStretch(1, 1)
        self.rules_box.setColumnStretch(2, 100)
        rules_box.addWidget(QLabel("Name"), 0, 1)
        rules_box.addWidget(QLabel("Substring"), 0, 2)
        rules_box.addWidget(QLabel("Count"), 0, 3, 1, 2)
        self.update_rules()

        # Named so that it does not shadow the `widget` module used in the
        # class statement.
        rules_widget = QWidget(patternbox)
        rules_widget.setLayout(rules_box)
        patternbox.layout().addWidget(rules_widget)

        box = gui.hBox(patternbox)
        gui.rubber(box)
        gui.button(box, self, "+", callback=self.add_row,
                   autoDefault=False, width=34,
                   sizePolicy=(QSizePolicy.Maximum,
                               QSizePolicy.Maximum))

        optionsbox = gui.vBox(self.controlArea, "Options")
        gui.checkBox(
            optionsbox, self, "match_beginning", "Match only at the beginning",
            callback=self.options_changed)
        gui.checkBox(
            optionsbox, self, "case_sensitive", "Case sensitive",
            callback=self.options_changed)

        gui.rubber(self.controlArea)

        gui.button(self.buttonsArea, self, "Apply", callback=self.apply)

        # TODO: Resizing upon changing the number of rules does not work
        self.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum)

    @property
    def active_rules(self):
        """
        Returns the class names and patterns corresponding to the currently
            selected attribute. If the attribute is not yet in the dictionary,
            set the default.

        The key is the attribute's name, or `None` when no attribute is
        selected; the default is two empty [label, pattern] pairs.
        """
        return self.rules.setdefault(self.attribute and self.attribute.name,
                                     [["", ""], ["", ""]])

    def rules_to_edits(self):
        """Fill the line edits with the rules from the current settings."""
        for editr, textr in zip(self.line_edits, self.active_rules):
            for edit, text in zip(editr, textr):
                edit.setText(text)

    @Inputs.data
    def set_data(self, data):
        """Input data signal handler.

        Resets the rules, fills the column selector from the new domain and,
        when at least one string or discrete column exists, selects the first
        one, restores a matching context and applies. Otherwise `None` is
        sent.
        """
        self.closeContext()
        self.rules = {}
        self.data = data
        model = self.controls.attribute.model()
        model.set_domain(data.domain if data is not None else None)
        self.Warning.no_nonnumeric_vars(shown=data is not None and not model)
        if not model:
            self.attribute = None
            self.Outputs.data.send(None)
            return
        self.attribute = model[0]
        self.openContext(data)
        self.update_rules()
        self.apply()

    def update_rules(self):
        """Called when the rules are changed: adjust the number of lines in
        the form and fill them, update the counts. The widget does not have
        auto-apply."""
        self.adjust_n_rule_rows()
        self.rules_to_edits()
        self.update_counts()
        # TODO: Indicator that changes need to be applied

    def options_changed(self):
        """Recompute the counts after a matching option was toggled."""
        self.update_counts()

    def adjust_n_rule_rows(self):
        """Add or remove lines if needed and fix the tab order."""
        def _add_line():
            """Append one row: remove button, two edits, two count labels."""
            self.line_edits.append([])
            n_lines = len(self.line_edits)
            for coli in range(1, 3):
                edit = QLineEdit()
                self.line_edits[-1].append(edit)
                self.rules_box.addWidget(edit, n_lines, coli)
                edit.textChanged.connect(self.sync_edit)
            button = gui.button(
                None, self, label='×', width=33,
                autoDefault=False, callback=self.remove_row,
                sizePolicy=(QSizePolicy.Maximum,
                            QSizePolicy.Maximum)
            )
            self.remove_buttons.append(button)
            self.rules_box.addWidget(button, n_lines, 0)
            self.counts.append([])
            for coli, kwargs in enumerate(
                    (dict(),
                     dict(styleSheet="color: gray"))):
                label = QLabel(alignment=Qt.AlignCenter, **kwargs)
                self.counts[-1].append(label)
                self.rules_box.addWidget(label, n_lines, 3 + coli)

        def _remove_line():
            """Delete the widgets of the last row."""
            for edit in self.line_edits.pop():
                edit.deleteLater()
            self.remove_buttons.pop().deleteLater()
            for label in self.counts.pop():
                label.deleteLater()

        def _fix_tab_order():
            """Bind each edit to its rule and chain the edits for tabbing."""
            prev = None
            for row, rule in zip(self.line_edits, self.active_rules):
                for col_idx, edit in enumerate(row):
                    # sync_edit() uses these to write the text back into the
                    # rule the edit belongs to.
                    edit.row, edit.col_idx = rule, col_idx
                    if prev is not None:
                        self.setTabOrder(prev, edit)
                    prev = edit

        n = len(self.active_rules)
        while n > len(self.line_edits):
            _add_line()
        while len(self.line_edits) > n:
            _remove_line()
        _fix_tab_order()

    def add_row(self):
        """Append a new row at the end."""
        self.active_rules.append(["", ""])
        self.adjust_n_rule_rows()
        self.update_counts()

    def remove_row(self):
        """Remove the row whose remove button sent the signal."""
        remove_idx = self.remove_buttons.index(self.sender())
        del self.active_rules[remove_idx]
        # update_rules() already recomputes the counts.
        self.update_rules()

    def sync_edit(self, text):
        """Handle changes in line edits: update the active rules and counts"""
        edit = self.sender()
        edit.row[edit.col_idx] = text
        self.update_counts()

    def class_labels(self):
        """Construct a list of class labels. Empty labels are replaced with
        C1, C2, C3. If C<n> already appears in the list of values given by
        the user, the labels start at C<n+1> instead.
        """
        # Only labels that are exactly "C" followed by digits count; a prefix
        # match would let a label such as "C1a" reach int() and raise
        # ValueError.
        largest_c = max((int(label[1:]) for label, _ in self.active_rules
                         if re.fullmatch(r"C\d+", label)),
                        default=0)
        class_count = count(largest_c + 1)
        return [label_edit.text() or "C{}".format(next(class_count))
                for label_edit, _ in self.line_edits]

    def update_counts(self):
        """Recompute and update the counts of matches."""
        def _matcher(strings, pattern):
            """Return indices of strings into patterns; consider case
            sensitivity and matching at the beginning. The given strings are
            assumed to be in lower case if match is case insensitive. Patterns
            are fixed on the fly."""
            if not self.case_sensitive:
                pattern = pattern.lower()
            indices = np.char.find(strings, pattern.strip())
            return indices == 0 if self.match_beginning else indices != -1

        def _lower_if_needed(strings):
            """Lower-case the strings unless matching is case sensitive."""
            return strings if self.case_sensitive else np.char.lower(strings)

        def _string_counts():
            """
            Generate pairs of arrays for each rule until running out of data
            instances. np.sum over the two arrays in each pair gives the
            number of matches of the remaining instances (considering the
            order of patterns) and of the original data.

            For _string_counts, the arrays contain bool masks referring to the
            original data
            """
            nonlocal data
            data = data.astype(str)
            data = data[~np.char.equal(data, "")]
            data = _lower_if_needed(data)
            remaining = np.array(data)
            for _, pattern in self.active_rules:
                matching = _matcher(remaining, pattern)
                total_matching = _matcher(data, pattern)
                yield matching, total_matching
                remaining = remaining[~matching]
                if not remaining.size:
                    break

        def _discrete_counts():
            """
            Generate pairs similar to _string_counts, except that the arrays
            contain bin counts for the attribute's values matching the pattern.
            """
            attr_vals = np.array(attr.values)
            attr_vals = _lower_if_needed(attr_vals)
            bins = bincount(data, max_val=len(attr.values) - 1)[0]
            remaining = np.array(bins)
            for _, pattern in self.active_rules:
                matching = _matcher(attr_vals, pattern)
                yield remaining[matching], bins[matching]
                remaining[matching] = 0
                if not np.any(remaining):
                    break

        def _clear_labels():
            """Clear all labels and the tooltips set by _set_labels"""
            for lab_matched, lab_total in self.counts:
                for label in (lab_matched, lab_total):
                    label.setText("")
                    # Without this a tooltip from an earlier state would
                    # remain on rows it no longer applies to.
                    label.setToolTip("")

        def _set_labels():
            """Set the labels to show the counts"""
            for (n_matched, n_total), (lab_matched, lab_total), (lab, patt) in \
                    zip(self.match_counts, self.counts, self.active_rules):
                n_before = n_total - n_matched
                lab_matched.setText("{}".format(n_matched))
                if n_before and (lab or patt):
                    lab_total.setText("+ {}".format(n_before))
                    if n_matched:
                        tip = "{} of the {} matching instances are already " \
                              "covered above".format(n_before, n_total)
                    else:
                        tip = "All matching instances are already covered above"
                    lab_total.setToolTip(tip)
                    lab_matched.setToolTip(tip)

        def _set_placeholders():
            """Set placeholders for empty edit lines"""
            # Rules that were not reached (data ran out) get a count of 0.
            matches = [n for n, _ in self.match_counts] + \
                      [0] * len(self.line_edits)
            for n_matched, (_, patt) in zip(matches, self.line_edits):
                if not patt.text():
                    patt.setPlaceholderText(
                        "(remaining instances)" if n_matched else "(unused)")

            labels = self.class_labels()
            for label, (lab_edit, _) in zip(labels, self.line_edits):
                if not lab_edit.text():
                    lab_edit.setPlaceholderText(label)

        _clear_labels()
        attr = self.attribute
        if attr is None:
            return
        counters = {StringVariable: _string_counts,
                    DiscreteVariable: _discrete_counts}
        data = self.data.get_column_view(attr)[0]
        self.match_counts = [[int(np.sum(x)) for x in matches]
                             for matches in counters[type(attr)]()]
        _set_labels()
        _set_placeholders()

    def apply(self):
        """Output the transformed data.

        Sends `None` when no attribute is selected or when the class name is
        empty or already used in the domain (the latter two also raise the
        corresponding widget error).
        """
        self.Error.clear()
        self.class_name = self.class_name.strip()
        if not self.attribute:
            self.Outputs.data.send(None)
            return
        domain = self.data.domain
        if not self.class_name:
            self.Error.class_name_empty()
        if self.class_name in domain:
            self.Error.class_name_duplicated()
        if not self.class_name or self.class_name in domain:
            self.Outputs.data.send(None)
            return
        new_class = self._create_variable()
        # The previous class variables are kept as metas.
        new_domain = Domain(
            domain.attributes, new_class, domain.metas + domain.class_vars)
        new_data = self.data.transform(new_domain)
        self.Outputs.data.send(new_data)

    def _create_variable(self):
        """Return the discrete variable described by the current rules.

        A variable built earlier from the same attribute, name, labels,
        patterns and options is taken from `cached_variables`.
        """
        rules = self.active_rules
        # Keep the rules that have a label, a pattern or a match-count entry.
        # NOTE(review): n_matches is a two-item list and therefore always
        # truthy, so every rule paired with a match_counts entry is kept;
        # confirm whether a count inside the pair was meant to be tested.
        valid_rules = [label or pattern or n_matches
                       for (label, pattern), n_matches in
                       zip(rules, self.match_counts)]
        patterns = tuple(
            pattern for (_, pattern), valid in zip(rules, valid_rules) if valid)
        names = tuple(
            name for name, valid in zip(self.class_labels(), valid_rules)
            if valid)
        transformer = self.TRANSFORMERS[type(self.attribute)]

        # join patterns with the same names
        names, map_values = unique_in_order_mapping(names)
        names = tuple(str(a) for a in names)
        map_values = tuple(map_values)

        var_key = (self.attribute, self.class_name, names,
                   patterns, self.case_sensitive, self.match_beginning, map_values)
        if var_key in self.cached_variables:
            return self.cached_variables[var_key]

        compute_value = transformer(
            self.attribute, patterns, self.case_sensitive, self.match_beginning,
            map_values)
        new_var = DiscreteVariable(
            self.class_name, names, compute_value=compute_value)
        self.cached_variables[var_key] = new_var
        return new_var

    def send_report(self):
        """Report the source attribute and one list item per used rule."""
        # Pylint gives false positives: these functions are always called from
        # within the loop
        # pylint: disable=undefined-loop-variable
        def _cond_part():
            """Describe the condition of the rule in the current iteration."""
            rule = "<b>{}</b> ".format(class_name)
            if patt:
                rule += "if <b>{}</b> contains <b>{}</b>".format(
                    self.attribute.name, patt)
            else:
                rule += "otherwise"
            return rule

        def _count_part():
            """Describe the match counts of the rule in the current iteration."""
            if not n_matched:
                return "all {} matching instances are already covered " \
                       "above".format(n_total)
            elif n_matched < n_total and patt:
                return "{} matching instances (+ {} that are already " \
                       "covered above)".format(n_matched, n_total - n_matched)
            else:
                return "{} matching instances".format(n_matched)

        if not self.attribute:
            return
        self.report_items("Input", [("Source attribute", self.attribute.name)])
        output = ""
        names = self.class_labels()
        for (n_matched, n_total), class_name, (lab, patt) in \
                zip(self.match_counts, names, self.active_rules):
            if lab or patt or n_total:
                output += "<li>{}; {}</li>".format(_cond_part(), _count_part())
        if output:
            self.report_items("Output", [("Class name", self.class_name)])
            self.report_raw("<ol>{}</ol>".format(output))
Example #39
0
class OWMosaicDisplay(OWWidget):
    """Widget that draws a mosaic plot of up to four variables.

    Continuous variables are discretized before drawing. Cells are colored
    either by Pearson residuals or by the distribution of a chosen color
    variable. Clicking cells selects the corresponding data instances, which
    are sent to the outputs.
    """
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    # Values of `interior_coloring`
    PEARSON, CLASS_DISTRIBUTION = 0, 1

    settingsHandler = DomainContextHandler()
    use_boxes = Setting(True)
    interior_coloring = Setting(CLASS_DISTRIBUTION)
    # Names of the variables shown on the four sides of the plot
    variable1 = ContextSetting("", exclude_metas=False)
    variable2 = ContextSetting("", exclude_metas=False)
    variable3 = ContextSetting("", exclude_metas=False)
    variable4 = ContextSetting("", exclude_metas=False)
    variable_color = ContextSetting("", exclude_metas=False)
    # Indices (into `self.areas`) of the selected cells
    selection = ContextSetting(set())

    # Width of the distribution bars drawn inside the cells
    BAR_WIDTH = 5
    # Base gap between neighbouring cells
    SPACING = 4
    # Offsets of the variable name and of the value labels from the plot
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3
    # Color scales for positive (blue) and negative (red) Pearson residuals
    BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255),
                   QColor(110, 110, 255), QColor(0, 0, 255)]
    RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200),
                  QColor(255, 100, 100), QColor(255, 0, 0)]

    vizrank = SettingProvider(MosaicVizRank)

    graph_name = "canvas"

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of continuous variables on SQL is not supported")

    def __init__(self):
        """Create the scene, the variable combos and the coloring controls."""
        super().__init__()

        # Input table and the (discretized) table that is actually drawn
        self.data = None
        self.discrete_data = None
        # Subset that arrived before the data; processed in `set_data`
        self.unprocessed_subset_data = None
        self.subset_data = None

        # Input data with the color variable used as the class variable
        self.color_data = None

        # One (attributes, values, rectangle) tuple per drawn cell
        self.areas = []

        self.canvas = QGraphicsScene()
        self.canvas_view = ViewWithPress(self.canvas,
                                         handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        # One combo per setting variable1 ... variable4
        self.attr_combos = [
            gui.comboBox(
                box, self, value="variable{}".format(i),
                orientation=Qt.Horizontal, contentsLength=12,
                callback=self.reset_graph,
                sendSelectedValue=True, valueType=str, emptyString="(None)")
            for i in range(1, 5)]
        self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank(
            box, self, "Find Informative Mosaics", self.set_attr)

        box2 = gui.vBox(self.controlArea, box="Interior Coloring")
        dmod = DomainModel
        self.color_model = DomainModel(order=dmod.MIXED,
                                       valid_types=dmod.PRIMITIVE,
                                       placeholder="(Pearson residuals)")
        self.cb_attr_color = gui.comboBox(
            box2, self, value="variable_color",
            orientation=Qt.Horizontal, contentsLength=12, labelWidth=50,
            callback=self.set_color_data,
            sendSelectedValue=True, model=self.color_model, valueType=str)
        self.bar_button = gui.checkBox(
            box2, self, 'use_boxes', label='Compare with total',
            callback=self._compare_with_total)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        """Return the preferred size of the widget."""
        return QSize(720, 530)

    def _compare_with_total(self):
        """Handle toggling of the 'Compare with total' check box.

        Switches to class-distribution coloring when the data has a class
        variable and another coloring is active; otherwise just redraws.
        """
        if self.data is not None and \
                self.data.domain.class_var is not None and \
                self.interior_coloring != self.CLASS_DISTRIBUTION:
            self.interior_coloring = self.CLASS_DISTRIBUTION
            self.coloring_changed()  # This also calls self.update_graph
        else:
            self.update_graph()

    def _get_discrete_data(self, data):
        """
        Discretizes continuous attributes.
        Returns None when there is no data, no rows, or no discrete or continuous attributes.
        """
        if (data is None or
                not len(data) or
                not any(attr.is_discrete or attr.is_continuous
                        for attr in chain(data.domain, data.domain.metas))):
            return None
        elif any(attr.is_continuous for attr in data.domain):
            return Discretize(
                method=EqualFreq(n=4), remove_const=False, discretize_classes=True,
                discretize_metas=True)(data)
        else:
            return data

    def init_combos(self, data):
        """Refill the variable combos from `data` and pick default choices.

        All combos are cleared first; with `data` set to None nothing else
        happens. Otherwise every primitive variable (including metas) is
        added to each combo, combos 2-4 additionally start with "(None)",
        and the color combo is set to the class variable if there is one.
        """
        for combo in self.attr_combos:
            combo.clear()
        if data is None:
            return
        for combo in self.attr_combos[1:]:
            combo.addItem("(None)")

        icons = gui.attributeIconDict
        for attr in chain(data.domain, data.domain.metas):
            if attr.is_primitive:
                for combo in self.attr_combos:
                    combo.addItem(icons[attr], attr.name)

        if self.attr_combos[0].count() > 0:
            self.variable1 = self.attr_combos[0].itemText(0)
            # Index 0 of the second combo is "(None)" and index 1 repeats
            # variable1, so take index 2 when it exists
            self.variable2 = self.attr_combos[1].itemText(
                2 * (self.attr_combos[1].count() > 2))
        self.variable3 = self.attr_combos[2].itemText(0)
        self.variable4 = self.attr_combos[3].itemText(0)
        # Use the argument rather than self.data so that the method does not
        # depend on the caller having assigned self.data beforehand
        if data.domain.class_var:
            self.variable_color = data.domain.class_var.name
            idx = self.cb_attr_color.findText(self.variable_color)
        else:
            idx = 0
        self.cb_attr_color.setCurrentIndex(idx)

    def get_attr_list(self):
        """Return the names of the chosen variables, skipping empty choices."""
        return [
            a for a in [self.variable1, self.variable2,
                        self.variable3, self.variable4]
            if a and a != "(None)"]

    def set_attr(self, *attrs):
        """Set the four shown variables (None for unused) and redraw."""
        self.variable1, self.variable2, self.variable3, self.variable4 = \
            [a.name if a else "" for a in attrs]
        self.reset_graph()

    def resizeEvent(self, e):
        """Redraw the plot after the widget is resized."""
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        """Redraw the plot when the widget is shown."""
        OWWidget.showEvent(self, ev)
        self.update_graph()

    @Inputs.data
    def set_data(self, data):
        """Handle a new input table.

        Large SQL tables are sampled. Derived tables from the previous input
        are dropped, the combos and the context are re-initialized, and a
        subset that arrived earlier is processed.
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        # Drop tables derived from the previous input; otherwise the old
        # mosaic would be redrawn when the new input is None or when
        # set_color_data decides that the new data is too small to show.
        self.discrete_data = None
        self.color_data = None

        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1 \
            and len(self.data.domain.attributes) >= 1)

        if self.data is None:
            return

        self.color_model.set_domain(self.data.domain)
        self.init_combos(self.data)

        self.openContext(self.data)

        # if we first received subset we now call setSubsetData to process it
        if self.unprocessed_subset_data:
            self.set_subset_data(self.unprocessed_subset_data)
            self.unprocessed_subset_data = None

        self.set_color_data()

    @Inputs.data_subset
    def set_subset_data(self, data):
        """Handle a new data subset.

        When there is no data yet, the subset is stored and processed later
        in `set_data`. Otherwise it is transformed into the domain of the
        data; if that fails the subset is dropped and a warning is shown.
        """
        self.Warning.incompatible_subset.clear()
        if self.data is None:
            self.unprocessed_subset_data = data
            return
        if data is None:
            self.subset_data = None
            return
        try:
            self.subset_data = data.transform(self.data.domain)
        except Exception:  # pylint: disable=broad-except
            # Any failure of the transformation means the subset cannot be
            # shown together with the data
            self.subset_data = None
            self.Warning.incompatible_subset()

    # this is called by widget after setData and setSubsetData are called.
    # this way the graph is updated only once
    def handleNewSignals(self):
        self.reset_graph()

    def clear_selection(self):
        """Deselect all cells and send the (empty) selection."""
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def coloring_changed(self):
        """Notify VizRank about the changed coloring and redraw."""
        self.vizrank.coloring_changed()
        self.update_graph()

    def reset_graph(self):
        """Clear the selection and redraw the plot."""
        self.clear_selection()
        self.update_graph()

    def set_color_data(self):
        """Prepare the data according to the chosen color variable.

        Does nothing for missing data, data with fewer than two rows or
        without attributes. Otherwise the coloring mode is set from the
        color combo, the data is converted to a domain in which the color
        variable (if any) is the class variable, and is discretized.
        """
        if self.data is None or len(self.data) < 2 or len(self.data.domain.attributes) < 1:
            return
        if self.cb_attr_color.currentIndex() <= 0:
            # The placeholder (or nothing) is selected: Pearson residuals
            color_var = None
            self.interior_coloring = self.PEARSON
            self.bar_button.setEnabled(False)
        else:
            color_var = self.data.domain[self.cb_attr_color.currentText()]
            self.interior_coloring = self.CLASS_DISTRIBUTION
            self.bar_button.setEnabled(True)
        attributes = [v for v in self.data.domain if v != color_var]
        metas = [v for v in self.data.domain.metas if v != color_var]
        domain = Domain(attributes, color_var, metas)
        self.color_data = color_data = self.data.from_table(domain, self.data)
        self.discrete_data = self._get_discrete_data(color_data)
        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(True)
        self.coloring_changed()

    def update_selection_rects(self):
        """Outline the selected cells and reset the pen of the others."""
        for i, (_, _, area) in enumerate(self.areas):
            if i in self.selection:
                area.setPen(QPen(Qt.black, 3, Qt.DotLine))
            else:
                area.setPen(QPen())

    def select_area(self, index, ev):
        """Handle a mouse click on the cell with the given index.

        Only the left button is handled; with Ctrl pressed the cell is
        toggled in the selection, otherwise it replaces the selection.
        """
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        """Send the instances in the selected cells and the annotated data."""
        if not self.selection or self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        # One conjunction of value filters per selected cell
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        # Map the selected rows back to the original data through row ids
        idset = set(selection.ids)
        sel_idx = [i for i, row_id in enumerate(self.data.ids)
                   if row_id in idset]
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(create_annotated_table(self.data, sel_idx))

    def send_report(self):
        """Add the plot to the report."""
        self.report_plot(self.canvas)

    def update_graph(self):
        """Clear the scene and redraw the mosaic, its labels and the legend.

        Nothing is drawn when there is no discretized data, when the data
        has no rows, when a chosen variable has no values or when the view
        is too small.
        """
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def get_counts(attr_vals, values):
            """This function calculates rectangles' widths.
            If all widths are zero then all widths are set to 1."""
            if attr_vals == "":
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [conditionaldict[attr_vals + "-" + val]
                          for val in values]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)
            return total, counts

        def draw_data(attr_list, x0_x1, y0_y1, side, condition,
                      total_attrs, used_attrs, used_vals, attr_vals=""):
            """Recursively split the given rectangle by values of
            `attr_list[0]`, alternating between the x and the y axis."""
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0, x1, y0, y1, "",
                         used_attrs, used_vals, attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            # A variable with a single value needs no gaps; the guard avoids
            # division by zero when such a variable falls into an empty span
            n_gaps = max(len(values) - 1, 1)
            if side % 2 == 0:  # we are drawing on the x axis
                # remove the space needed for separating different attr. values
                whole = max(0, (x1 - x0) - edge * (
                    len(values) - 1))
                if whole == 0:
                    edge = (x1 - x0) / float(n_gaps)
            else:  # we are drawing on the y axis
                whole = max(0, (y1 - y0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (y1 - y0) / float(n_gaps)

            total, counts = get_counts(attr_vals, values)

            # if we are visualizing the third attribute and the first attribute
            # has the last value, we have to reverse the order in which the
            # boxes will be drawn otherwise, if the last cell, nearest to the
            # labels of the fourth attribute, is empty, we wouldn't be able to
            # position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(
                    data.domain[used_attrs[0]])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                if attr_vals != "":
                    newattrvals = attr_vals + "-" + val
                else:
                    newattrvals = val

                tooltip = condition + 4 * "&nbsp;" + attr + \
                    ": <b>" + htmlval + "</b><br>"
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                common_args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0 + start, x0 + end),
                                  (y0, y1), side + 1,
                                  tooltip, total_attrs, *common_args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0, x1),
                                  (y0 + start, y0 + end), side + 1,
                                  tooltip, total_attrs, *common_args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1),
                      total_attrs, used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1,
                      total_attrs, used_attrs, used_vals, attr_vals):
            """Draw value labels and the variable name on the given side;
            each side is labelled only once."""
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = \
                    get_variable_values_sorted(data.domain[used_attrs[0]])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                # Empty cell: remember where the labels would go and wait
                # for a non-empty cell to actually draw them
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0

            total, counts = get_counts(attr_vals, values)

            # Alignments for sides 0 (bottom), 1 (left), 2 (top), 3 (right)
            aligns = [Qt.AlignTop | Qt.AlignHCenter,
                      Qt.AlignRight | Qt.AlignVCenter,
                      Qt.AlignBottom | Qt.AlignHCenter,
                      Qt.AlignLeft | Qt.AlignVCenter]
            align = aligns[side]
            for i, val in enumerate(values):
                perc = counts[i] / float(total)
                if distributiondict[val] != 0:
                    if side == 0:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * 0.5 * perc,
                                   y1 + self.ATTR_VAL_OFFSET, align)
                    elif side == 1:
                        CanvasText(self.canvas, str(val),
                                   x0 - self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)
                    elif side == 2:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * perc * 0.5,
                                   y0 - self.ATTR_VAL_OFFSET, align)
                    else:
                        CanvasText(self.canvas, str(val),
                                   x1 + self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)

                if side % 2 == 0:
                    currpos += perc * width + spacing * (total_attrs - side)
                else:
                    currpos += perc * height + spacing * (total_attrs - side)

            if side == 0:
                CanvasText(
                    self.canvas, attr,
                    x0 + (x1 - x0) / 2,
                    y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                    align, bold=1)
            elif side == 1:
                CanvasText(
                    self.canvas, attr,
                    x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                    y0 + (y1 - y0) / 2,
                    align, bold=1, vertical=True)
            elif side == 2:
                CanvasText(
                    self.canvas, attr,
                    x0 + (x1 - x0) / 2,
                    y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                    align, bold=1)
            else:
                CanvasText(
                    self.canvas, attr,
                    x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET,
                    y0 + (y1 - y0) / 2,
                    align, bold=1, vertical=True)

        def add_rect(x0, x1, y0, y1, condition,
                     used_attrs, used_vals, attr_vals=""):
            """Draw one cell with its coloring and tooltip, and register it
            in `self.areas` so that it can be selected."""
            area_index = len(self.areas)
            if x0 == x1:
                x1 += 1
            if y0 == y1:
                y1 += 1

            # rectangles of width and height 1 are not shown - increase
            if x1 - x0 + y1 - y0 == 2:
                y1 += 1

            if class_var:
                colors = [QColor(*col) for col in class_var.colors]
            else:
                colors = None

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(
                        self.canvas, x, y, w, h, z=z, onclick=select_area,
                        **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(
                    self.canvas, x, y, w, h, pen_color, brush_color, z=z,
                    onclick=select_area, **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.interior_coloring == self.PEARSON:
                s = sum(apriori_dists[0])
                # Expected count under independence of the used variables
                expected = s * reduce(
                    mul,
                    (apriori_dists[i][used_vals[i]] / float(s)
                     for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = (actual - expected) / sqrt(expected)
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" +
                    "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                # Stack one colored rectangle per class value in the cell
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    if i == len(cls_values) - 1:
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                # Bar with the prior class distribution on the left edge
                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and \
                        abs(y1 - y0) > bar_width:
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        counts = [conditionalsubsetdict[attr_vals + "-" + val]
                                  for val in cls_values]
                        if sum(counts) == 1:
                            rect(x0 - 2, y0 - 2, x1 - x0 + 5, y1 - y0 + 5, -550,
                                 colors[counts.index(1)], Qt.white,
                                 penWidth=2, penStyle=Qt.DashLine)
                        # Bar with the subset's class distribution on the
                        # right edge
                        if self.subset_data is not None:
                            line(x1 - bar_width, y0, x1 - bar_width, y1)
                            total = 0
                            n = conditionalsubsetdict[attr_vals]
                            if n:
                                for i, (cls, color) in \
                                        enumerate(zip(cls_values, colors)):
                                    val = conditionalsubsetdict[
                                        attr_vals + "-" + cls]
                                    if val == 0:
                                        continue
                                    if i == len(prior) - 1:
                                        v = y1 - y0 - total
                                    else:
                                        v = ((y1 - y0) * val) / n
                                    rect(x1 - bar_width, y0 + total,
                                         bar_width, v, 15, color)
                                    total += v

                actual = [conditionaldict[attr_vals + "-" + cls_values[i]]
                          for i in range(len(prior))]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                # `join` leaves no trailing separator, so the text is used
                # as is; slicing it would cut off the end of the last line
                outer_rect.setToolTip(
                    "{}<hr>Instances: {}<br><br>{}".format(
                        condition, n_actual, text))

        def draw_legend(x0_x1, y0_y1):
            """Draw the legend of residual ranges or class values, centered
            below the plot."""
            x0, x1 = x0_x1
            _, y1 = y0_y1
            if self.interior_coloring == self.PEARSON:
                names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                         "Residuals:"]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
            else:
                names = get_variable_values_sorted(class_var) + \
                        [class_var.name + ":"]
                colors = [QColor(*col) for col in class_var.colors]

            names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter)
                     for name in names]
            totalwidth = sum(text.boundingRect().width() for text in names)

            # compute the x position of the center of the legend
            y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
            distance = 30
            startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2

            # The last item is the legend title; it is placed first
            names[-1].setPos(startx + 15, y)
            names[-1].show()
            xoffset = names[-1].boundingRect().width() + distance

            size = 8

            for i in range(len(names) - 1):
                if self.interior_coloring == self.PEARSON:
                    edgecolor = Qt.black
                else:
                    edgecolor = colors[i]

                CanvasRectangle(self.canvas, startx + xoffset, y - size / 2,
                                size, size, edgecolor, colors[i])
                names[i].setPos(startx + xoffset + 10, y)
                xoffset += distance + names[i].boundingRect().width()

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        subset = self.subset_data
        attr_list = self.get_attr_list()
        class_var = data.domain.class_var
        if class_var:
            sql = isinstance(data, SqlTable)
            name = not sql and data.name
            # save class_var because it is removed in the next line
            data = data[:, attr_list + [class_var]]
            data.domain.class_var = class_var
            if not sql:
                data.name = name
        else:
            data = data[:, attr_list]
        # TODO: check this
        # data = Preprocessor_dropMissing(data)
        if len(data) == 0:
            self.Warning.no_valid_data()
            return
        else:
            self.Warning.no_valid_data.clear()

        attrs = [attr for attr in attr_list if not data.domain[attr].values]
        if attrs:
            CanvasText(self.canvas,
                       "Feature {} has no values".format(attrs[0]),
                       (self.canvas_view.width() - 120) / 2,
                       self.canvas_view.height() / 2)
            return
        if self.interior_coloring == self.PEARSON:
            apriori_dists = [get_distribution(data, attr) for attr in attr_list]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            """Return the width of the widest value label of `attr`."""
            values = get_variable_values_sorted(data.domain[attr])
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        # get the maximum width of rectangle
        xoff = 20
        width = 20
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1], bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas, attr_list[3], bold=1, show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        # get the maximum height of rectangle
        height = 100
        yoff = 45
        square_size = min(self.canvas_view.width() - width - 20,
                          self.canvas_view.height() - height - 20)

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(
            0, 0, self.canvas_view.width(), self.canvas_view.height())

        # Sides that already have labels, and label positions remembered
        # for sides whose first encountered cell was empty
        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if subset:
            conditionalsubsetdict, _ = \
                get_conditional_distribution(subset, attr_list)

        # draw rectangles
        draw_data(
            attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size),
            0, "", len(attr_list), [], [])
        draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size))
        self.update_selection_rects()
Example #40
0
class OWTranspose(OWWidget):
    """Widget that sends out the transpose of the input table.

    The "Feature names" box offers two choices, stored in ``feature_type``:
    ``GENERIC`` passes a prefix (``feature_name`` or ``DEFAULT_PREFIX``) to
    ``Table.transpose``; ``FROM_VAR`` additionally passes the variable chosen
    in ``feature_names_column``.
    """
    name = "Transpose"
    description = "Transpose data table."
    icon = "icons/Transpose.svg"
    priority = 2000
    keywords = []

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table, dynamic=False)

    # Possible values of ``feature_type``
    GENERIC, FROM_VAR = range(2)

    resizing_enabled = False
    want_main_area = False

    # Used as ``feature_name`` when the user leaves the line edit empty
    DEFAULT_PREFIX = "Feature"

    settingsHandler = DomainContextHandler()
    feature_type = ContextSetting(GENERIC)
    feature_name = ContextSetting("")
    feature_names_column = ContextSetting(None)
    auto_apply = Setting(True)

    class Warning(OWWidget.Warning):
        duplicate_names = Msg("Values are not unique.\nTo avoid multiple "
                              "features with the same name, values \nof "
                              "'{}' have been augmented with indices.")

    class Error(OWWidget.Error):
        value_error = Msg("{}")

    def __init__(self):
        """Build the control area and initialize the (empty) state."""
        super().__init__()
        self.data = None

        # self.apply is changed later, pylint: disable=unnecessary-lambda
        box = gui.radioButtons(self.controlArea,
                               self,
                               "feature_type",
                               box="Feature names",
                               callback=lambda: self.apply())

        # First option: generic names built from a user-supplied prefix
        button = gui.appendRadioButton(box, "Generic")
        edit = gui.lineEdit(gui.indentedBox(box,
                                            gui.checkButtonOffsetHint(button)),
                            self,
                            "feature_name",
                            placeholderText="Type a prefix ...",
                            toolTip="Custom feature name")
        edit.editingFinished.connect(self._apply_editing)

        # Second option: names taken from a variable of the input data
        self.meta_button = gui.appendRadioButton(box, "From variable:")
        self.feature_model = DomainModel(valid_types=(ContinuousVariable,
                                                      StringVariable),
                                         alphabetical=False)
        self.feature_combo = gui.comboBox(gui.indentedBox(
            box, gui.checkButtonOffsetHint(button)),
                                          self,
                                          "feature_names_column",
                                          contentsLength=12,
                                          callback=self._feature_combo_changed,
                                          model=self.feature_model)

        self.apply_button = gui.auto_apply(self.controlArea,
                                           self,
                                           box=False,
                                           commit=self.apply)
        self.apply_button.button.setAutoDefault(False)

        self.info.set_output_summary(self.info.NoInput)
        self.info.set_input_summary(self.info.NoInput)

        self.set_controls()

    def _apply_editing(self):
        """Switch to generic names, strip the typed prefix and apply."""
        self.feature_type = self.GENERIC
        self.feature_name = self.feature_name.strip()
        self.apply()

    def _feature_combo_changed(self):
        """Switch to names from a variable and apply."""
        self.feature_type = self.FROM_VAR
        self.apply()

    @Inputs.data
    def set_data(self, data):
        """Handle new input data: store it, refresh controls and apply."""
        # Skip the context if the combo is empty: a context with
        # feature_model == None would then match all domains
        if self.feature_model:
            self.closeContext()
        self.data = data
        if data:
            self.info.set_input_summary(len(data))
        else:
            self.info.set_input_summary(self.info.NoInput)
        self.set_controls()
        if self.feature_model:
            self.openContext(data)
        self.unconditional_apply()

    def set_controls(self):
        """Refill the variable model and reset the settings that depend on it.

        When the model has any variable, the first one is preselected and
        ``feature_type`` is set to ``FROM_VAR``; otherwise the "From
        variable:" button is disabled and no column is selected.
        """
        self.feature_model.set_domain(self.data and self.data.domain)
        self.meta_button.setEnabled(bool(self.feature_model))
        if self.feature_model:
            self.feature_names_column = self.feature_model[0]
            self.feature_type = self.FROM_VAR
        else:
            self.feature_names_column = None

    def apply(self):
        """Transpose the data, report problems and send the result.

        A ``ValueError`` raised while transposing is shown as the widget's
        error message. The output summary always reflects the table that is
        actually sent, so a failed run no longer leaves the summary of a
        previous, successful output in place.
        """
        self.clear_messages()
        transposed = None
        if self.data:
            try:
                variable = self.feature_type == self.FROM_VAR and \
                           self.feature_names_column
                transposed = Table.transpose(self.data,
                                             variable,
                                             feature_name=self.feature_name
                                             or self.DEFAULT_PREFIX)
                if variable:
                    names = self.data.get_column_view(variable)[0]
                    if len(names) != len(set(names)):
                        self.Warning.duplicate_names(variable)
            except ValueError as e:
                self.Error.value_error(e)

        # Derive the summary from the value being sent, not from the branch
        # taken above, so that errors reset it as well.
        if transposed is None:
            self.info.set_output_summary(self.info.NoInput)
        else:
            self.info.set_output_summary(len(transposed))
        self.Outputs.data.send(transposed)

    def send_report(self):
        """Report how feature names were chosen and, if present, the data."""
        if self.feature_type == self.GENERIC:
            names = self.feature_name or self.DEFAULT_PREFIX
        else:
            names = "from variable"
            if self.feature_names_column:
                names += "  '{}'".format(self.feature_names_column.name)
        self.report_items("", [("Feature names", names)])
        if self.data:
            self.report_data("Data", self.data)
class OWCorrelations(OWWidget):
    """Widget that ranks pairs of continuous attributes by correlation.

    The ranking itself is produced by the ``CorrelationRank`` vizrank; this
    class feeds it the data, keeps the selected pair in ``selection`` and
    sends the outputs.
    """
    name = "Correlations"
    description = "Compute all pairwise attribute correlations."
    icon = "icons/Correlations.svg"
    priority = 2000

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)
        features = Output("Features", AttributeList)
        correlations = Output("Correlations", Table)

    want_control_area = False

    settingsHandler = DomainContextHandler()
    # The selected pair of attributes; empty when nothing is selected
    selection = ContextSetting(())
    correlation_type = Setting(0)

    class Information(OWWidget.Information):
        not_enough_vars = Msg("Need at least two continuous features.")
        not_enough_inst = Msg("Need at least two instances.")

    def __init__(self):
        """Build the main area: type combo, vizrank filter, table, button."""
        super().__init__()
        self.data = None
        # Input data reduced to continuous attributes and imputed
        self.cont_data = None

        # GUI
        box = gui.vBox(self.mainArea)
        self.correlation_combo = gui.comboBox(
            box, self, "correlation_type", items=CorrelationType.items(),
            orientation=Qt.Horizontal, callback=self._correlation_combo_changed)

        self.vizrank, _ = CorrelationRank.add_vizrank(
            None, self, None, self._vizrank_selection_changed)

        gui.separator(box)
        box.layout().addWidget(self.vizrank.filter)
        box.layout().addWidget(self.vizrank.rank_table)

        button_box = gui.hBox(self.mainArea)
        button_box.layout().addWidget(self.vizrank.button)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(350, 400)

    def _correlation_combo_changed(self):
        """Recompute the ranking after the correlation type changes."""
        self.apply()

    def _vizrank_selection_changed(self, *args):
        """Store the attributes reported by the vizrank and send outputs."""
        self.selection = args
        self.commit()

    def _vizrank_select(self):
        """Select the row of the rank table that matches ``self.selection``.

        Does nothing when fewer than two attributes are stored in
        ``self.selection`` (it is reset to ``()`` on every new input), which
        previously raised ``IndexError`` as soon as the table had any row.
        """
        if len(self.selection) < 2:
            return
        first_name = self.selection[0].name
        second_name = self.selection[1].name

        model = self.vizrank.rank_table.model()
        selection = QItemSelection()
        for i in range(model.rowCount()):
            if model.data(model.index(i, 0)) == first_name and \
                    model.data(model.index(i, 1)) == second_name:
                selection.select(model.index(i, 0), model.index(i, 2))
                self.vizrank.rank_table.selectionModel().select(
                    selection, QItemSelectionModel.ClearAndSelect)
                break

    @Inputs.data
    def set_data(self, data):
        """Handle new input data.

        Shows an information message and leaves ``cont_data`` unset when the
        data has fewer than two continuous attributes or fewer than two
        instances; otherwise ``cont_data`` becomes the imputed table
        restricted to continuous attributes.
        """
        self.closeContext()
        self.clear_messages()
        self.data = data
        self.cont_data = None
        self.selection = ()
        if data is not None:
            cont_attrs = [a for a in data.domain.attributes if a.is_continuous]
            if len(cont_attrs) < 2:
                self.Information.not_enough_vars()
            elif len(data) < 2:
                self.Information.not_enough_inst()
            else:
                domain = data.domain
                cont_dom = Domain(cont_attrs, domain.class_vars, domain.metas)
                self.cont_data = SklImpute()(Table.from_table(cont_dom, data))
        self.apply()
        self.openContext(self.data)
        self._vizrank_select()

    def apply(self):
        """Reset the vizrank and start it, or commit if there is no data."""
        self.vizrank.initialize()
        if self.cont_data is not None:
            # this triggers self.commit() by changing vizrank selection
            self.vizrank.toggle()
            header = self.vizrank.rank_table.horizontalHeader()
            header.setStretchLastSection(True)
            header.setSectionResizeMode(QHeaderView.ResizeToContents)
        else:
            self.commit()

    def commit(self):
        """Send the input data, the selected features and the rank table.

        Without usable data only the (possibly ``None``) input is forwarded
        and the other two outputs are cleared. Otherwise the correlations
        table is built from the rows of the vizrank model: columns 0 and 1
        become string metas and column 2 the ``Correlation`` attribute.
        """
        if self.data is None or self.cont_data is None:
            self.Outputs.data.send(self.data)
            self.Outputs.features.send(None)
            self.Outputs.correlations.send(None)
            return

        metas = [StringVariable("Feature 1"), StringVariable("Feature 2")]
        domain = Domain([ContinuousVariable("Correlation")], metas=metas)
        model = self.vizrank.rank_model
        x = np.array([[float(model.data(model.index(row, 2)))] for row
                      in range(model.rowCount())])
        m = np.array([[model.data(model.index(row, 0)),
                       model.data(model.index(row, 1))] for row
                      in range(model.rowCount())], dtype=object)
        corr_table = Table(domain, x, metas=m)
        corr_table.name = "Correlations"

        self.Outputs.data.send(self.data)
        # data has been imputed; send original attributes
        self.Outputs.features.send(AttributeList([attr.compute_value.variable
                                                  for attr in self.selection]))
        self.Outputs.correlations.send(corr_table)

    def send_report(self):
        """Report the rank table under the name of the correlation type."""
        self.report_table(CorrelationType.items()[self.correlation_type],
                          self.vizrank.rank_table)
Example #42
0
class OWRank(OWWidget, ConcurrentWidgetMixin):
    """Widget that scores attributes and outputs a selection of them.

    Scores come from the built-in methods listed in ``SCORES`` and from
    scorers received on the "Scorer" input. They are shown in a sortable
    table; the selected rows determine the "Reduced Data" and "Features"
    outputs, while the whole table is sent as "Scores".
    """
    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102
    keywords = []

    buttons_area_orientation = Qt.Vertical

    class Inputs:
        data = Input("Data", Table)
        scorer = Input("Scorer", score.Scorer, multiple=True)

    class Outputs:
        reduced_data = Output("Reduced Data", Table, default=True)
        scores = Output("Scores", Table)
        features = Output("Features", AttributeList, dynamic=False)

    # Possible values of ``selectionMethod``; also used as radio button ids
    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    nSelected = ContextSetting(5)
    auto_apply = Setting(True)

    # (column, order); the column does not count the leading '#' column
    sorting = Setting((0, Qt.DescendingOrder))
    # Names of the checked built-in scoring methods
    selected_methods = Setting(set())

    settings_version = 3
    settingsHandler = DomainContextHandler()
    selected_attrs = ContextSetting([], schema_only=True)
    selectionMethod = ContextSetting(SelectNBest)

    class Information(OWWidget.Information):
        no_target_var = Msg("Data does not have a (single) target variable.")
        missings_imputed = Msg('Missing values will be imputed as needed.')

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("Scorer {} inadequate: {}")
        no_attributes = Msg("Data does not have a single attribute.")

    class Warning(OWWidget.Warning):
        renamed_variables = Msg(
            "Variables with duplicated names have been renamed.")

    def __init__(self):
        """Initialize the state and build the table and the controls."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        # Scorers received on the input, keyed by signal id
        self.scorers = OrderedDict()
        self.out_domain_desc = None
        self.data = None
        self.problem_type_mode = ProblemType.CLASSIFICATION

        # results caches
        self.scorers_results = {}
        self.methods_results = {}

        # With no stored choice, start with the methods flagged as default
        if not self.selected_methods:
            self.selected_methods = {
                method.name
                for method in SCORES if method.is_default
            }

        # GUI
        self.ranksModel = model = TableModel(parent=self)  # type: TableModel
        self.ranksView = view = TableView(self)  # type: TableView
        self.mainArea.layout().addWidget(view)
        view.setModel(model)
        view.setColumnWidth(0, 30)
        view.selectionModel().selectionChanged.connect(self.on_select)

        def _set_select_manual():
            """Switch the selection method to manual."""
            self.setSelectionMethod(OWRank.SelectManual)

        view.manualSelection.connect(_set_select_manual)
        view.verticalHeader().sectionClicked.connect(_set_select_manual)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)

        self.measuresStack = stacked = QStackedWidget(self)
        self.controlArea.layout().addWidget(stacked)

        # One page of check boxes per problem type; the page index is the
        # value later passed to switchProblemType. The last page is empty.
        for scoring_methods in (CLS_SCORES, REG_SCORES, []):
            box = gui.vBox(None,
                           "Scoring Methods" if scoring_methods else None)
            stacked.addWidget(box)
            for method in scoring_methods:
                box.layout().addWidget(
                    QCheckBox(
                        method.name,
                        self,
                        objectName=method.
                        shortname,  # To be easily found in tests
                        checked=method.name in self.selected_methods,
                        stateChanged=partial(self.methodSelectionChanged,
                                             method_name=method.name)))
            gui.rubber(box)

        gui.rubber(self.controlArea)

        self.switchProblemType(ProblemType.CLASSIFICATION)

        selMethBox = gui.vBox(self.buttonsArea, "Select Attributes")

        grid = QGridLayout()
        grid.setContentsMargins(0, 0, 0, 0)
        grid.setSpacing(6)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectionMethod)

        def button(text, buttonid, toolTip=None):
            """Create a radio button and register it in the button group."""
            b = QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

        # Changing the number of best-ranked attributes also activates
        # the "Best ranked:" option
        s = gui.spin(
            selMethBox,
            self,
            "nSelected",
            1,
            999,
            callback=lambda: self.setSelectionMethod(OWRank.SelectNBest),
            addToLayout=False)

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectionMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_send(self.buttonsArea, self, "auto_apply")

        self.resize(690, 500)

    def switchProblemType(self, index):
        """
        Switch between discrete/continuous/no_class mode

        Shows page `index` of the scoring-methods stack and stores `index`
        as the current problem type mode.
        """
        self.measuresStack.setCurrentIndex(index)
        self.problem_type_mode = index

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Handle new input data.

        Closes the context, clears the selection, the table and the result
        caches, cancels any running computation and updates the messages.
        Data without attributes is treated as no data. For valid data the
        problem type is derived from the class variable and the table's row
        labels are set, after which the context is opened.
        """
        self.closeContext()
        self.selected_attrs = []
        self.ranksModel.clear()
        self.ranksModel.resetSorting(True)

        self.scorers_results = {}
        self.methods_results = {}
        self.cancel()

        self.Error.clear()
        self.Information.clear()
        self.Information.missings_imputed(
            shown=data is not None and data.has_missing())

        if data is not None and not data.domain.attributes:
            data = None
            self.Error.no_attributes()
        self.data = data
        self.switchProblemType(ProblemType.CLASSIFICATION)
        if self.data is not None:
            domain = self.data.domain
            if domain.has_discrete_class:
                problem_type = ProblemType.CLASSIFICATION
            elif domain.has_continuous_class:
                problem_type = ProblemType.REGRESSION
            elif not domain.class_var:
                self.Information.no_target_var()
                problem_type = ProblemType.UNSUPERVISED
            else:
                # This can happen?
                self.Error.invalid_type(type(domain.class_var).__name__)
                problem_type = None

            if problem_type is not None:
                self.switchProblemType(problem_type)

            self.ranksModel.setVerticalHeaderLabels(domain.attributes)
            # The longest attribute name is passed as the sizing label
            self.ranksView.setVHeaderFixedWidthFromLabel(
                max((a.name for a in domain.attributes), key=len))

            # Default that openContext below may override
            self.selectionMethod = OWRank.SelectNBest

        self.openContext(data)
        self.selectButtons.button(self.selectionMethod).setChecked(True)

    def handleNewSignals(self):
        """Update the scores and the selection after inputs have changed."""
        self.setStatusMessage('Running')
        self.update_scores()
        self.setStatusMessage('')
        self.on_select()

    @Inputs.scorer
    def set_learner(self, scorer, id):  # pylint: disable=redefined-builtin
        """Add, replace or (when `scorer` is None) remove the scorer `id`."""
        if scorer is None:
            self.scorers.pop(id, None)
        else:
            # Avoid caching a (possibly stale) previous instance of the same
            # Scorer passed via the same signal
            if id in self.scorers:
                self.scorers_results = {}

            self.scorers[id] = ScoreMeta(
                scorer.name, scorer.name, scorer,
                ProblemType.from_variable(scorer.class_type), False)

    def _get_methods(self):
        """Return the built-in methods to show for the current data.

        A method is included if it is checked, matches the current problem
        type and, for sparse `self.data.X`, supports sparse data.
        """
        return [
            method for method in SCORES if
            (method.name in self.selected_methods
             and method.problem_type == self.problem_type_mode and
             (not issparse(self.data.X) or method.scorer.supports_sparse_data))
        ]

    def _get_scorers(self):
        """Return the input scorers usable with the current problem type.

        Scorers whose problem type is neither the current one nor
        unsupervised are left out and reported with an error message.
        """
        scorers = []
        for scorer in self.scorers.values():
            if scorer.problem_type in (
                    self.problem_type_mode,
                    ProblemType.UNSUPERVISED,
            ):
                scorers.append(scorer)
            else:
                self.Error.inadequate_learner(scorer.name,
                                              scorer.learner_adequacy_err_msg)
        return scorers

    def update_scores(self):
        """Start computing the scores that are not cached yet.

        Without data, the table is cleared and None is sent as scores.
        """
        if self.data is None:
            self.ranksModel.clear()
            self.Outputs.scores.send(None)
            return

        self.Error.inadequate_learner.clear()

        # Only methods and scorers without cached results are passed on
        scorers = [
            s for s in self._get_scorers() if s not in self.scorers_results
        ]
        methods = [
            m for m in self._get_methods() if m not in self.methods_results
        ]
        # presumably ConcurrentWidgetMixin.start runs `run` in the
        # background and reports through on_done -- confirm against the mixin
        self.start(run, self.data, methods, scorers)

    def on_done(self, result: Results) -> None:
        """Store the new results, refill the table and send the scores."""
        self.methods_results.update(result.method_scores)
        self.scorers_results.update(result.scorer_scores)

        methods = self._get_methods()
        method_labels = tuple(m.shortname for m in methods)
        method_scores = tuple(self.methods_results[m] for m in methods)

        scores = [self.scorers_results[s] for s in self._get_scorers()]
        scorer_scores, scorer_labels = zip(*scores) if scores else ((), ())

        labels = method_labels + tuple(chain.from_iterable(scorer_labels))
        # First column: number of values for discrete attributes, NaN for
        # the others; the score columns follow
        model_array = np.column_stack(([
            len(a.values) if a.is_discrete else np.nan
            for a in self.data.domain.attributes
        ], ) + method_scores + scorer_scores)
        for column, values in enumerate(model_array.T):
            self.ranksModel.setExtremesFrom(column, values)

        self.ranksModel.wrap(model_array.tolist())
        self.ranksModel.setHorizontalHeaderLabels(('#', ) + labels)
        self.ranksView.setColumnWidth(0, 40)

        # Re-apply sort
        try:
            sort_column, sort_order = self.sorting
            if sort_column < len(labels):
                # adds 1 for '#' (discrete count) column
                self.ranksModel.sort(sort_column + 1, sort_order)
                self.ranksView.horizontalHeader().setSortIndicator(
                    sort_column + 1, sort_order)
        except ValueError:
            pass

        self.autoSelection()
        self.Outputs.scores.send(self.create_scores_table(labels))

    def on_exception(self, ex: Exception) -> None:
        """Re-raise the exception passed in by the computation."""
        raise ex

    def on_partial_result(self, result: Any) -> None:
        """Ignore partial results."""
        pass

    def on_select(self):
        """Store the variables of the selected rows and commit."""
        # Save indices of attributes in the original, unsorted domain
        selected_rows = self.ranksView.selectionModel().selectedRows(0)
        row_indices = [i.row() for i in selected_rows]
        attr_indices = self.ranksModel.mapToSourceRows(row_indices)
        self.selected_attrs = [self.data.domain[idx] for idx in attr_indices]
        self.commit()

    def setSelectionMethod(self, method):
        """Set the selection method, check its button and reselect rows."""
        self.selectionMethod = method
        self.selectButtons.button(method).setChecked(True)
        self.autoSelection()

    def autoSelection(self):
        """Select table rows according to the current selection method.

        None selects nothing, All selects every row, NBest selects the first
        `nSelected` rows of the (sorted) table, and Manual reselects the rows
        of the variables stored in `selected_attrs`.
        """
        selModel = self.ranksView.selectionModel()
        model = self.ranksModel
        rowCount = model.rowCount()
        columnCount = model.columnCount()

        if self.selectionMethod == OWRank.SelectNone:
            selection = QItemSelection()
        elif self.selectionMethod == OWRank.SelectAll:
            selection = QItemSelection(
                model.index(0, 0), model.index(rowCount - 1, columnCount - 1))
        elif self.selectionMethod == OWRank.SelectNBest:
            nSelected = min(self.nSelected, rowCount)
            selection = QItemSelection(
                model.index(0, 0), model.index(nSelected - 1, columnCount - 1))
        else:
            selection = QItemSelection()
            if self.selected_attrs is not None:
                # Positions in the domain are mapped to the current
                # (possibly sorted) rows of the table
                attr_indices = [
                    self.data.domain.attributes.index(var)
                    for var in self.selected_attrs
                ]
                for row in model.mapFromSourceRows(attr_indices):
                    selection.append(
                        QItemSelectionRange(model.index(row, 0),
                                            model.index(row, columnCount - 1)))

        selModel.select(selection, QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """React to a click on a column header.

        Reselects the best-ranked rows when that method is active and a
        column other than the first was clicked, then stores the sort state.
        """
        if index >= 1 and self.selectionMethod == OWRank.SelectNBest:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states
        sort_order = self.ranksModel.sortOrder()
        sort_column = self.ranksModel.sortColumn(
        ) - 1  # -1 for '#' (discrete count) column
        self.sorting = (sort_column, sort_order)

    def methodSelectionChanged(self, state, method_name):
        """Add or remove `method_name` from the checked methods and rescore."""
        if state == Qt.Checked:
            self.selected_methods.add(method_name)
        elif method_name in self.selected_methods:
            self.selected_methods.remove(method_name)

        self.update_scores()

    def send_report(self):
        """Report the input domain, the ranks and the output description."""
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
        if self.out_domain_desc is not None:
            self.report_items("Output", self.out_domain_desc)

    def commit(self):
        """Send the reduced data and the list of selected features.

        With no selected attributes both outputs are cleared. Otherwise the
        data is transformed to a domain that has the selected attributes and
        the original class variable and metas.
        """
        if not self.selected_attrs:
            self.Outputs.reduced_data.send(None)
            self.Outputs.features.send(None)
            self.out_domain_desc = None
        else:
            reduced_domain = Domain(self.selected_attrs,
                                    self.data.domain.class_var,
                                    self.data.domain.metas)
            data = self.data.transform(reduced_domain)
            self.Outputs.reduced_data.send(data)
            self.Outputs.features.send(AttributeList(self.selected_attrs))
            self.out_domain_desc = report.describe_domain(data.domain)

    def create_scores_table(self, labels):
        """Build the "Feature Scores" table from the rank model.

        `labels` are the names of the score columns. Returns None when the
        model is empty or holds only the number-of-values column. Duplicated
        labels are made unique and a warning is shown if any were renamed.
        """
        self.Warning.renamed_variables.clear()
        model_list = self.ranksModel.tolist()
        if not model_list or len(
                model_list[0]) == 1:  # Empty or just n_values column
            return None
        # 'Feature' is appended so that the meta's name is made unique too
        unique, renamed = get_unique_names_duplicates(labels + ('Feature', ),
                                                      return_duplicated=True)
        if renamed:
            self.Warning.renamed_variables(', '.join(renamed))

        domain = Domain([ContinuousVariable(label) for label in unique[:-1]],
                        metas=[StringVariable(unique[-1])])

        # Prevent np.inf scores
        finfo = np.finfo(np.float64)
        # Column 0 (number of values) is dropped; the rest is clipped to
        # the finite float64 range
        scores = np.clip(np.array(model_list)[:, 1:], finfo.min, finfo.max)

        feature_names = np.array([a.name for a in self.data.domain.attributes])
        # Reshape to 2d array as Table does not like 1d arrays
        feature_names = feature_names[:, None]

        new_table = Table(domain, scores, metas=feature_names)
        new_table.name = "Feature Scores"
        return new_table

    @classmethod
    def migrate_settings(cls, settings, version):
        """Convert settings older than version 2.

        The stored "headerState" is removed and replaced by a "sorting"
        pair, recovered from the header state when it is available as bytes
        and set to the default otherwise.
        """
        # If older settings, restore sort header to default
        # Saved selected_rows will likely be incorrect
        if version is None or version < 2:
            column, order = 0, Qt.DescendingOrder
            headerState = settings.pop("headerState", None)

            # Lacking knowledge of last problemType, use discrete ranks view's ordering
            if isinstance(headerState, (tuple, list)):
                headerState = headerState[0]

            if isinstance(headerState, bytes):
                hview = QHeaderView(Qt.Horizontal)
                hview.restoreState(headerState)
                column, order = hview.sortIndicatorSection(
                ) - 1, hview.sortIndicatorOrder()
            settings["sorting"] = (column, order)

    @classmethod
    def migrate_context(cls, context, version):
        """Reject contexts older than version 3 as incompatible."""
        if version is None or version < 3:
            # Selections were stored as indices, so these contexts matched
            # any domain. The only safe thing to do is to remove them.
            raise IncompatibleContext
Example #43
0
class OWLinePlot(OWWidget):
    """Widget that draws data instances as line profiles over features.

    Only continuous attributes are plotted and rows with unknown values are
    left out.  Profiles can be grouped by a discrete variable, instances of
    the "Data Subset" input are marked, and instances selected in the plot
    are sent to the outputs.
    """
    name = "Line Plot"
    description = "Visualization of data profiles (e.g., time series)."
    icon = "icons/LinePlot.svg"
    priority = 180

    # Emitted with True/False to allow or forbid selecting in the plot.
    enable_selection = Signal(bool)

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    group_var = ContextSetting(None)
    show_profiles = Setting(False)
    show_range = Setting(True)
    show_mean = Setting(True)
    show_error = Setting(False)
    auto_commit = Setting(True)
    # Row indices (into the input data) of the selected instances.
    selection = Setting(None, schema_only=True)
    visual_settings = Setting({}, schema_only=True)

    graph_name = "graph.plotItem"

    class Error(OWWidget.Error):
        not_enough_attrs = Msg("Need at least one continuous feature.")
        no_valid_data = Msg("No plot due to no valid data.")

    class Warning(OWWidget.Warning):
        no_display_option = Msg("No display option is selected.")

    class Information(OWWidget.Information):
        hidden_instances = Msg("Instances with unknown values are not shown.")
        too_many_features = Msg("Data has too many features. Only first {}"
                                " are shown.".format(MAX_FEATURES))

    def __init__(self, parent=None):
        """Initialise the widget state, build the GUI and connect signals."""
        super().__init__(parent)
        self.__groups = []
        self.data = None
        # Boolean row mask: True for rows without unknown values.
        self.valid_data = None
        self.subset_data = None
        # Ids of valid data instances that also appear in the subset input.
        self.subset_indices = None
        # Stored selection, applied once the plot is set up for new data.
        self.__pending_selection = self.selection
        self.graph_variables = []
        self.graph = None
        self.group_vars = None
        self.group_view = None
        self.setup_gui()

        VisualSettingsDialog(self,
                             self.graph.parameter_setter.initial_settings)
        self.graph.view_box.selection_changed.connect(self.selection_changed)
        self.enable_selection.connect(self.graph.view_box.enable_selection)

    def setup_gui(self):
        """Create the plot first, then the controls."""
        self._add_graph()
        self._add_controls()

    def _add_graph(self):
        """Create the line plot graph and place it into the main area."""
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = LinePlotGraph(self)
        box.layout().addWidget(self.graph)

    def _add_controls(self):
        """Populate the control area: display options, grouping, zoom/select."""
        displaybox = gui.widgetBox(self.controlArea, "Display")
        gui.checkBox(displaybox,
                     self,
                     "show_profiles",
                     "Lines",
                     callback=self.__show_profiles_changed,
                     tooltip="Plot lines")
        gui.checkBox(displaybox,
                     self,
                     "show_range",
                     "Range",
                     callback=self.__show_range_changed,
                     tooltip="Plot range between 10th and 90th percentile")
        gui.checkBox(displaybox,
                     self,
                     "show_mean",
                     "Mean",
                     callback=self.__show_mean_changed,
                     tooltip="Plot mean curve")
        gui.checkBox(displaybox,
                     self,
                     "show_error",
                     "Error bars",
                     callback=self.__show_error_changed,
                     tooltip="Show standard deviation")

        self.group_vars = DomainModel(placeholder="None",
                                      separators=False,
                                      valid_types=DiscreteVariable)
        self.group_view = gui.listView(self.controlArea,
                                       self,
                                       "group_var",
                                       box="Group by",
                                       model=self.group_vars,
                                       callback=self.__group_var_changed,
                                       sizeHint=QSize(30, 100))
        self.group_view.setEnabled(False)

        plot_gui = OWPlotGUI(self)
        plot_gui.box_zoom_select(self.controlArea)

        gui.rubber(self.controlArea)
        gui.auto_send(self.controlArea, self, "auto_commit")

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    def __show_profiles_changed(self):
        self.check_display_options()
        self._update_visibility("profiles")

    def __show_range_changed(self):
        self.check_display_options()
        self._update_visibility("range")

    def __show_mean_changed(self):
        self.check_display_options()
        self._update_visibility("mean")

    def __show_error_changed(self):
        self._update_visibility("error")

    def __group_var_changed(self):
        """Re-plot the groups and refresh selection/subset marks."""
        if self.data is None or not self.graph_variables:
            return
        self.plot_groups()
        self._update_profiles_color()
        self._update_sel_profiles_and_range()
        self._update_sel_profiles_color()
        self._update_sub_profiles()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Handle the "Data" input: validate it, reset state and re-plot."""
        self.closeContext()
        self.data = data
        self._set_input_summary()
        self.clear()
        self.check_data()
        self.check_display_options()

        if self.data is not None:
            self.group_vars.set_domain(self.data.domain)
            self.group_view.setEnabled(len(self.group_vars) > 1)
            self.group_var = self.data.domain.class_var \
                if self.data.domain.has_discrete_class else None

        self.openContext(data)
        self.setup_plot()
        self.unconditional_commit()

    def check_data(self):
        """Validate ``self.data`` and derive the plotted variables and rows.

        Sets ``graph_variables`` (continuous attributes, at most
        ``MAX_FEATURES``) and ``valid_data``.  When the data cannot be
        plotted, an error is shown and ``self.data`` is set to ``None``.
        """
        def error(err):
            err()
            self.data = None

        self.clear_messages()
        if self.data is not None:
            self.graph_variables = [
                var for var in self.data.domain.attributes if var.is_continuous
            ]
            self.valid_data = ~countnans(self.data.X, axis=1).astype(bool)
            if len(self.graph_variables) < 1:
                error(self.Error.not_enough_attrs)
            elif not np.sum(self.valid_data):
                error(self.Error.no_valid_data)
            else:
                if not np.all(self.valid_data):
                    self.Information.hidden_instances()
                if len(self.graph_variables) > MAX_FEATURES:
                    self.Information.too_many_features()
                    self.graph_variables = self.graph_variables[:MAX_FEATURES]

    def check_display_options(self):
        """Warn when nothing is displayed; allow or forbid plot selection."""
        self.Warning.no_display_option.clear()
        if self.data is not None:
            if not (self.show_profiles or self.show_range or self.show_mean):
                self.Warning.no_display_option()
            # Count the valid rows straight from the mask; there is no need
            # to build a filtered table just to measure its length.
            enable = (self.show_profiles or self.show_range) and \
                np.count_nonzero(self.valid_data) < SEL_MAX_INSTANCES
            self.enable_selection.emit(enable)

    def _set_input_summary(self):
        summary = len(self.data) if self.data else self.info.NoInput
        details = format_summary_details(self.data) if self.data else ""
        self.info.set_input_summary(summary, details)

    @Inputs.data_subset
    @check_sql_input
    def set_subset_data(self, subset):
        """Handle the "Data Subset" input; processed in handleNewSignals."""
        self.subset_data = subset

    def handleNewSignals(self):
        """Refresh subset ids and the dependent plot items."""
        self.set_subset_ids()
        if self.data is not None:
            self._update_profiles_color()
            self._update_sel_profiles_color()
            self._update_sub_profiles()

    def set_subset_ids(self):
        """Store ids of valid data instances that are also in the subset."""
        sub_ids = {e.id for e in self.subset_data} \
            if self.subset_data is not None else set()
        self.subset_indices = None
        if self.data is not None and sub_ids:
            self.subset_indices = [
                x.id for x in self.data[self.valid_data] if x.id in sub_ids
            ]

    def setup_plot(self):
        """Set axis ticks, plot groups, apply selection and reset the range."""
        if self.data is None:
            return

        ticks = [a.name for a in self.graph_variables]
        self.graph.getAxis("bottom").set_ticks(ticks)
        self.plot_groups()
        self.apply_selection()
        self.graph.view_box.enableAutoRange()
        self.graph.view_box.updateAutoRange()

    def plot_groups(self):
        """Replace the plotted groups: one per ``group_var`` value, or one."""
        self._remove_groups()
        data = self.data[self.valid_data, self.graph_variables]
        if self.group_var is None:
            self._plot_group(data, np.where(self.valid_data)[0])
        else:
            class_col_data, _ = self.data.get_column_view(self.group_var)
            for index in range(len(self.group_var.values)):
                mask = np.logical_and(class_col_data == index, self.valid_data)
                indices = np.flatnonzero(mask)
                if not len(indices):
                    continue
                group_data = self.data[indices, self.graph_variables]
                self._plot_group(group_data, indices, index)
        self.graph.update_legend(self.group_var)
        self.graph.groups = self.__groups
        self.graph.view_box.add_profiles(data.X)

    def _remove_groups(self):
        """Remove the items of all plotted groups and forget the groups."""
        for group in self.__groups:
            group.remove_items()
        self.graph.view_box.remove_profiles()
        self.graph.groups = []
        self.__groups = []

    def _plot_group(self, data, indices, index=None):
        """Create a ProfileGroup for ``data`` and apply visibility flags.

        ``indices`` are row indices into ``self.data``; ``index`` is the
        index of the group value, or ``None`` when data is not grouped.
        """
        color = self.__get_group_color(index)
        group = ProfileGroup(data, indices, color, self.graph)
        kwargs = self.__get_visibility_flags()
        group.set_visible_error(**kwargs)
        group.set_visible_mean(**kwargs)
        group.set_visible_range(**kwargs)
        group.set_visible_profiles(**kwargs)
        self.__groups.append(group)

    def __get_group_color(self, index):
        if self.group_var is not None:
            return QColor(*self.group_var.colors[index])
        return QColor(LinePlotStyle.DEFAULT_COLOR)

    def __get_visibility_flags(self):
        return {
            "show_profiles": self.show_profiles,
            "show_range": self.show_range,
            "show_mean": self.show_mean,
            "show_error": self.show_error
        }

    def _update_profiles_color(self):
        # color alpha depends on subset and selection; with selection or
        # subset profiles color has more opacity
        if not self.show_profiles:
            return
        for group in self.__groups:
            has_sel = bool(self.subset_indices) or bool(self.selection)
            group.update_profiles_color(has_sel)

    def _update_sel_profiles_and_range(self):
        # mark selected instances and selected range
        if not (self.show_profiles or self.show_range):
            return
        # Build the lookup set once so that each membership test is O(1)
        # instead of a scan over the whole selection list.
        selected = set(self.selection) if self.selection else None
        for group in self.__groups:
            inds = [i for i in group.indices if self.__in(i, selected)]
            table = self.data[inds, self.graph_variables].X if inds else None
            if self.show_profiles:
                group.update_sel_profiles(table)
            if self.show_range:
                group.update_sel_range(table)

    def _update_sel_profiles_color(self):
        # color depends on subset; when subset is present,
        # selected profiles are black
        if not self.selection or not self.show_profiles:
            return
        for group in self.__groups:
            group.update_sel_profiles_color(bool(self.subset_indices))

    def _update_sub_profiles(self):
        # mark subset instances
        if not (self.show_profiles or self.show_range):
            return
        # Same as for the selection: one set, constant-time lookups.
        subset_ids = set(self.subset_indices) if self.subset_indices else None
        for group in self.__groups:
            inds = [
                i for i, _id in zip(group.indices, group.ids)
                if self.__in(_id, subset_ids)
            ]
            table = self.data[inds, self.graph_variables].X if inds else None
            group.update_sub_profiles(table)

    def _update_visibility(self, obj_name):
        """Apply the display flags to the ``obj_name`` items of every group.

        ``obj_name`` selects the ``set_visible_<obj_name>`` method of the
        groups, e.g. "profiles", "range", "mean" or "error".
        """
        if not len(self.__groups):
            return
        self._update_profiles_color()
        self._update_sel_profiles_and_range()
        self._update_sel_profiles_color()
        kwargs = self.__get_visibility_flags()
        for group in self.__groups:
            getattr(group, "set_visible_{}".format(obj_name))(**kwargs)
        self.graph.view_box.updateAutoRange()

    def apply_selection(self):
        """Apply the pending (stored) selection to the data, once."""
        if self.data is not None and self.__pending_selection is not None:
            sel = [i for i in self.__pending_selection if i < len(self.data)]
            mask = np.zeros(len(self.data), dtype=bool)
            mask[sel] = True
            # selection_changed expects a mask over the valid rows only
            mask = mask[self.valid_data]
            self.selection_changed(mask)
            self.__pending_selection = None

    def selection_changed(self, mask):
        """Update the selection from ``mask`` over valid rows and commit."""
        if self.data is None:
            return
        # need indices for self.data: mask refers to self.data[self.valid_data]
        indices = np.arange(len(self.data))[self.valid_data][mask]
        self.graph.select(indices)
        old = self.selection
        self.selection = None if self.data and isinstance(self.data, SqlTable)\
            else list(self.graph.selection)
        # profile opacity changes only when a selection appears or vanishes
        if not old and self.selection or old and not self.selection:
            self._update_profiles_color()
        self._update_sel_profiles_and_range()
        self._update_sel_profiles_color()
        self.commit()

    def commit(self):
        """Send the selected and the annotated data to the outputs."""
        selected = self.data[self.selection] \
            if self.data is not None and bool(self.selection) else None
        annotated = create_annotated_table(self.data, self.selection)

        summary = len(selected) if selected else self.info.NoOutput
        details = format_summary_details(selected) if selected else ""
        self.info.set_output_summary(summary, details)
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)

    def send_report(self):
        """Add the plot and the grouping caption to the report."""
        if self.data is None:
            return

        caption = report.render_items_vert((("Group by", self.group_var), ))
        self.report_plot()
        if caption:
            self.report_caption(caption)

    def sizeHint(self):
        return QSize(1132, 708)

    def clear(self):
        """Reset data-dependent state, the graph and the grouping model."""
        self.valid_data = None
        self.selection = None
        self.__groups = []
        self.graph_variables = []
        self.graph.reset()
        self.group_vars.set_domain(None)
        self.group_view.setEnabled(False)

    @staticmethod
    def __in(obj, collection):
        """Return True if ``collection`` is not None and contains ``obj``."""
        return collection is not None and obj in collection

    def set_visual_settings(self, key, value):
        """Apply a visual setting to the graph and remember it."""
        self.graph.parameter_setter.set_parameter(key, value)
        self.visual_settings[key] = value
Example #44
0
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `set_data` is a signal handler that fills the list boxes and calls
    `attr_changed`.

    - `attr_changed` handles changes of attribute or grouping (callbacks for
    list boxes). It recomputes box data by calling `compute_box_data`, shows
    the appropriate display box (discrete/continuous) and then calls
    `layout_changed`.

    - `layout_changed` constructs all the elements for the scene (as lists of
    QGraphicsItemGroup) and calls `display_changed`. It is called when the
    attribute or grouping is changed (by attr_changed) and on resize event.

    - `display_changed` puts the elements corresponding to the current display
    settings on the scene. It is called when the elements are reconstructed
    (layout is changed due to selection of attributes or resize event), or
    when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by layout_changed). Instead, layout_changed and
    display_changed call display_changed_disc that draws everything.
    """
    name = "Box Plot"
    description = "Visualize the distribution of feature values in a box plot."
    icon = "icons/BoxPlot.svg"
    priority = 100
    inputs = [("Data", Orange.data.Table, "set_data")]

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    settingsHandler = DomainContextHandler()

    # Variable shown in the plot and the (optional) grouping variable;
    # both are stored per data domain.
    attribute = ContextSetting(None)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMedians)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)

    # Maps the comparison type to the name of the box statistic by which
    # the boxes are sorted; an empty name means no sorting.
    _sorting_criteria_attrs = {
        CompareNone: "",
        CompareMedians: "median",
        CompareMeans: "mean"
    }

    # Pens shared by all instances of the widget.
    _pen_axis_tick = QtGui.QPen(QtCore.Qt.white, 5)
    _pen_axis = QtGui.QPen(QtCore.Qt.darkGray, 3)
    _pen_median = QtGui.QPen(QtGui.QBrush(QtGui.QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QtGui.QPen(QtGui.QBrush(QtGui.QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QtGui.QPen(QtGui.QBrush(QtGui.QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(QtCore.Qt.DotLine)
    _post_line_pen = QtGui.QPen(QtCore.Qt.lightGray, 2)
    _post_grp_pen = QtGui.QPen(QtCore.Qt.lightGray, 4)
    # Make every pen cosmetic with round caps and joins.  Note that the loop
    # runs in the class body, so `pen` remains as a class attribute.
    for pen in (_pen_paramet, _pen_median, _pen_dotted, _pen_axis,
                _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(QtCore.Qt.RoundCap)
        pen.setJoinStyle(QtCore.Qt.RoundJoin)
    # The tick pen is the exception: it gets a flat cap.
    _pen_axis_tick.setCapStyle(QtCore.Qt.FlatCap)

    _box_brush = QtGui.QBrush(QtGui.QColor(0x33, 0x88, 0xff, 0xc0))

    # Fonts and brush used for axis and label text.
    _axis_font = QtGui.QFont()
    _axis_font.setPixelSize(12)
    _label_font = QtGui.QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QtGui.QBrush(QtGui.QColor(0x33, 0x00, 0xff))

    graph_name = "box_scene"

    def __init__(self):
        """Initialise the state and build the controls, scene and info label."""
        super().__init__()
        self.stats = []
        self.dataset = None
        self.posthoc_lines = []

        # Each attribute gets its own list.  A chained assignment
        # (a = b = ... = []) would bind all of them to one shared object,
        # so an in-place change of one would silently affect the others.
        self.label_txts = []
        self.mean_labels = []
        self.boxes = []
        self.labels = []
        self.label_txts_all = []
        self.attr_labels = []
        self.order = []
        self.p = -1.0
        self.scale_x = self.scene_min_x = self.scene_width = 0
        self.label_width = 0

        common_options = dict(callback=self.attr_changed, sizeHint=(200, 100))
        self.attrs = VariableListModel()
        gui.listView(self.controlArea,
                     self,
                     "attribute",
                     box="Variable",
                     model=self.attrs,
                     **common_options)
        self.group_vars = VariableListModel()
        gui.listView(self.controlArea,
                     self,
                     "group_var",
                     box="Grouping",
                     model=self.group_vars,
                     **common_options)

        # TODO: move Compare median/mean to grouping box
        # Display options for continuous attributes.
        self.display_box = gui.vBox(self.controlArea, "Display")

        gui.checkBox(self.display_box,
                     self,
                     "show_annotations",
                     "Annotate",
                     callback=self.display_changed)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box,
            self,
            'compare',
            btnLabels=["No comparison", "Compare medians", "Compare means"],
            callback=self.display_changed)

        # Display options for discrete attributes; update_display_box shows
        # either this box or display_box.
        self.stretching_box = gui.checkBox(self.controlArea,
                                           self,
                                           'stretched',
                                           "Stretch bars",
                                           box='Display',
                                           callback=self.display_changed).box

        gui.vBox(self.mainArea, addSpace=True)
        self.box_scene = QtGui.QGraphicsScene()
        self.box_view = QtGui.QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QtGui.QPainter.Antialiasing
                                     | QtGui.QPainter.TextAntialiasing
                                     | QtGui.QPainter.SmoothPixmapTransform)
        # Viewport resize events are handled in eventFilter.
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        e = gui.hBox(self.mainArea, addSpace=False)
        self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>")
        self.mainArea.setMinimumWidth(650)

        self.stats = []
        self.dist = []
        self.conts = []
        self.is_continuous = False

        self.update_display_box()

    def eventFilter(self, obj, event):
        """Re-layout the plot when the view's viewport is resized.

        The event is always passed on to the base class implementation.
        """
        viewport_resized = (obj is self.box_view.viewport()
                            and event.type() == QtCore.QEvent.Resize)
        if viewport_resized:
            self.layout_changed()
        return super().eventFilter(obj, event)

    # noinspection PyTypeChecker
    def set_data(self, dataset):
        """Handle the "Data" input.

        Data without instances or with an empty domain is treated as no
        data.  Otherwise the list boxes are filled, default variables are
        chosen, the context is opened and the plot is recomputed.
        """
        if dataset is not None and (not bool(dataset)
                                    or not len(dataset.domain)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.dist = self.stats = self.conts = []
        self.group_var = None
        self.attribute = None
        if not dataset:
            self.reset_all_data()
            return

        domain = dataset.domain
        discrete_vars = [var
                         for var in chain(domain.variables, domain.metas)
                         if var.is_discrete]
        self.group_vars[:] = [None] + discrete_vars
        primitive_metas = (meta for meta in domain.metas
                           if meta.is_primitive())
        self.attrs[:] = chain(domain.variables, primitive_metas)
        if self.attrs:
            self.attribute = self.attrs[0]

        class_var = domain.class_var
        has_discrete_class = class_var and class_var.is_discrete
        # Assigned even when the result is None again:
        # reset to trigger selection via callback
        self.group_var = class_var if has_discrete_class else None
        self.openContext(self.dataset)
        self.attr_changed()

    def reset_all_data(self):
        """Clear the scene and the test-results label (used when no data)."""
        self.clear_scene()
        self.infot1.setText("")

    def attr_changed(self):
        """Recompute and redraw after a change of attribute or grouping.

        After the layout is rebuilt, the view is centered on the scene.
        """
        self.compute_box_data()
        self.update_display_box()
        self.layout_changed()

        if self.is_continuous:
            row_height = 90 if self.show_annotations else 60
            center_x = self.scene_min_x + self.scene_width / 2
            center_y = -30 - len(self.stats) * row_height / 2 + 45
        else:
            center_x = self.scene_width / 2
            center_y = -30 - len(self.boxes) * 40 / 2 + 45
        self.box_view.centerOn(center_x, center_y)

    def compute_box_data(self):
        """Compute distributions and box statistics for the shown attribute.

        With a grouping variable, ``self.conts`` holds the contingency of
        the attribute by group and ``self.dist`` is empty; without it,
        ``self.dist`` holds the distribution and ``self.conts`` is empty.
        For a continuous attribute ``self.stats`` gets one ``BoxData`` per
        group (or a single one); groups whose statistic has ``n == 0`` are
        dropped from ``self.stats`` and ``self.label_txts``.

        Nothing is changed when no attribute is chosen.
        """
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        if dataset is None:
            # Separate lists, so the three attributes never share an object.
            self.stats, self.dist, self.conts = [], [], []
            return
        self.is_continuous = attr.is_continuous
        if not self.is_continuous:
            # Box statistics are computed for continuous attributes only;
            # drop those left over from a previously shown attribute so
            # that they do not leak into self.label_txts below.
            self.stats = []
        if self.group_var:
            self.dist = []
            self.conts = datacaching.getCached(dataset,
                                               contingency.get_contingency,
                                               (dataset, attr, self.group_var))
            if self.is_continuous:
                self.stats = [BoxData(cont) for cont in self.conts]
            self.label_txts_all = self.group_var.values
        else:
            self.dist = datacaching.getCached(dataset,
                                              distribution.get_distribution,
                                              (dataset, attr))
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist)]
            self.label_txts_all = [""]
        # Keep only the groups that actually contain data.
        self.label_txts = [
            txts for stat, txts in zip(self.stats, self.label_txts_all)
            if stat.n > 0
        ]
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_display_box(self):
        """Show the option box that matches the type of the attribute.

        Discrete attributes get the "Stretch bars" box; continuous ones get
        the display box, with comparison enabled only when data is grouped.
        """
        if not self.is_continuous:
            self.stretching_box.show()
            self.display_box.hide()
            return

        self.stretching_box.hide()
        self.display_box.show()
        self.compare_rb.setEnabled(self.group_var is not None)

    def clear_scene(self):
        """Remove all items from the scene and forget the item lists."""
        self.box_scene.clear()
        # Every list is reset to a fresh, separate empty list.
        for list_name in ("attr_labels", "labels", "boxes", "mean_labels",
                          "posthoc_lines"):
            setattr(self, list_name, [])

    def layout_changed(self):
        """Rebuild the scene items for the current attribute and grouping.

        For a discrete attribute the work is delegated to
        ``display_changed_disc``.  For a continuous one the mean labels,
        axis, boxes, annotation labels and group labels are created and
        added to the scene, and ``display_changed`` then positions them.
        """
        variable = self.attribute
        if not variable:
            return
        self.clear_scene()
        if self.dataset is None:
            return
        if len(self.conts) == len(self.dist) == 0:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        mean_labels = []
        for stat, text in zip(self.stats, self.label_txts):
            mean_labels.append(self.mean_label(stat, variable, text))
        self.mean_labels = mean_labels
        # draw_axis reads self.mean_labels and sets self.scale_x
        self.draw_axis()
        self.boxes = list(map(self.box_group, self.stats))
        self.labels = [
            self.label_group(stat, variable, mean_label)
            for stat, mean_label in zip(self.stats, self.mean_labels)
        ]
        self.attr_labels = [
            QtGui.QGraphicsSimpleTextItem(text) for text in self.label_txts
        ]
        for item in chain(self.labels, self.boxes, self.attr_labels):
            self.box_scene.addItem(item)
        self.display_changed()

    def display_changed(self):
        """Position the scene items according to the display settings.

        Discrete attributes are delegated to ``display_changed_disc``.  For
        continuous attributes the boxes are ordered by the chosen comparison
        statistic and placed in rows; the scene rectangle is updated and
        the tests and post-hoc marks are recomputed.
        """
        if self.dataset is None:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        # Order of the boxes: as computed, or sorted by median/mean.
        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            self.order = sorted(
                self.order, key=lambda i: getattr(self.stats[i], criterion))

        # Rows are higher when annotations are shown.
        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            self.boxes[box_index].setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            # The plain group label is shown only without annotations.
            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                # pos is in data units; 5 / scale_x is an offset of 5 in
                # scene units, since pos is multiplied by scale_x below.
                if self.compare == OWBoxPlot.CompareMedians:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QtCore.QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                          self.scene_width,
                          len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self.compute_tests()
        self.show_posthoc()

    def display_changed_disc(self):
        """Draw the whole plot for a discrete attribute.

        The scene is cleared and rebuilt: one bar per group (or a single
        bar without grouping), the group labels to the left of the bars,
        the counts to the right when bars are not stretched, and the value
        labels above the bar parts.
        """
        self.clear_scene()
        self.attr_labels = [
            QtGui.QGraphicsSimpleTextItem(lab) for lab in self.label_txts_all
        ]

        # Counts are shown next to the bars only when bars are not stretched.
        if not self.stretched:
            if self.group_var:
                self.labels = [
                    QtGui.QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts
                ]
            else:
                self.labels = [
                    QtGui.QGraphicsTextItem(str(int(sum(self.dist))))
                ]

        self.draw_axis_disc()
        if self.group_var:
            self.boxes = [self.strudel(cont) for cont in self.conts]
        else:
            self.boxes = [self.strudel(self.dist)]

        for row, box in enumerate(self.boxes):
            y = (-len(self.boxes) + row) * 40 + 10

            label = self.attr_labels[row]
            b = label.boundingRect()
            label.setPos(-b.width() - 10, y - b.height() / 2)
            self.box_scene.addItem(label)
            if not self.stretched:
                label = self.labels[row]
                b = label.boundingRect()
                if self.group_var:
                    right = self.scale_x * sum(self.conts[row])
                else:
                    right = self.scale_x * sum(self.dist)
                label.setPos(right + 10, y - b.height() / 2)
                self.box_scene.addItem(label)

            # The children of a bar are taken in pairs: even positions are
            # bar parts, odd positions the text items that belong to them.
            if self.attribute is not self.group_var:
                for text_item, bar_part in zip(box.childItems()[1::2],
                                               box.childItems()[::2]):
                    label = QtGui.QGraphicsSimpleTextItem(
                        text_item.toPlainText())
                    label.setPos(bar_part.boundingRect().x(),
                                 y - label.boundingRect().height() - 8)
                    self.box_scene.addItem(label)
            for text_item in box.childItems()[1::2]:
                box.removeFromGroup(text_item)
            self.box_scene.addItem(box)
            box.setPos(0, y)
        # The height is 40 per bar plus margin.  Multiply the count rather
        # than the list: len(self.boxes * 40) gave the same number but built
        # a 40-times replicated list just to measure it.
        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - len(self.boxes) * 40,
                                    self.scene_width,
                                    len(self.boxes) * 40 + 90)
        self.infot1.setText("")

    # noinspection PyPep8Naming
    def compute_tests(self):
        """Compute the significance test and show its result in the label.

        Nothing is tested without comparison or with fewer than two groups.
        When means are compared, a t-test is used for two groups and ANOVA
        for more; the p-value is stored in ``self.p``.  When medians are
        compared, the label is left empty (the tests are commented out).
        """
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests
        def stat_ttest():
            # Returns (t, p) for the two groups in self.stats.
            # NOTE(review): divides by pooled_var; groups with zero variance
            # may fail here -- confirm how BoxData reports var.
            d1, d2 = self.stats
            pooled_var = d1.var / d1.n + d2.var / d2.n
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            # Returns (F, p) for all groups in self.stats.
            # NOTE(review): var_within uses n * var; whether this is right
            # depends on how BoxData computes var -- confirm.
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg)**2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance, " \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # z, self.p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, self.p)
            else:
                t, self.p = stat_ttest()
                t = "Student's t: %.3f (p=%.3f)" % (t, self.p)
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # U, self.p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, self.p)
            else:
                F, self.p = stat_ANOVA()
                t = "ANOVA: %.3f (p=%.3f)" % (F, self.p)
        self.infot1.setText("<center>%s</center>" % t)

    def mean_label(self, stat, attr, val_name):
        """Return an item group with the text "<name>: <mean> ± <dev>".

        The mean text is centered at x = 0 with the deviation to its right.
        When ``val_name`` is non-empty, it is put to the left of the mean.
        The x coordinate of the left edge is stored in the group's
        ``min_x`` attribute.
        """
        group = QtGui.QGraphicsItemGroup()
        # One decimal more than the attribute itself uses.
        decimals = attr.number_of_decimals + 1

        mean_item = QtGui.QGraphicsSimpleTextItem(
            "%.*f" % (decimals, stat.mean), group)
        mean_item.setFont(self._label_font)
        mean_rect = mean_item.boundingRect()
        half_width, height = mean_rect.width() / 2, mean_rect.height()
        mean_item.setPos(-half_width, -height)

        dev_item = QtGui.QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (decimals, stat.dev), group)
        dev_item.setFont(self._label_font)
        dev_item.setPos(half_width, -height)

        if not val_name:
            group.min_x = -half_width
            return group

        name_item = QtGui.QGraphicsSimpleTextItem(val_name + ": ", group)
        name_item.setFont(self._label_font)
        name_item.setBrush(self._attr_brush)
        name_rect = name_item.boundingRect()
        group.min_x = -half_width - name_rect.width()
        name_item.setPos(group.min_x, -height)
        return group

    def draw_axis(self):
        """
        Draw the horizontal axis with ticks and set `self.scale_x`.

        Also sets `self.scene_min_x` and `self.scene_width`. When
        `self.stats` is empty, a placeholder `BoxData` is used to compute
        the scale and all ticks are labelled with "?".
        """
        no_stats = not self.stats
        stats = self.stats or [BoxData(np.array([[0.], [1.]]))]
        mean_labels = self.mean_labels or [
            self.mean_label(stats[0], self.attribute, "")
        ]
        lowest = min(stat.a_min for stat in stats)
        highest = max(stat.a_max for stat in stats)

        first_val, step = compute_scale(lowest, highest)
        # Move the first tick strictly below the smallest value.
        while lowest <= first_val:
            first_val -= step
        axis_start = first_val
        no_ticks = math.ceil((highest - first_val) / step) + 1
        axis_end = max(highest, first_val + no_ticks * step)

        # The drawn range must also fit the mean +- deviation lines.
        graph_start = min(axis_start,
                          min(stat.mean - stat.dev for stat in stats))
        graph_end = max(axis_end,
                        max(stat.mean + stat.dev for stat in stats))

        viewrect = self.box_view.viewport().rect().adjusted(15, 15, -15, -30)
        scale_x = viewrect.width() / (graph_end - graph_start)
        self.scale_x = scale_x

        # Extend the range to the left if any mean label would stick out.
        # In principle this should be repeated until convergence, since the
        # new scaling is too conservative; a single pass is made instead.
        leftmost_label = min(stat.mean + mean_lab.min_x / scale_x
                             for stat, mean_lab in zip(stats, mean_labels))
        if leftmost_label < graph_start:
            graph_start = leftmost_label
            scale_x = viewrect.width() / (graph_end - graph_start)
            self.scale_x = scale_x

        self.scene_min_x = graph_start * scale_x
        self.scene_width = (graph_end - graph_start) * scale_x

        scene = self.box_scene
        tick = first_val
        while True:
            tick_x = tick * scale_x
            marker = scene.addLine(tick_x, -1, tick_x, 1,
                                   self._pen_axis_tick)
            marker.setZValue(100)

            caption = scene.addSimpleText(
                "?" if no_stats else self.attribute.repr_val(tick),
                self._axis_font)
            caption.setFlags(
                caption.flags()
                | QtGui.QGraphicsItem.ItemIgnoresTransformations)
            caption.setPos(tick_x - caption.boundingRect().width() / 2, 8)
            if tick >= axis_end:
                break
            tick += step
        scene.addLine(axis_start * scale_x - 4, 0, axis_end * scale_x + 4, 0,
                      self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis for a discrete attribute and set
        `self.scale_x`, `self.scene_width` and `self.label_width`.

        In stretched mode the axis runs from 0 to 100 in steps of 10;
        otherwise its length is derived from the largest total count. If
        that count is zero, `self.scale_x` is set to 1 and nothing is drawn.
        """
        if self.stretched:
            step = steps = 10
        else:
            if self.group_var:
                largest = max(float(np.sum(cont)) for cont in self.conts)
            else:
                largest = float(np.sum(self.dist))
            if largest == 0:
                self.scale_x = 1
                return
            step = compute_scale(0, largest)[1]
            if step > 1:
                step = int(step)
            else:
                step = 1
            steps = int(math.ceil(largest / step))
        axis_max = step * steps

        viewrect = self.box_view.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        # Width reserved for group labels: at least 40, at most a third
        # of the scene.
        widest = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = min(max(widest, 40), self.scene_width / 3)
        self.label_width = lab_width

        # Space left for the bars and for the labels on their right.
        available = self.scene_width - lab_width - 10

        right_offset = 0  # offset for the right label
        if not self.stretched and self.labels:
            if self.group_var:
                rows = list(zip(self.conts, self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            provisional_scale = available / axis_max
            max_right = max(
                sum(dist) * provisional_scale + 10
                + lbl.boundingRect().width()
                for dist, lbl in rows)
            right_offset = max(0, max_right - axis_max * provisional_scale)

        scale_x = (available - right_offset) / axis_max
        self.scale_x = scale_x

        scene = self.box_scene
        scene.addLine(0, 0, axis_max * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            tick_x = val * scale_x
            marker = scene.addLine(tick_x, -1, tick_x, 1,
                                   self._pen_axis_tick)
            marker.setZValue(100)
            caption = scene.addSimpleText(str(val), self._axis_font)
            caption.setPos(tick_x - caption.boundingRect().width() / 2, 8)
        if self.stretched:
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        """
        Build the group of annotations for one box: the mean label above
        the box, and the median and quartile values below it.

        A quartile label that would come within 5 units of the median
        label is moved aside and connected to its position with a bent
        line; otherwise a straight vertical tick is drawn.

        Args:
            stat: object providing `mean`, `median`, `q25` and `q75`
            attr: variable providing `number_of_decimals`
            mean_lab (QGraphicsItemGroup): label created by `mean_label`

        Returns:
            QGraphicsItemGroup containing all annotations
        """
        def centered_text(val, pos):
            # Text item with the formatted value, centred on x = pos.
            t = QtGui.QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            # Vertical tick below (down=1) or above (down=-1) the box.
            QtGui.QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            # Put the label's left edge at `to` and draw a bent connector
            # from x = frm to the label's centre. `t_box` is read from the
            # enclosing scope at call time.
            label.setX(to)
            to += t_box.width() / 2
            path = QtGui.QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QtGui.QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QtGui.QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        msc = stat.median * self.scale_x
        med_t = centered_text(stat.median, msc)
        # The median text is centred on msc, so its edges lie half of its
        # width to either side.
        med_box_width2 = med_t.boundingRect().width() / 2
        line(msc)

        x = stat.q25 * self.scale_x
        t = centered_text(stat.q25, x)
        t_box = t.boundingRect()
        med_left = msc - med_box_width2
        if x + t_box.width() / 2 >= med_left - 5:
            move_label(t, x, med_left - t_box.width() - 5)
        else:
            line(x)

        x = stat.q75 * self.scale_x
        t = centered_text(stat.q75, x)
        t_box = t.boundingRect()
        med_right = msc + med_box_width2
        if x - t_box.width() / 2 <= med_right + 5:
            move_label(t, x, med_right + 5)
        else:
            line(x)

        return labels

    def box_group(self, stat, height=20):
        """
        Build the graphics items for a single box.

        The group contains the whiskers at `a_min` and `a_max`, a dotted
        line between them, the mean marker, the mean +- deviation line,
        the interquartile rectangle and the median line.

        Args:
            stat: object providing `a_min`, `a_max`, `mean`, `dev`, `q25`,
                `q75` and `median`
            height (int): height of the interquartile rectangle

        Returns:
            QGraphicsItemGroup with all the items
        """
        scale = self.scale_x
        group = QtGui.QGraphicsItemGroup()

        def scaled_line(x0, y0, x1, y1, pen):
            # Line with x coordinates in data units, added to the group.
            item = QtGui.QGraphicsLineItem(x0 * scale, y0, x1 * scale, y1,
                                           group)
            item.setPen(pen)
            return item

        third, half = height / 3, height / 2

        scaled_line(stat.a_min, -1.5, stat.a_min, 1.5, self._pen_paramet)
        scaled_line(stat.a_max, -1.5, stat.a_max, 1.5, self._pen_paramet)
        scaled_line(stat.a_min, 0, stat.a_max, 0, self._pen_dotted)
        scaled_line(stat.mean, -third, stat.mean, third, self._pen_paramet)
        scaled_line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0,
                    self._pen_paramet)

        quartile_box = QtGui.QGraphicsRectItem(
            stat.q25 * scale, -half, (stat.q75 - stat.q25) * scale, height,
            group)
        quartile_box.setBrush(self._box_brush)
        quartile_box.setPen(QtGui.QPen(QtCore.Qt.NoPen))
        quartile_box.setZValue(-200)

        median = scaled_line(stat.median, -half, stat.median, half,
                             self._pen_median)
        median.setZValue(-150)

        return group

    def strudel(self, dist):
        """
        Build a stacked horizontal bar for a distribution of a discrete
        attribute.

        Each value with a count of at least 1e-6 gets a rectangle in the
        value's colour, with a tooltip showing the count (or the percentage
        in stretched mode). If the total is below 1e-6, a single thin
        rectangle is added instead.

        Args:
            dist: sequence of counts, indexed like `self.attribute.values`

        Returns:
            QGraphicsItemGroup with the rectangles and value texts
        """
        attr = self.attribute
        total = np.sum(dist)
        group = QtGui.QGraphicsItemGroup()
        if total < 1e-6:
            QtGui.QGraphicsRectItem(0, -10, 1, 10, group)
        offset = 0
        for index, count in enumerate(dist):
            if count < 1e-6:
                continue
            extent = count / total if self.stretched else count
            extent = extent * self.scale_x
            segment = QtGui.QGraphicsRectItem(offset + 1, -6, extent - 2, 12,
                                              group)
            segment.setBrush(QtGui.QBrush(QtGui.QColor(*attr.colors[index])))
            segment.setPen(QtGui.QPen(QtCore.Qt.NoPen))
            if self.stretched:
                segment.setToolTip("{}: {:.2f}%".format(
                    attr.values[index], 100 * dist[index] / sum(dist)))
            else:
                segment.setToolTip("{}: {}".format(
                    attr.values[index], int(dist[index])))
            group.addToGroup(QtGui.QGraphicsTextItem(attr.values[index]))
            offset += extent
        return group

    def show_posthoc(self):
        """
        Redraw the lines used for post-hoc comparison of boxes.

        Lines from the previous call are removed first. Nothing is drawn if
        comparison is disabled or there are fewer than two boxes. Otherwise
        vertical guides are drawn at each box's median or mean (depending
        on `self.compare`), and horizontal lines near the axis connect runs
        of boxes whose guides are at most 1.5 scene units from the first
        box in the run.
        """
        def line(y0, y1):
            # Vertical guide at `x`, which is read from the enclosing scope
            # at the time of the call.
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        # Remove everything drawn by the previous call.
        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        # Name of the statistic (attribute of the stat object) to compare.
        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        # Scene x coordinates of the compared statistic, in display order.
        xs = []

        # Vertical distance between consecutive boxes.
        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line) * self.scale_x
            xs.append(x)
            by = y_up + pos * height
            # Two segments, leaving a gap between by - 12 and by + 12.
            line(by + 12, 3)
            line(by - 12, by - 25)

        # used_to[row] is the index of the last box covered by the most
        # recent horizontal line drawn in that row.
        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            # Find the last box `to` before the first one that lies more
            # than 1.5 units right of box `frm`; if no box is that far,
            # `to` ends up as the last index.
            # NOTE(review): this presumably relies on self.order sorting
            # boxes by the compared statistic -- confirm.
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            # Skip runs that end where the previous one did, and runs that
            # contain only the starting box.
            if last_to == to or frm == to:
                continue
            # Reuse the first row whose last line ended before `frm`;
            # otherwise open a new row.
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = -6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        """Return the name of the shown attribute, or None if there is none."""
        attribute = self.attribute
        return attribute.name if attribute else None

    def send_report(self):
        """Add the plot to the report, with a caption naming the attribute
        and the grouping variable, if any."""
        self.report_plot()
        parts = []
        if self.attribute:
            parts.append(
                "Box plot for attribute '{}' ".format(self.attribute.name))
        if self.group_var:
            parts.append("grouped by '{}'".format(self.group_var.name))
        caption = "".join(parts)
        if caption:
            self.report_caption(caption)
class OWPieChart(widget.OWWidget):
    """
    Widget that shows the distribution of a discrete variable as a pie
    chart, optionally as one pie per value of a second discrete variable.
    """
    name = "Pie Chart"
    description = "Make fun of Pie Charts."
    keywords = ["pie chart", "chart", "visualisation"]
    icon = "icons/PieChart.svg"
    priority = 700

    class Inputs:
        data = Input("Data", Orange.data.Table)

    settingsHandler = DomainContextHandler()
    attribute = ContextSetting(None)  # variable whose distribution is shown
    split_var = ContextSetting(None)  # variable defining separate pies
    explode = Setting(False)  # if set, slices are moved away from the centre
    graph_name = "scene"

    def __init__(self):
        super().__init__()
        self.dataset = None

        self.attrs = DomainModel(valid_types=Orange.data.DiscreteVariable,
                                 separators=False)
        cb = gui.comboBox(self.controlArea,
                          self,
                          "attribute",
                          box=True,
                          model=self.attrs,
                          callback=self.update_scene,
                          contentsLength=12)
        # The legend lives in a grid below the attribute combo: a colour
        # patch in column 0 and the value name in column 1.
        grid = QGridLayout()
        self.legend = gui.widgetBox(gui.indentedBox(cb.box), orientation=grid)
        grid.setColumnStretch(1, 1)
        grid.setHorizontalSpacing(6)
        self.legend_items = []
        self.split_vars = DomainModel(
            valid_types=Orange.data.DiscreteVariable,
            separators=False,
            placeholder="None",
        )
        self.split_combobox = gui.comboBox(self.controlArea,
                                           self,
                                           "split_var",
                                           box="Split by",
                                           model=self.split_vars,
                                           callback=self.update_scene)
        self.explode_checkbox = gui.checkBox(self.controlArea,
                                             self,
                                             "explode",
                                             "Explode pies",
                                             box=True,
                                             callback=self.update_scene)
        gui.rubber(self.controlArea)
        gui.widgetLabel(
            gui.hBox(self.controlArea, box=True),
            "The aim of this widget is to\n"
            "demonstrate that pie charts are\n"
            "a terrible visualization. Please\n"
            "don't use it for any other purpose.")

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHints(QPainter.Antialiasing
                                 | QPainter.TextAntialiasing
                                 | QPainter.SmoothPixmapTransform)
        self.mainArea.layout().addWidget(self.view)
        self.mainArea.setMinimumWidth(500)

    def sizeHint(self):
        return QSize(200, 150)  # Horizontal size is regulated by mainArea

    @Inputs.data
    def set_data(self, dataset):
        """
        Handler for the Data signal.

        Data without instances or with an empty domain is treated as no
        data. The combo models are updated, default variables are chosen
        (the context may then override them) and the scene is redrawn.

        Args:
            dataset (Orange.data.Table): input data or None
        """
        if dataset is not None and (not bool(dataset)
                                    or not len(dataset.domain)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.attribute = None
        self.split_var = None
        domain = dataset.domain if dataset is not None else None
        self.attrs.set_domain(domain)
        self.split_vars.set_domain(domain)
        if dataset is not None:
            self.select_default_variables(domain)
            self.openContext(self.dataset)
        self.update_scene()

    def select_default_variables(self, domain):
        """
        Choose the initial `attribute` and `split_var` for a new domain.

        The first variable in the attribute model is shown. The split
        variable is taken from the split model at index
        `len(domain.class_vars)` when the attribute model has more
        variables than there are class variables, and is None otherwise.

        Args:
            domain (Orange.data.Domain): domain of the new data
        """
        # NOTE(review): `split_vars` has a placeholder, which presumably
        # occupies index 0 and shifts variables by one relative to
        # `attrs` -- confirm that this index picks the intended variable.
        if len(self.attrs) > len(domain.class_vars):
            first_attr = self.split_vars[len(domain.class_vars)]
        else:
            first_attr = None
        if len(self.attrs):
            self.attribute, self.split_var = self.attrs[0], first_attr
        else:
            self.attribute, self.split_var = self.split_var, None

    def update_scene(self):
        """Redraw the pies and their labels, and rebuild the legend."""
        self.scene.clear()
        if self.dataset is None or self.attribute is None:
            # Without this, the legend of the previous attribute would
            # stay visible next to an empty scene.
            self.update_legend([], [])
            return
        dists, labels = self.compute_box_data()
        colors = self.attribute.colors
        for x, (dist, label) in enumerate(zip(dists, labels)):
            self.pie_chart(SCALE * x, 0, 0.8 * SCALE, dist, colors)
            self.pie_label(SCALE * x, 0, label)
        self.update_legend([QColor(*col) for col in colors],
                           self.attribute.values)
        self.view.centerOn(SCALE * len(dists) / 2, 0)

    def update_legend(self, colors, labels):
        """
        Replace the legend with one row per (colour, label) pair.

        Args:
            colors (iterable of QColor): colours of the patches
            labels (iterable of str): texts shown next to the patches
        """
        layout = self.legend.layout()
        while self.legend_items:
            w = self.legend_items.pop()
            layout.removeWidget(w)
            w.deleteLater()
        for row, (color, label) in enumerate(zip(colors, labels)):
            icon = QLabel()
            p = QPixmap(12, 12)
            p.fill(color)
            icon.setPixmap(p)
            label = QLabel(label)
            layout.addWidget(icon, row, 0)
            layout.addWidget(label, row, 1, alignment=Qt.AlignLeft)
            self.legend_items += (icon, label)

    def pie_chart(self, x, y, r, dist, colors):
        """
        Add a pie with one slice per non-zero element of `dist`.

        Args:
            x (float): x coordinate of the pie's centre
            y (float): y coordinate of the pie's centre
            r (float): width and height of the pie's bounding square
            dist: counts, one per slice
            colors: colour components of the slices, one sequence per slice
        """
        start_angle = 0
        dist = np.asarray(dist)
        # Qt measures angles in 1/16 of a degree; `or 1` avoids division
        # by zero for an all-zero distribution.
        spans = dist / (float(np.sum(dist)) or 1) * 360 * 16
        for span, color in zip(spans, colors):
            if not span:
                continue
            if self.explode:
                # Shift the slice by r / 30 along its bisector.
                mid_ang = (start_angle + span / 2) / 360 / 16 * 2 * pi
                dx = r / 30 * cos(mid_ang)
                dy = r / 30 * sin(mid_ang)
            else:
                dx = dy = 0
            ellipse = QGraphicsEllipseItem(x - r / 2 + dx, y - r / 2 - dy, r,
                                           r)
            # With a single value the ellipse is left as a full circle.
            if len(spans) > 1:
                ellipse.setStartAngle(start_angle)
                ellipse.setSpanAngle(span)
            ellipse.setBrush(QColor(*color))
            self.scene.addItem(ellipse)
            start_angle += span

    def pie_label(self, x, y, label):
        """
        Add a label centred below the pie at (x, y).

        A label wider than 0.95 * SCALE is shortened character by
        character, with "..." appended. An empty label is not drawn.
        """
        if not label:
            return
        text = QGraphicsSimpleTextItem(label)
        for cut in range(1, len(label)):
            if text.boundingRect().width() < 0.95 * SCALE:
                break
            text = QGraphicsSimpleTextItem(label[:-cut] + "...")
        text.setPos(x - text.boundingRect().width() / 2, y + 0.5 * SCALE)
        self.scene.addItem(text)

    def compute_box_data(self):
        """
        Compute the distributions to show and their labels.

        Returns:
            a pair (distributions, labels): the contingency of the
            attribute with the split variable and the split variable's
            values if a split variable is set; otherwise a list with the
            attribute's distribution and a list with a single empty label
        """
        if self.split_var:
            return (contingency.get_contingency(self.dataset, self.attribute,
                                                self.split_var),
                    self.split_var.values)
        else:
            return [
                distribution.get_distribution(self.dataset, self.attribute)
            ], [""]

    def send_report(self):
        """Add the chart to the report, with a caption naming the shown
        attribute and the split variable, if any."""
        self.report_plot()
        text = ""
        if self.attribute is not None:
            text += "Pie chart for '{}' ".format(self.attribute.name)
        if self.split_var is not None:
            text += "split by '{}'".format(self.split_var.name)
        if text:
            self.report_caption(text)
Example #46
0
class OWSieveDiagram(OWWidget):
    """
    Widget that shows a sieve diagram for a pair of variables: a square
    split into rectangles whose sides are proportional to the marginal
    probabilities of the values, coloured and hatched by the standardized
    Pearson residuals. Clicking rectangles selects the matching instances.
    """
    name = "Sieve Diagram"
    description = "Visualize the observed and expected frequencies " \
                  "for a combination of values."
    icon = "icons/SieveDiagram.svg"
    priority = 200
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        features = Input("Features", AttributeList)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    graph_name = "canvas"

    want_control_area = False

    settings_version = 1
    settingsHandler = DomainContextHandler()
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    selection = ContextSetting(set())  # indices into self.areas

    # Emitted when the user (not vizrank or an input) changes a combo.
    xy_changed_manually = Signal(Variable, Variable)

    def __init__(self):
        # pylint: disable=missing-docstring
        super().__init__()

        self.data = self.discrete_data = None
        self.attrs = []
        self.input_features = None
        self.areas = []
        self.selection = set()

        self.attr_box = gui.hBox(self.mainArea)
        self.domain_model = DomainModel(valid_types=DomainModel.PRIMITIVE)
        combo_args = dict(widget=self.attr_box,
                          master=self,
                          contentsLength=12,
                          callback=self.attr_changed,
                          sendSelectedValue=True,
                          valueType=str,
                          model=self.domain_model)
        fixed_size = (QSizePolicy.Fixed, QSizePolicy.Fixed)
        gui.comboBox(value="attr_x", **combo_args)
        gui.widgetLabel(self.attr_box, "\u2715", sizePolicy=fixed_size)
        gui.comboBox(value="attr_y", **combo_args)
        self.vizrank, self.vizrank_button = SieveRank.add_vizrank(
            self.attr_box, self, "Score Combinations", self.set_attr)
        self.vizrank_button.setSizePolicy(*fixed_size)

        self.canvas = QGraphicsScene()
        self.canvasView = ViewWithPress(self.canvas,
                                        self.mainArea,
                                        handler=self.reset_selection)
        self.mainArea.layout().addWidget(self.canvasView)
        self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

    def sizeHint(self):
        return QSize(450, 550)

    def resizeEvent(self, event):
        # The diagram is sized from the view, so it is redrawn on resize.
        super().resizeEvent(event)
        self.update_graph()

    def showEvent(self, event):
        super().showEvent(event)
        self.update_graph()

    @classmethod
    def migrate_context(cls, context, version):
        """Rename the settings and convert the stored names to variables
        for contexts saved without a version."""
        if not version:
            settings.rename_setting(context, "attrX", "attr_x")
            settings.rename_setting(context, "attrY", "attr_y")
            settings.migrate_str_to_variable(context)

    @Inputs.data
    def set_data(self, data):
        """
        Discretize continuous attributes, and put all attributes and discrete
        metas into self.attrs.

        Select the first two attributes unless context overrides this.
        Method `resolve_shown_attributes` is called to use the attributes from
        the input, if it exists and matches the attributes in the data.

        Remove selection; again let the context override this.
        Initialize the vizrank dialog, but don't show it.

        Args:
            data (Table): input data
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        self.selection = set()
        if self.data is None:
            self.attrs[:] = []
            self.domain_model.set_domain(None)
            self.discrete_data = None
        else:
            self.domain_model.set_domain(data.domain)
        self.attrs = [x for x in self.domain_model if isinstance(x, Variable)]
        if self.attrs:
            self.attr_x = self.attrs[0]
            # Index is False (0) with a single attribute and True (1)
            # otherwise.
            self.attr_y = self.attrs[len(self.attrs) > 1]
        else:
            self.attr_x = self.attr_y = None
            self.areas = []
            self.selection = set()
        self.openContext(self.data)
        if self.data:
            self.discrete_data = self.sparse_to_dense(data, True)
        self.resolve_shown_attributes()
        self.update_graph()
        self.update_selection()

        self.vizrank.initialize()
        self.vizrank_button.setEnabled(self.data is not None
                                       and len(self.data) > 1
                                       and len(self.data.domain.attributes) > 1
                                       and not self.data.is_sparse())

    def set_attr(self, attr_x, attr_y):
        """Show the given pair of attributes (callback for vizrank)."""
        self.attr_x, self.attr_y = attr_x, attr_y
        self.update_attr()

    def attr_changed(self):
        """Callback for the combos: update the diagram and notify
        listeners that the user changed the attributes."""
        self.update_attr()
        self.xy_changed_manually.emit(self.attr_x, self.attr_y)

    def update_attr(self):
        """Update the graph and selection."""
        self.selection = set()
        self.discrete_data = self.sparse_to_dense(self.data)
        self.update_graph()
        self.update_selection()

    def sparse_to_dense(self, data, init=False):
        """
        Extracts two selected columns from sparse matrix.
        GH-2260

        For dense data, the data is discretized only when `init` is set;
        otherwise the existing `self.discrete_data` is returned.

        Args:
            data (Table): data to convert
            init (bool): whether this is the initial call for new data

        Returns:
            Table: data in which continuous variables are discretized
        """
        def discretizer(data):
            if any(attr.is_continuous for attr in chain(
                    data.domain.variables, data.domain.metas)):
                discretize = Discretize(method=EqualFreq(n=4),
                                        remove_const=False,
                                        discretize_classes=True,
                                        discretize_metas=True)
                return discretize(data).to_dense()
            return data

        if not data.is_sparse() and not init:
            return self.discrete_data
        if data.is_sparse():
            attrs = {self.attr_x, self.attr_y}
            new_domain = data.domain.select_columns(attrs)
            data = Table.from_table(new_domain, data)
        return discretizer(data)

    @Inputs.features
    def set_input_features(self, attr_list):
        """
        Handler for the Features signal.

        The method stores the attributes and calls `resolve_shown_attributes`

        Args:
            attr_list (AttributeList): data from the signal
        """
        self.input_features = attr_list
        self.resolve_shown_attributes()
        self.update_selection()

    def resolve_shown_attributes(self):
        """
        Use the attributes from the input signal if the signal is present
        and at least one of them appears in the domain. If only one does,
        it is used for both axes; if there are multiple, the first two are
        used. Combos are disabled if inputs are used.
        """
        self.warning()
        self.attr_box.setEnabled(True)
        self.vizrank.setEnabled(True)
        if not self.input_features:  # None or empty
            return
        features = [f for f in self.input_features if f in self.domain_model]
        if not features:
            self.warning(
                "Features from the input signal are not present in the data")
            return
        old_attrs = self.attr_x, self.attr_y
        # Doubling the list makes a single feature fill both axes.
        self.attr_x, self.attr_y = (features * 2)[:2]
        self.attr_box.setEnabled(False)
        self.vizrank.setEnabled(False)
        if (self.attr_x, self.attr_y) != old_attrs:
            self.selection = set()
            self.update_graph()

    def reset_selection(self):
        """Clear the selection and update the outputs."""
        self.selection = set()
        self.update_selection()

    def select_area(self, area, event):
        """
        Add or remove the clicked area from the selection

        Args:
            area (QRect): the area that is clicked
            event (QEvent): event description
        """
        if event.button() != Qt.LeftButton:
            return
        index = self.areas.index(area)
        if event.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection()

    def update_selection(self):
        """
        Update the graph (pen width) to show the current selection.
        Filter and output the data.
        """
        if self.areas is None or not self.selection:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return

        filts = []
        for i, area in enumerate(self.areas):
            if i in self.selection:
                width = 4
                val_x, val_y = area.value_pair
                filts.append(
                    filter.Values([
                        filter.FilterDiscrete(self.attr_x.name, [val_x]),
                        filter.FilterDiscrete(self.attr_y.name, [val_y])
                    ]))
            else:
                width = 1
            pen = area.pen()
            pen.setWidth(width)
            area.setPen(pen)
        if len(filts) == 1:
            filts = filts[0]
        else:
            filts = filter.Values(filts, conjunction=False)
        selection = filts(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, row_id in enumerate(self.data.ids)
                   if row_id in idset]
        # Output rows of the original data, not of the discretized copy.
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def update_graph(self):
        # Function uses weird names like r, g, b, but it does it with utmost
        # caution, hence
        # pylint: disable=invalid-name
        """Update the graph."""
        def text(txt, *args, **kwargs):
            # Plain text is used when the caller limits the width;
            # otherwise the text is shown as escaped HTML.
            text = html_text = None
            if "max_width" in kwargs:
                text = txt
            else:
                html_text = to_html(txt)
            return CanvasText(self.canvas,
                              text,
                              *args,
                              html_text=html_text,
                              **kwargs)

        def text_width(txt):
            return text(txt, 0, 0, show=False).boundingRect().width()

        def text_height(txt):
            return text(txt, 0, 0, show=False).boundingRect().height()

        def fmt(val):
            return str(int(val)) if val % 1 == 0 else "{:.2f}".format(val)

        def show_pearson(rect, pearson, pen_width):
            """
            Color the given rectangle according to its corresponding
            standardized Pearson residual.

            Args:
                rect (QRect): the rectangle being drawn
                pearson (float): signed standardized pearson residual
                pen_width (int): pen width (bolder pen is used for selection)
            """
            r = rect.rect()
            x, y, w, h = r.x(), r.y(), r.width(), r.height()
            if w == 0 or h == 0:
                return

            # Positive residuals are shown in blue and negative in red;
            # larger residuals give a more saturated colour. QColor takes
            # integer channels, so the clamped values are converted.
            r = b = 255
            if pearson > 0:
                r = g = int(max(255 - 20 * pearson, 55))
            elif pearson < 0:
                b = g = int(max(255 + 20 * pearson, 55))
            else:
                r = g = b = 224
            rect.setBrush(QBrush(QColor(r, g, b)))
            pen_color = QColor(255 * (r == 255), 255 * (g == 255),
                               255 * (b == 255))
            pen = QPen(pen_color, pen_width)
            rect.setPen(pen)
            # Distance between grid lines: denser for large positive
            # residuals and sparser for negative ones.
            if pearson > 0:
                pearson = min(pearson, 10)
                dist = 20 - 1.6 * pearson
            else:
                pearson = max(pearson, -10)
                dist = 20 - 8 * pearson
            pen.setWidth(1)

            def _offseted_line(ax, ay):
                # A zero offset means the line spans the whole rectangle
                # in that direction.
                r = QGraphicsLineItem(x + ax, y + ay, x + (ax or w),
                                      y + (ay or h))
                self.canvas.addItem(r)
                r.setPen(pen)

            ax = dist
            while ax < w:
                _offseted_line(ax, 0)
                ax += dist

            ay = dist
            while ay < h:
                _offseted_line(0, ay)
                ay += dist

        def make_tooltip():
            """Create the tooltip. The function uses local variables from
            the enclosing scope."""

            # pylint: disable=undefined-loop-variable
            def _oper(attr, txt):
                if self.data.domain[attr.name] == ddomain[attr.name]:
                    return " = "
                return " " if txt[0] in "<≥" else " in "

            xt, yt = [
                "<b>{attr}{eq}{val_name}</b>: {obs}/{n} ({p:.0f} %)".format(
                    attr=to_html(attr.name),
                    eq=_oper(attr, val_name),
                    val_name=to_html(val_name),
                    obs=fmt(prob * n),
                    n=int(n),
                    p=100 * prob) for attr, val_name, prob in [(
                        attr_x, xval_name,
                        chi.probs_x[x]), (attr_y, yval_name, chi.probs_y[y])]
            ]

            ct = """<b>combination of values: </b><br/>
                   &nbsp;&nbsp;&nbsp;expected {exp} ({p_exp:.0f} %)<br/>
                   &nbsp;&nbsp;&nbsp;observed {obs} ({p_obs:.0f} %)""".format(
                exp=fmt(chi.expected[y, x]),
                p_exp=100 * chi.expected[y, x] / n,
                obs=fmt(chi.observed[y, x]),
                p_obs=100 * chi.observed[y, x] / n)

            return f"{xt}<br/>{yt}<hr/>{ct}"

        for item in self.canvas.items():
            self.canvas.removeItem(item)
        if self.data is None or len(self.data) == 0 or \
                self.attr_x is None or self.attr_y is None:
            return

        ddomain = self.discrete_data.domain
        attr_x, attr_y = self.attr_x, self.attr_y
        disc_x, disc_y = ddomain[attr_x.name], ddomain[attr_y.name]
        view = self.canvasView

        chi = ChiSqStats(self.discrete_data, disc_x, disc_y)
        max_ylabel_w = max((text_width(val) for val in disc_y.values),
                           default=0)
        max_ylabel_w = min(max_ylabel_w, 200)
        # The y axis title is drawn vertically, so its text height is the
        # horizontal space it takes.
        x_off = text_height(attr_y.name) + max_ylabel_w
        y_off = 15
        square_size = min(view.width() - x_off - 35,
                          view.height() - y_off - 80)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, view.width(), view.height())
        if not disc_x.values or not disc_y.values:
            if not disc_x.values and not disc_y.values \
                    and disc_x != disc_y:
                text_ = "Features {} and {} have no values".format(
                    disc_x, disc_y)
            else:
                text_ = "Feature {} has no values".format(
                    disc_x if not disc_x.values else disc_y)
            text(text_,
                 view.width() / 2 + 70,
                 view.height() / 2, Qt.AlignRight | Qt.AlignVCenter)
            return
        n = chi.n
        curr_x = x_off
        max_xlabel_h = 0
        self.areas = []
        for x, (px, xval_name) in enumerate(zip(chi.probs_x, disc_x.values)):
            if px == 0:
                continue
            width = square_size * px

            curr_y = y_off
            for y in range(len(chi.probs_y) - 1, -1, -1):  # bottom-up order
                py = chi.probs_y[y]
                yval_name = disc_y.values[y]
                if py == 0:
                    continue
                height = square_size * py

                # The index the new rectangle will have in self.areas.
                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(self.canvas,
                                       curr_x + 2,
                                       curr_y + 2,
                                       width - 4,
                                       height - 4,
                                       z=-10,
                                       onclick=self.select_area)
                rect.value_pair = x, y
                self.areas.append(rect)
                show_pearson(rect, chi.residuals[y, x], 3 * selected)
                rect.setToolTip(make_tooltip())

                if x == 0:
                    text(yval_name, x_off, curr_y + height / 2,
                         Qt.AlignRight | Qt.AlignVCenter)
                curr_y += height

            xl = text(xval_name,
                      curr_x + width / 2,
                      y_off + square_size,
                      Qt.AlignHCenter | Qt.AlignTop,
                      max_width=width)
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)
            curr_x += width

        bottom = y_off + square_size + max_xlabel_h
        text(attr_y.name,
             0,
             y_off + square_size / 2,
             Qt.AlignLeft | Qt.AlignVCenter,
             bold=True,
             vertical=True)
        text(attr_x.name,
             x_off + square_size / 2,
             bottom,
             Qt.AlignHCenter | Qt.AlignTop,
             bold=True)
        bottom += 30
        xl = text("χ²={:.2f}, p={:.3f}".format(chi.chisq, chi.p), 0, bottom)
        # Assume similar height for both lines
        text("N = " + fmt(chi.n), 0, bottom - xl.boundingRect().height())

    def get_widget_name_extension(self):
        """Return "<x> vs <y>" for the shown attributes, or None if there
        is no data or no attribute pair to show."""
        if self.data is not None \
                and self.attr_x is not None and self.attr_y is not None:
            return "{} vs {}".format(self.attr_x.name, self.attr_y.name)
        return None

    def send_report(self):
        self.report_plot()
Example #47
0
class OWScatterPlot(OWWidget):
    """Scatter plot widget built around an ``OWScatterPlotGraph``."""

    # Widget metadata.
    name = 'Scatter Plot'
    description = "Interactive scatter plot visualization with " \
                  "intelligent data visualization enhancements."
    icon = "icons/ScatterPlot.svg"
    priority = 210

    # Input signals; the third element of each tuple names the handler
    # method defined on this class.
    inputs = [("Data", Table, "set_data", Default),
              ("Data Subset", Table, "set_subset_data"),
              ("Features", AttributeList, "set_shown_attributes")]

    # Output signals sent from send_data() and send_features().
    outputs = [("Selected Data", Table, Default), ("Other Data", Table),
               ("Features", Table)]

    settingsHandler = DomainContextHandler()

    # Plain (non-context) settings.
    auto_send_selection = Setting(True)
    auto_sample = Setting(True)
    toolbar_selection = Setting(0)

    # Names of the variables shown on the two axes (context settings).
    attr_x = ContextSetting("")
    attr_y = ContextSetting("")

    graph = SettingProvider(OWScatterPlotGraph)

    # Values offered by the "Jittering" slider built in __init__.
    jitter_sizes = [0, 0.1, 0.5, 1, 2, 3, 4, 5, 7, 10]

    graph_name = "graph.plot_widget.plotItem"

    class Information(OWWidget.Information):
        # Shown by set_data() when a large SqlTable is only sampled.
        sampled_sql = Msg("Large SQL table; showing a sample.")

    def __init__(self):
        """Build the plot area, the control-area widgets and zoom actions."""
        super().__init__()

        # Main area: the graph's plot widget inside a margin-less box.
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWScatterPlotGraph(self, box, "ScatterPlot")
        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        # Draw both axes with the palette's text color.
        axispen = QtGui.QPen(self.palette().color(QtGui.QPalette.Text))
        axis = plot.getAxis("bottom")
        axis.setPen(axispen)

        axis = plot.getAxis("left")
        axis.setPen(axispen)

        self.data = None  # Orange.data.Table
        self.subset_data = None  # Orange.data.Table
        self.data_metas_X = None  # self.data, where primitive metas are moved to X
        self.sql_data = None  # Orange.data.sql.table.SqlTable
        self.attribute_selection_list = None  # list of Orange.data.Variable
        # Timer that repeatedly calls add_data(); started from set_data()
        # and switch_sampling().
        self.__timer = QTimer(self, interval=1200)
        self.__timer.timeout.connect(self.add_data)

        # Options shared by all attribute combo boxes below.
        common_options = dict(labelWidth=50,
                              orientation=Qt.Horizontal,
                              sendSelectedValue=True,
                              valueType=str)
        box = gui.vBox(self.controlArea, "Axis Data")
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self.update_attr,
                                      **common_options)
        self.cb_attr_y = gui.comboBox(box,
                                      self,
                                      "attr_y",
                                      label="Axis y:",
                                      callback=self.update_attr,
                                      **common_options)

        # "Score Plots" button; it starts disabled and set_data() enables it.
        self.vizrank = ScatterPlotVizRank(self)
        vizrank_box = gui.hBox(box)
        gui.separator(vizrank_box, width=common_options["labelWidth"])
        self.vizrank_button_tooltip = "Find informative projections"
        self.vizrank_button = gui.button(vizrank_box,
                                         self,
                                         "Score Plots",
                                         callback=self.vizrank.reshow,
                                         tooltip=self.vizrank_button_tooltip,
                                         enabled=False)
        self.vizrank.pairSelected.connect(self.set_attr)

        gui.separator(box)

        # Jittering controls; the label shows "None" for 0, one decimal
        # below 1 and an integer otherwise.
        gui.valueSlider(box,
                        self,
                        value='graph.jitter_size',
                        label='Jittering: ',
                        values=self.jitter_sizes,
                        callback=self.reset_graph_data,
                        labelFormat=lambda x: "None"
                        if x == 0 else ("%.1f %%" if x < 1 else "%d %%") % x)
        gui.checkBox(gui.indentedBox(box),
                     self,
                     'graph.jitter_continuous',
                     'Jitter continuous values',
                     callback=self.reset_graph_data)

        # Sampling box; hidden here and made visible by set_data() for
        # large SQL tables.
        self.sampling = gui.auto_commit(self.controlArea,
                                        self,
                                        "auto_sample",
                                        "Sample",
                                        box="Sampling",
                                        callback=self.switch_sampling,
                                        commit=lambda: self.add_data(1))
        self.sampling.setVisible(False)

        # Point appearance: color, label, shape and size combos.
        box = gui.vBox(self.controlArea, "Points")
        self.cb_attr_color = gui.comboBox(box,
                                          self,
                                          "graph.attr_color",
                                          label="Color:",
                                          emptyString="(Same color)",
                                          callback=self.update_colors,
                                          **common_options)
        self.cb_attr_label = gui.comboBox(box,
                                          self,
                                          "graph.attr_label",
                                          label="Label:",
                                          emptyString="(No labels)",
                                          callback=self.graph.update_labels,
                                          **common_options)
        self.cb_attr_shape = gui.comboBox(box,
                                          self,
                                          "graph.attr_shape",
                                          label="Shape:",
                                          emptyString="(Same shape)",
                                          callback=self.graph.update_shapes,
                                          **common_options)
        self.cb_attr_size = gui.comboBox(box,
                                         self,
                                         "graph.attr_size",
                                         label="Size:",
                                         emptyString="(Same size)",
                                         callback=self.graph.update_sizes,
                                         **common_options)

        g = self.graph.gui
        box2 = g.point_properties_box(self.controlArea, box)

        box = gui.vBox(self.controlArea, "Plot Properties")
        g.add_widgets([g.ShowLegend, g.ShowGridLines], box)
        gui.checkBox(box,
                     self,
                     value='graph.tooltip_shows_all',
                     label='Show all data on mouse hover')
        self.cb_class_density = gui.checkBox(box,
                                             self,
                                             value='graph.class_density',
                                             label='Show class density',
                                             callback=self.update_density)
        gui.checkBox(box,
                     self,
                     'graph.label_only_selected',
                     'Label only selected points',
                     callback=self.graph.update_labels)

        # Zoom/select toolbar; each button is wired to the graph's handler.
        self.zoom_select_toolbar = g.zoom_select_toolbar(
            gui.vBox(self.controlArea, "Zoom/Select"),
            nomargin=True,
            buttons=[
                g.StateButtonsBegin, g.SimpleSelect, g.Pan, g.Zoom,
                g.StateButtonsEnd, g.ZoomReset
            ])
        buttons = self.zoom_select_toolbar.buttons
        buttons[g.Zoom].clicked.connect(self.graph.zoom_button_clicked)
        buttons[g.Pan].clicked.connect(self.graph.pan_button_clicked)
        buttons[g.SimpleSelect].clicked.connect(
            self.graph.select_button_clicked)
        buttons[g.ZoomReset].clicked.connect(self.graph.reset_button_clicked)
        self.controlArea.layout().addStretch(100)
        # Icon lookup used by init_attr_values() when filling the combos.
        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(self.controlArea, self, "auto_send_selection",
                        "Send Selection", "Send Automatically")

        def zoom(s):
            """Zoom in/out by factor `s`."""
            viewbox = plot.getViewBox()
            # scaleBy scales the view's bounds (the axis range)
            viewbox.scaleBy((1 / s, 1 / s))

        def fit_to_view():
            """Let the view box auto-range."""
            viewbox = plot.getViewBox()
            viewbox.autoRange()

        # Keyboard actions for zooming in, zooming out and fitting the view.
        zoom_in = QtGui.QAction("Zoom in", self, triggered=lambda: zoom(1.25))
        zoom_in.setShortcuts([
            QtGui.QKeySequence(QtGui.QKeySequence.ZoomIn),
            QtGui.QKeySequence(self.tr("Ctrl+="))
        ])
        zoom_out = QtGui.QAction("Zoom out",
                                 self,
                                 shortcut=QtGui.QKeySequence.ZoomOut,
                                 triggered=lambda: zoom(1 / 1.25))
        zoom_fit = QtGui.QAction("Fit in view",
                                 self,
                                 shortcut=QtGui.QKeySequence(Qt.ControlModifier
                                                             | Qt.Key_0),
                                 triggered=fit_to_view)
        self.addActions([zoom_in, zoom_out, zoom_fit])

    # def settingsFromWidgetCallback(self, handler, context):
    #     context.selectionPolygons = []
    #     for curve in self.graph.selectionCurveList:
    #         xs = [curve.x(i) for i in range(curve.dataSize())]
    #         ys = [curve.y(i) for i in range(curve.dataSize())]
    #         context.selectionPolygons.append((xs, ys))

    # def settingsToWidgetCallback(self, handler, context):
    #     selections = getattr(context, "selectionPolygons", [])
    #     for (xs, ys) in selections:
    #         c = SelectionCurve("")
    #         c.setData(xs,ys)
    #         c.attach(self.graph)
    #         self.graph.selectionCurveList.append(c)

    def reset_graph_data(self, *_):
        """Rescale the graph's data and redraw; positional args are ignored."""
        self.graph.rescale_data()
        self.update_graph()

    def set_data(self, data):
        """Handle the "Data" input.

        An SqlTable whose approximate length is below 4000 is converted to
        a Table directly; a larger one is sampled, remembered in
        ``self.sql_data`` and the sampling box is shown. Data with no rows
        or an empty domain is treated as None. Nothing else happens when
        the checksum of the new data equals that of the current data.
        """
        self.Information.sampled_sql.clear()
        self.__timer.stop()
        self.sampling.setVisible(False)
        self.sql_data = None

        if isinstance(data, SqlTable):
            if data.approx_len() >= 4000:
                # Too large to fetch whole: show a sample and keep the
                # SqlTable so that add_data() can fetch more later.
                self.Information.sampled_sql()
                self.sql_data = data
                sample = data.sample_time(0.8, no_cache=True)
                sample.download_data(2000, partial=True)
                data = Table(sample)
                self.sampling.setVisible(True)
                if self.auto_sample:
                    self.__timer.start()
            else:
                data = Table(data)

        if data is not None and not (len(data) and len(data.domain)):
            data = None
        if self.data and data and self.data.checksum() == data.checksum():
            return

        self.closeContext()
        same_domain = (
            self.data and data
            and data.domain.checksum() == self.data.domain.checksum())
        self.data = data
        self.data_metas_X = self.move_primitive_metas_to_X(data)

        if not same_domain:
            self.init_attr_values()
        self.vizrank.initialize()

        # The button needs more than one attribute, more than one row and
        # a class variable; the tooltip explains a missing class variable.
        current = self.data
        big_enough = (current is not None
                      and len(current.domain.attributes) > 1
                      and len(current) > 1)
        has_class = big_enough and current.domain.class_var is not None
        self.vizrank_button.setEnabled(has_class)
        if big_enough and not has_class:
            tooltip = "Data with a class variable is required."
        else:
            tooltip = self.vizrank_button_tooltip
        self.vizrank_button.setToolTip(tooltip)
        self.openContext(self.data)

    def add_data(self, time=0.4):
        """Fetch another sample from ``self.sql_data`` and append it.

        The sampling timer is stopped, and nothing is fetched, when there
        is no SQL source (``set_data`` resets ``self.sql_data`` to None) or
        when more than 2000 rows are already loaded.

        :param time: forwarded to ``SqlTable.sample_time``.
        """
        # Guard against a missing SQL source, as switch_sampling() does;
        # without it a stray call would fail on ``None.sample_time``.
        if self.sql_data is None or (self.data and len(self.data) > 2000):
            self.__timer.stop()
            return
        data_sample = self.sql_data.sample_time(time, no_cache=True)
        if data_sample:
            data_sample.download_data(2000, partial=True)
            data = Table(data_sample)
            # NOTE(review): if the first sample was empty, self.data is None
            # here and concatenate() receives None -- confirm whether that
            # case can occur and how it should be handled.
            self.data = Table.concatenate((self.data, data), axis=0)
            self.data_metas_X = self.move_primitive_metas_to_X(self.data)
            self.handleNewSignals()

    def switch_sampling(self):
        """Stop the sampling timer; restart it if auto-sampling applies.

        When ``auto_sample`` is on and an SQL source is set, one sample is
        added immediately before the timer is started again.
        """
        self.__timer.stop()
        if not (self.auto_sample and self.sql_data):
            return
        self.add_data()
        self.__timer.start()

    def move_primitive_metas_to_X(self, data):
        """Return `data` converted to a domain with primitive metas as attributes.

        The new attributes are the primitive variables among the original
        attributes and metas; non-primitive metas stay metas and the class
        variables are kept. ``None`` is passed through unchanged.
        """
        if data is None:
            return data
        domain = data.domain
        candidates = domain.attributes + domain.metas
        x_vars = [var for var in candidates if var.is_primitive()]
        kept_metas = [var for var in domain.metas if not var.is_primitive()]
        new_domain = Domain(x_vars, domain.class_vars, kept_metas)
        return Table.from_table(new_domain, data)

    def set_subset_data(self, subset_data):
        """Handle the "Data Subset" input.

        An SqlTable with approximate length below ``AUTO_DL_LIMIT`` is
        converted to a Table; a larger one is rejected with a warning and
        the subset becomes None.
        """
        self.warning()
        if isinstance(subset_data, SqlTable):
            if subset_data.approx_len() >= AUTO_DL_LIMIT:
                self.warning("Data subset does not support large Sql tables")
                subset_data = None
            else:
                subset_data = Table(subset_data)
        self.subset_data = self.move_primitive_metas_to_X(subset_data)

    # called when all signals are received, so the graph is updated only once
    def handleNewSignals(self):
        """Push data to the graph, apply requested axes, redraw and commit."""
        graph = self.graph
        graph.new_data(self.data_metas_X, self.subset_data)

        # Use the attributes received on the "Features" input only if the
        # graph's domain contains all of them.
        requested = self.attribute_selection_list
        if requested and all(attr in graph.data_domain for attr in requested):
            self.attr_x = requested[0].name
            self.attr_y = requested[1].name
        self.attribute_selection_list = None

        self.update_graph()
        self.cb_class_density.setEnabled(graph.can_draw_density())
        self.unconditional_commit()

    def set_shown_attributes(self, attributes):
        """Remember the first two attributes of the "Features" input.

        Anything with fewer than two attributes clears the request.
        """
        usable = attributes and len(attributes) >= 2
        self.attribute_selection_list = attributes[:2] if usable else None

    def get_shown_attributes(self):
        """Return the ``(attr_x, attr_y)`` pair currently set on the widget."""
        return self.attr_x, self.attr_y

    def init_attr_values(self):
        """Reset all attribute combos and refill them from ``self.data``.

        Without data the combos keep only their placeholder entries. With
        data, x/y/color/label list every domain variable followed by the
        primitive metas; shape lists the discrete ones and size the rest;
        label additionally starts with the non-primitive metas.
        """
        for combo, setting in ((self.cb_attr_x, "attr_x"),
                               (self.cb_attr_y, "attr_y")):
            combo.clear()
            setattr(self, setting, None)
        for combo, placeholder, setting in (
                (self.cb_attr_color, "(Same color)", "attr_color"),
                (self.cb_attr_label, "(No labels)", "attr_label"),
                (self.cb_attr_shape, "(Same shape)", "attr_shape"),
                (self.cb_attr_size, "(Same size)", "attr_size")):
            combo.clear()
            combo.addItem(placeholder)
            setattr(self.graph, setting, None)
        if not self.data:
            return

        domain = self.data.domain
        for meta in domain.metas:
            if not meta.is_primitive():
                self.cb_attr_label.addItem(self.icons[meta], meta.name)

        # Domain variables first, then primitive metas, each added to the
        # combos in the same order: x, y, color, shape-or-size, label.
        primitive_metas = [meta for meta in domain.metas
                           if meta.is_primitive()]
        for var in list(domain.variables) + primitive_metas:
            for combo in (self.cb_attr_x, self.cb_attr_y,
                          self.cb_attr_color):
                combo.addItem(self.icons[var], var.name)
            if var.is_discrete:
                by_kind = self.cb_attr_shape
            else:
                by_kind = self.cb_attr_size
            by_kind.addItem(self.icons[var], var.name)
            self.cb_attr_label.addItem(self.icons[var], var.name)

        # Default axes: first item on x, second (if any) on y.
        self.attr_x = self.cb_attr_x.itemText(0)
        y_index = 1 if self.cb_attr_y.count() > 1 else 0
        self.attr_y = self.cb_attr_y.itemText(y_index)

        class_var = self.data.domain.class_var
        self.graph.attr_color = class_var.name if class_var else ""
        self.graph.attr_shape = ""
        self.graph.attr_size = ""
        self.graph.attr_label = ""

    def set_attr(self, attr_x, attr_y):
        """Show the given pair of variables on the axes (by their names)."""
        x_name = attr_x.name
        y_name = attr_y.name
        self.attr_x = x_name
        self.attr_y = y_name
        self.update_attr()

    def update_attr(self):
        """Redraw after an axis change and re-send the "Features" output."""
        self.update_graph()
        # Whether density can be drawn is asked of the graph after the redraw.
        self.cb_class_density.setEnabled(self.graph.can_draw_density())
        self.send_features()

    def update_colors(self):
        """Recolor the graph and refresh the class-density checkbox state."""
        self.graph.update_colors()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())

    def update_density(self):
        """Redraw the graph without resetting the view."""
        self.update_graph(reset_view=False)

    def update_graph(self, reset_view=True, **_):
        """Clear the zoom stack and, if the graph has data, redraw it.

        Extra keyword arguments are accepted and ignored.
        """
        graph = self.graph
        graph.zoomStack = []
        if graph.have_data:
            graph.update_data(self.attr_x, self.attr_y, reset_view)

    def selection_changed(self):
        """Re-send the data outputs after the selection changes."""
        self.send_data()

    def send_data(self):
        """Send "Selected Data" and "Other Data" according to the selection.

        With an empty selection nothing is selected and all data goes to
        "Other Data". An empty remainder is sent as None.
        """
        data = self.data
        picked = rest = None
        # TODO: Implement selection for sql data
        if isinstance(data, SqlTable):
            picked = rest = data
        elif data is not None:
            indices = self.graph.get_selection()
            if len(indices) == 0:
                self.send("Selected Data", None)
                self.send("Other Data", data)
                return
            picked = data[indices]
            # Boolean mask of the rows that are not selected.
            keep = np.ones(len(data), dtype=bool)
            keep[indices] = False
            rest = data[keep]
        self.send("Selected Data", picked)
        nothing_left = rest is None or len(rest) == 0
        self.send("Other Data", None if nothing_left else rest)

    def send_features(self):
        """Send the two axis names as a two-row table with one string meta.

        None is sent when neither axis is set.
        """
        if not (self.attr_x or self.attr_y):
            self.send("Features", None)
            return
        feature_var = StringVariable(name="feature")
        domain = Domain([], metas=(feature_var, ))
        features = Table(domain, [[self.attr_x], [self.attr_y]])
        features.name = "Features"
        self.send("Features", features)

    def commit(self):
        """Send both the data outputs and the "Features" output."""
        self.send_data()
        self.send_features()

    def closeEvent(self, ce):
        """Close the VizRank window before the default close handling."""
        self.vizrank.close()
        super().closeEvent(ce)

    def hideEvent(self, he):
        """Hide the VizRank window before the default hide handling."""
        self.vizrank.hide()
        super().hideEvent(he)

    def get_widget_name_extension(self):
        """Return "<x> vs <y>" from the axis combos, or None without data."""
        if self.data is None:
            return None
        x_label = self.combo_value(self.cb_attr_x)
        y_label = self.combo_value(self.cb_attr_y)
        return "{} vs {}".format(x_label, y_label)

    def send_report(self):
        """Report the plot, followed by a caption of the point settings.

        Jittering is reported only when continuous jittering is enabled or
        at least one of the axis variables is discrete.
        """
        has_discrete_axis = False
        if self.data:
            domain = self.data.domain
            x_var = domain[self.attr_x]
            has_discrete_axis = x_var.is_discrete or \
                domain[self.attr_y].is_discrete
        items = (
            ("Color", self.combo_value(self.cb_attr_color)),
            ("Label", self.combo_value(self.cb_attr_label)),
            ("Shape", self.combo_value(self.cb_attr_shape)),
            ("Size", self.combo_value(self.cb_attr_size)),
            ("Jittering",
             (self.graph.jitter_continuous or has_discrete_axis)
             and self.graph.jitter_size),
        )
        caption = report.render_items_vert(items)
        self.report_plot()
        if caption:
            self.report_caption(caption)

    def onDeleteWidget(self):
        """Run the base-class cleanup, then release the plot's resources."""
        super().onDeleteWidget()
        # Schedule the view box for deletion and clear the plot widget.
        self.graph.plot_widget.getViewBox().deleteLater()
        self.graph.plot_widget.clear()
class TestDomainContextHandler(TestCase):
    def setUp(self):
        """Create the shared domain, its encoded form and a handler."""
        attributes = [
            ContinuousVariable('c1'),
            DiscreteVariable('d1', values='abc'),
            DiscreteVariable('d2', values='def'),
        ]
        class_vars = [DiscreteVariable('d3', values='ghi')]
        metas = [
            ContinuousVariable('c2'),
            DiscreteVariable('d4', values='jkl'),
        ]
        self.domain = Domain(attributes=attributes,
                             class_vars=class_vars,
                             metas=metas)

        # (domain, encoded attributes, encoded metas), the argument triple
        # that the tests pass to the handler as ``*self.args``.
        encoded_attributes = {
            'c1': Continuous - 100,
            'd1': Discrete - 100,
            'd2': Discrete - 100,
            'd3': Discrete - 100,
        }
        encoded_metas = {
            'c2': Continuous - 100,
            'd4': Discrete - 100,
        }
        self.args = (self.domain, encoded_attributes, encoded_metas)

        self.handler = DomainContextHandler()
        # Replace read_defaults with a no-op for the tests.
        self.handler.read_defaults = lambda: None
        self.handler.read_defaults = lambda: None

    def test_encode_domain_with_match_none(self):
        # With MATCH_VALUES_NONE every variable is expected to be encoded
        # by its type code only.
        mode = DomainContextHandler.MATCH_VALUES_NONE
        handler = DomainContextHandler(match_values=mode)

        encoded = handler.encode_domain(self.domain)
        encoded_attributes, encoded_metas = encoded

        expected_attributes = {
            'c1': Continuous - 100,
            'd1': Discrete - 100,
            'd2': Discrete - 100,
            'd3': Discrete - 100,
        }
        expected_metas = {
            'c2': Continuous - 100,
            'd4': Discrete - 100,
        }
        self.assertEqual(encoded_attributes, expected_attributes)
        self.assertEqual(encoded_metas, expected_metas)

    def test_encode_domain_with_match_class(self):
        # With MATCH_VALUES_CLASS only the class variable is expected to be
        # encoded by its values.
        mode = DomainContextHandler.MATCH_VALUES_CLASS
        handler = DomainContextHandler(match_values=mode)

        encoded = handler.encode_domain(self.domain)
        encoded_attributes, encoded_metas = encoded

        expected_attributes = {
            'c1': Continuous - 100,
            'd1': Discrete - 100,
            'd2': Discrete - 100,
            'd3': tuple('ghi'),
        }
        expected_metas = {
            'c2': Continuous - 100,
            'd4': Discrete - 100,
        }
        self.assertEqual(encoded_attributes, expected_attributes)
        self.assertEqual(encoded_metas, expected_metas)

    def test_encode_domain_with_match_all(self):
        # With MATCH_VALUES_ALL every discrete variable, metas included, is
        # expected to be encoded by its values.
        mode = DomainContextHandler.MATCH_VALUES_ALL
        handler = DomainContextHandler(match_values=mode)

        encoded = handler.encode_domain(self.domain)
        encoded_attributes, encoded_metas = encoded

        expected_attributes = {
            'c1': Continuous - 100,
            'd1': tuple('abc'),
            'd2': tuple('def'),
            'd3': tuple('ghi'),
        }
        expected_metas = {
            'c2': Continuous - 100,
            'd4': tuple('jkl'),
        }
        self.assertEqual(encoded_attributes, expected_attributes)
        self.assertEqual(encoded_metas, expected_metas)

    def test_match_returns_1_if_everything_matches(self):
        self.handler.bind(SimpleWidget)

        # Attributes in values
        values = {'with_metas': ('d1', Discrete),
                  'required': ('d1', Discrete)}
        score = self.handler.match(Mock(values=values), *self.args)
        self.assertEqual(1., score)

        # Metas in values
        values = {'with_metas': ('d4', Discrete),
                  'required': ('d1', Discrete)}
        score = self.handler.match(Mock(values=values), *self.args)
        self.assertEqual(1., score)

        # Attributes in lists
        values = {'with_metas': [("d1", Discrete)]}
        score = self.handler.match(Mock(values=values), *self.args)
        self.assertEqual(1., score)

        # Metas in lists
        values = {'with_metas': [("d4", Discrete)]}
        score = self.handler.match(Mock(values=values), *self.args)
        self.assertEqual(1., score)

    def test_match_when_nothing_to_match(self):
        # A context without any stored values is expected to score 0.1.
        self.handler.bind(SimpleWidget)

        empty_context = Mock(values={})
        score = self.handler.match(empty_context, *self.args)
        self.assertEqual(0.1, score)

    def test_match_returns_zero_on_incompatible_context(self):
        self.handler.bind(SimpleWidget)

        # required
        values = {'required': ('u', Discrete),
                  'with_metas': ('d1', Discrete)}
        score = self.handler.match(Mock(values=values), *self.args)
        self.assertEqual(0, score)

    def test_clone_context(self):
        self.handler.bind(SimpleWidget)
        stored_values = {
            'text': ('u', -2),
            'with_metas': [('d1', Discrete), ('d1', Continuous),
                           ('c1', Continuous), ('c1', Discrete)],
            'required': ('u', Continuous),
        }
        context = self.create_context(self.domain, stored_values)

        cloned = self.handler.clone_context(context, *self.args)
        new_values = cloned.values

        # The clone is expected to keep the plain value, drop the pairs
        # that do not fit the domain and drop the 'required' entry.
        self.assertEqual(new_values['text'], ('u', -2))
        self.assertEqual([('d1', Discrete), ('c1', Continuous)],
                         new_values['with_metas'])
        self.assertNotIn('required', new_values)

    def test_open_context(self):
        self.handler.bind(SimpleWidget)
        stored_values = {
            'text': ('u', -2),
            'with_metas': [('d1', Discrete), ('d2', Discrete)],
        }
        context = self.create_context(self.domain, stored_values)
        self.handler.global_contexts = [context, Mock(values={})]

        widget = SimpleWidget()
        self.handler.initialize(widget)
        old_metas_list = widget.with_metas
        domain, encoded_attributes, encoded_metas = self.args
        self.handler.open_context(widget, domain)

        opened = widget.current_context
        self.assertEqual(opened.attributes, encoded_attributes)
        self.assertEqual(opened.metas, encoded_metas)
        # The widget's list object is expected to be the same one as before.
        self.assertIs(old_metas_list, widget.with_metas)

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas,
                         [('d1', Discrete), ('d2', Discrete)])

    def test_open_context_with_imperfect_match(self):
        self.handler.bind(SimpleWidget)
        stored_values = {
            'text': ('u', -2),
            'with_metas': [('d1', Discrete), ('d1', Continuous),
                           ('c1', Continuous), ('c1', Discrete)],
        }
        context = self.create_context(None, stored_values)
        self.handler.global_contexts = [context, Mock(values={})]

        widget = SimpleWidget()
        self.handler.initialize(widget)
        domain, encoded_attributes, encoded_metas = self.args
        self.handler.open_context(widget, domain)

        opened = widget.current_context
        self.assertEqual(opened.attributes, encoded_attributes)
        self.assertEqual(opened.metas, encoded_metas)

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas,
                         [('d1', Discrete), ('c1', Continuous)])

    def test_open_context_not_first_match(self):
        self.handler.bind(SimpleWidget)
        stored_values = {
            'text': ('u', -2),
            'with_metas': [('d1', Discrete), ('d1', Continuous),
                           ('c1', Continuous), ('c1', Discrete)],
        }
        context = self.create_context(None, stored_values)
        # The context with values sits between two empty ones.
        self.handler.global_contexts = [
            Mock(values={}), context, Mock(values={})]
        self.handler.first_match = False

        widget = SimpleWidget()
        self.handler.initialize(widget)
        domain, encoded_attributes, encoded_metas = self.args
        self.handler.open_context(widget, domain)

        opened = widget.current_context
        self.assertEqual(opened.attributes, encoded_attributes)
        self.assertEqual(opened.metas, encoded_metas)

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas,
                         [('d1', Discrete), ('c1', Continuous)])

    def test_open_context_with_no_match(self):
        self.handler.bind(SimpleWidget)
        widget = SimpleWidget()
        self.handler.initialize(widget)
        widget.text = 'u'

        domain, encoded_attributes, encoded_metas = self.args
        self.handler.open_context(widget, domain)

        # The widget keeps its values and a context is created from them.
        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas, [])
        created = widget.current_context
        self.assertEqual(created.attributes, encoded_attributes)
        self.assertEqual(created.metas, encoded_metas)
        self.assertEqual(created.values['text'], ('u', -2))

    def test_filter_value(self):
        setting = ContextSetting([])
        setting.name = "value"

        def check_filter(before_value, after_value):
            data = {"value": before_value}
            self.handler.filter_value(setting, data, *self.args)
            self.assertEqual(data.get("value"), after_value)

        # An empty list stays empty.
        check_filter([], [])
        # When the list contains attributes as tuples of (name, type),
        # attributes not present in the domain should be filtered out.
        check_filter(
            [("d1", Discrete), ("d1", Continuous),
             ("c1", Continuous), ("c1", Discrete)],
            [("d1", Discrete), ("c1", Continuous)])
        # All other values in the list should remain.
        check_filter([0, [1, 2, 3], "abcd", 5.4],
                     [0, [1, 2, 3], "abcd", 5.4])

    def test_filter_value_dict(self):
        setting = ContextSetting({})
        setting.name = "value"

        def check_filter(before_value, after_value):
            data = {"value": before_value}
            self.handler.filter_value(setting, data, *self.args)
            self.assertEqual(data.get("value"), after_value)

        # An empty dict stays empty.
        check_filter({}, {})
        # When the dict is keyed by attributes as tuples of (name, type),
        # keys not present in the domain should be filtered out.
        before = {
            ("d1", Discrete): 1,
            ("d1", Continuous): 2,
            ("c1", Continuous): 3,
            ("c1", Discrete): 4,
        }
        after = {
            ("d1", Discrete): 1,
            ("c1", Continuous): 3,
        }
        check_filter(before, after)
        # A list of other values should remain unchanged.
        check_filter([0, [1, 2, 3], "abcd", 5.4],
                     [0, [1, 2, 3], "abcd", 5.4])

    def test_encode_setting(self):
        setting = ContextSetting(None)

        var = self.domain[0]
        encoded = self.handler.encode_setting(None, setting, var)
        expected = (var.name, 100 + vartype(var))
        self.assertEqual(encoded, expected)

        # Should not crash on anonymous variables
        with self.assertWarns(OrangeDeprecationWarning):
            var = ContinuousVariable()
        encoded = self.handler.encode_setting(None, setting, var)
        expected = (var.name, 100 + vartype(var))
        self.assertEqual(encoded, expected)

    def test_encode_list_settings(self):
        setting = ContextSetting(None)

        # A list of variables (and None) is encoded element-wise, tagged -3.
        var1, var2 = self.domain[:2]
        encoded = self.handler.encode_setting(None, setting,
                                              [None, var1, var2])
        expected = ([None,
                     (var1.name, 100 + vartype(var1)),
                     (var2.name, 100 + vartype(var2))], -3)
        self.assertEqual(encoded, expected)

        # Lists without variables come back as equal but distinct lists.
        for a_list in ([1, 2, 3], [], [None, None]):
            unchanged = list(a_list)
            encoded = self.handler.encode_setting(None, setting, a_list)
            self.assertEqual(encoded, unchanged)
            self.assertIsNot(encoded, a_list)

    def test_encode_dict_settings(self):
        setting = ContextSetting(None)

        # A dict keyed by variables is encoded key-wise and tagged -4.
        var1, var2 = self.domain[:2]
        val = self.handler.encode_setting(None, setting, {var1: 1, var2: 2})
        self.assertEqual(val, ({
            (var1.name, 100 + vartype(var1)): 1,
            (var2.name, 100 + vartype(var2)): 2
        }, -4))

        # The encoded value is a (dict, tag) tuple, so the copy check has
        # to look at val[0]; comparing the tuple itself with the input dict
        # could never fail.
        a_dict = {1: 2, 2: 3, 3: 4}
        val = self.handler.encode_setting(None, setting, a_dict)
        self.assertEqual(val, ({1: 2, 2: 3, 3: 4}, -2))
        self.assertIsNot(val[0], a_dict)

        a_dict = {}
        val = self.handler.encode_setting(None, setting, a_dict)
        self.assertEqual(val, ({}, -4))
        self.assertIsNot(val[0], a_dict)

    def test_decode_setting(self):
        setting = ContextSetting(None)
        decode = self.handler.decode_setting

        var = self.domain[0]
        encoded = (var.name, 100 + vartype(var))
        self.assertIs(decode(setting, encoded, self.domain), var)

        # The variable is also found when it is a meta.
        all_metas_domain = Domain([], metas=[var])
        encoded = (var.name, 100 + vartype(var))
        self.assertIs(decode(setting, encoded, all_metas_domain), var)

        # Decoding a variable without a domain is an error ...
        with self.assertRaises(ValueError):
            decode(setting, (var.name, 100 + vartype(var)))
        # ... but decoding None without a domain must not raise.
        decode(setting, None, None)

    def test_decode_list_setting(self):
        setting = ContextSetting(None)
        decode = self.handler.decode_setting

        var1, var2 = self.domain[:2]

        def encoded_vars():
            # A fresh encoded value for each call, as in separate literals.
            return ([None,
                     (var1.name, 100 + vartype(var1)),
                     (var2.name, 100 + vartype(var2))], -3)

        decoded = decode(setting, encoded_vars(), self.domain)
        self.assertEqual(decoded, [None, var1, var2])

        decoded = decode(setting, [1, 2, 3], self.domain)
        self.assertEqual(decoded, [1, 2, 3])

        # Encoded variables cannot be decoded without a domain ...
        with self.assertRaises(ValueError):
            decode(setting, encoded_vars())
        # ... but a list holding only None can.
        decoded = decode(setting, ([None, None], -3), None)
        self.assertEqual(decoded, [None, None])

    def test_decode_dict_setting(self):
        setting = ContextSetting(None)
        decode = self.handler.decode_setting

        var1, var2 = self.domain[:2]

        def encoded_vars():
            # A fresh encoded value for each call, as in separate literals.
            return ({(var1.name, 100 + vartype(var1)): 1,
                     (var2.name, 100 + vartype(var2)): 2}, -4)

        decoded = decode(setting, encoded_vars(), self.domain)
        self.assertEqual(decoded, {var1: 1, var2: 2})

        decoded = decode(setting, ({1: 2, 2: 3, 3: 4}, -2), self.domain)
        self.assertEqual(decoded, {1: 2, 2: 3, 3: 4})

        # Encoded variables cannot be decoded without a domain ...
        with self.assertRaises(ValueError):
            decode(setting, encoded_vars())

        # ... but a plain dict can.
        decoded = decode(setting, ({1: 2, 2: 3, 3: 4}, -2))
        self.assertEqual(decoded, {1: 2, 2: 3, 3: 4})

    def test_backward_compatible_params(self):
        # Passing the old ``metas_in_res`` argument is expected to emit an
        # OrangeDeprecationWarning.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            DomainContextHandler(metas_in_res=True)
            categories = [warning.category for warning in caught]
            self.assertIn(OrangeDeprecationWarning, categories)

    def create_context(self, domain, values):
        """Return a new handler context for `domain` holding `values`.

        A `domain` of None is replaced by an empty ``Domain([])``.
        """
        if domain is None:
            domain = Domain([])

        encoded = self.handler.encode_domain(domain)
        context = self.handler.new_context(domain, *encoded)
        context.values = values
        return context
class DomainContextHandlerTestCase(TestCase):
    def setUp(self):
        """Create the fixtures shared by every test in this class.

        * ``self.domain`` -- attributes ``c1``, ``d1``, ``d2``; class
          variable ``d3``; metas ``c2``, ``d4``.
        * ``self.args`` -- ``(domain, attributes, metas)`` where the two
          dicts map each variable name to ``Continuous`` or ``Discrete``.
        * ``self.handler`` -- a ``DomainContextHandler(metas_in_res=True)``
          whose ``read_defaults`` is replaced by a no-op.
        """
        attributes = [ContinuousVariable('c1'),
                      DiscreteVariable('d1', values='abc'),
                      DiscreteVariable('d2', values='def')]
        class_vars = [DiscreteVariable('d3', values='ghi')]
        metas = [ContinuousVariable('c2'),
                 DiscreteVariable('d4', values='jkl')]
        self.domain = Domain(attributes=attributes,
                             class_vars=class_vars,
                             metas=metas)

        attribute_types = {'c1': Continuous, 'd1': Discrete,
                           'd2': Discrete, 'd3': Discrete}
        meta_types = {'c2': Continuous, 'd4': Discrete}
        self.args = (self.domain, attribute_types, meta_types)

        self.handler = DomainContextHandler(metas_in_res=True)
        # NOTE(review): presumably stubbed so the tests never read stored
        # defaults -- confirm against DomainContextHandler.
        self.handler.read_defaults = lambda: None

    def test_encode_domain_with_match_none(self):
        """With ``MATCH_VALUES_NONE`` no variable is encoded by its values.

        Every discrete variable, the class variable included, is expected
        to be encoded simply as ``Discrete``.
        """
        handler = DomainContextHandler(
            match_values=DomainContextHandler.MATCH_VALUES_NONE,
            metas_in_res=True)
        expected_attributes = {'c1': Continuous, 'd1': Discrete,
                               'd2': Discrete, 'd3': Discrete}
        expected_metas = {'c2': Continuous, 'd4': Discrete}

        encoded = handler.encode_domain(self.domain)
        encoded_attributes, encoded_metas = encoded

        self.assertEqual(encoded_attributes, expected_attributes)
        self.assertEqual(encoded_metas, expected_metas)

    def test_encode_domain_with_match_class(self):
        """With ``MATCH_VALUES_CLASS`` only the class variable keeps its values.

        ``d3`` (the class variable) is expected to be encoded as its list of
        values, while the other discrete variables stay ``Discrete``.
        """
        handler = DomainContextHandler(
            match_values=DomainContextHandler.MATCH_VALUES_CLASS,
            metas_in_res=True)
        expected_attributes = {'c1': Continuous, 'd1': Discrete,
                               'd2': Discrete, 'd3': list('ghi')}
        expected_metas = {'c2': Continuous, 'd4': Discrete}

        encoded = handler.encode_domain(self.domain)
        encoded_attributes, encoded_metas = encoded

        self.assertEqual(encoded_attributes, expected_attributes)
        self.assertEqual(encoded_metas, expected_metas)

    def test_encode_domain_with_match_all(self):
        """With ``MATCH_VALUES_ALL`` every discrete variable keeps its values.

        Discrete attributes, the class variable and the discrete meta are
        all expected to be encoded as their lists of values.
        """
        handler = DomainContextHandler(
            match_values=DomainContextHandler.MATCH_VALUES_ALL,
            metas_in_res=True)
        expected_attributes = {'c1': Continuous, 'd1': list('abc'),
                               'd2': list('def'), 'd3': list('ghi')}
        expected_metas = {'c2': Continuous, 'd4': list('jkl')}

        encoded = handler.encode_domain(self.domain)
        encoded_attributes, encoded_metas = encoded

        self.assertEqual(encoded_attributes, expected_attributes)
        self.assertEqual(encoded_metas, expected_metas)

    def test_encode_domain_with_false_attributes_in_res(self):
        """``attributes_in_res=False`` must yield an empty attribute encoding.

        The metas are still expected to be encoded.
        """
        handler = DomainContextHandler(attributes_in_res=False,
                                       metas_in_res=True)
        expected_metas = {'c2': Continuous, 'd4': Discrete}

        encoded = handler.encode_domain(self.domain)
        encoded_attributes, encoded_metas = encoded

        self.assertEqual(encoded_attributes, {})
        self.assertEqual(encoded_metas, expected_metas)

    def test_encode_domain_with_false_metas_in_res(self):
        """``metas_in_res=False`` must yield an empty meta encoding.

        The attributes and the class variable are still expected to be
        encoded.
        """
        handler = DomainContextHandler(attributes_in_res=True,
                                       metas_in_res=False)
        expected_attributes = {'c1': Continuous, 'd1': Discrete,
                               'd2': Discrete, 'd3': Discrete}

        encoded = handler.encode_domain(self.domain)
        encoded_attributes, encoded_metas = encoded

        self.assertEqual(encoded_attributes, expected_attributes)
        self.assertEqual(encoded_metas, {})

    def test_match_returns_2_on_perfect_match(self):
        """A context holding exactly the encoded domain must score 2."""
        _, attributes, metas = self.args
        context = Mock(attributes=attributes, metas=metas, values={})

        score = self.handler.match(context, *self.args)

        self.assertEqual(2., score)

    def test_match_returns_1_if_everything_matches(self):
        """``match`` must score 1 when every stored entry exists in the domain.

        Four layouts of stored values are checked in turn; ``d1`` is an
        attribute and ``d4`` a meta of the fixture domain.
        """
        self.handler.bind(SimpleWidget)

        stored_value_sets = [
            # Attributes in values
            {'with_metas': ('d1', Discrete), 'required': ('d1', Discrete)},
            # Metas in values
            {'with_metas': ('d4', Discrete), 'required': ('d1', Discrete)},
            # Attributes in lists
            {'with_metas': [("d1", Discrete)]},
            # Metas in lists
            {'with_metas': [("d4", Discrete)]},
        ]

        for stored_values in stored_value_sets:
            context = Mock(values=stored_values)
            self.assertEqual(1., self.handler.match(context, *self.args))

    def test_match_returns_point_1_when_nothing_to_match(self):
        """A context with no stored values must score 0.1."""
        self.handler.bind(SimpleWidget)
        empty_context = Mock(values={})

        score = self.handler.match(empty_context, *self.args)

        self.assertEqual(0.1, score)

    def test_match_returns_zero_on_incompatible_context(self):
        """Check ``match`` scores for contexts naming an unknown variable ``u``.

        * ``required`` refers to ``u``: score 0 expected.
        * the selected ``if_selected`` entry is ``u``: score 0 expected.
        * ``u`` is in ``if_selected`` but not selected: the context is
          still usable and a score of about 0.667 is expected (not zero,
          despite the test name).
        """
        self.handler.bind(SimpleWidget)

        # required
        missing_required = Mock(values={
            'required': ('u', Discrete),
            'with_metas': ('d1', Discrete),
        })
        score = self.handler.match(missing_required, *self.args)
        self.assertEqual(0, score)

        # selected if_selected
        missing_selected = Mock(values={
            'with_metas': ('d1', Discrete),
            'if_selected': [('u', Discrete)],
            'selected': [0],
        })
        score = self.handler.match(missing_selected, *self.args)
        self.assertEqual(0, score)

        # unselected if_selected
        missing_unselected = Mock(values={
            'with_metas': ('d1', Discrete),
            'if_selected': [('u', Discrete), ('d1', Discrete)],
            'selected': [1],
        })
        score = self.handler.match(missing_unselected, *self.args)
        self.assertAlmostEqual(0.667, score, places=2)

    def test_clone_context(self):
        """Cloning a context must keep only values valid for the domain.

        Expected in the clone's values: ``text`` is kept as is,
        ``with_metas`` keeps only the ``(name, type)`` pairs that occur in
        the fixture domain, and ``required`` (which names the unknown
        variable ``u``) is absent.
        """
        self.handler.bind(SimpleWidget)
        stored_values = {
            'text': ('u', -2),
            'with_metas': [('d1', Discrete), ('d1', Continuous),
                           ('c1', Continuous), ('c1', Discrete)],
            'required': ('u', Continuous),
        }
        context = self.create_context(self.domain, stored_values)

        cloned = self.handler.clone_context(context, *self.args)
        new_values = cloned.values

        self.assertEqual(new_values['text'], ('u', -2))
        self.assertEqual([('d1', Discrete), ('c1', Continuous)],
                         new_values['with_metas'])
        self.assertNotIn('required', new_values)

    def test_open_context(self):
        """Opening a context for a known domain must restore its settings.

        A context created for the fixture domain is placed among the
        handler's global contexts; after ``open_context`` the widget's
        current context must describe the domain and the widget must carry
        the stored ``text`` and ``with_metas`` values.
        """
        self.handler.bind(SimpleWidget)
        stored = self.create_context(self.domain, {
            'text': ('u', -2),
            'with_metas': [('d1', Discrete), ('d2', Discrete)],
        })
        # Surround the stored context with two contexts that hold no values.
        self.handler.global_contexts = [
            Mock(values={}), stored, Mock(values={})]

        widget = SimpleWidget()
        self.handler.initialize(widget)
        self.handler.open_context(widget, self.args[0])

        opened = widget.current_context
        self.assertEqual(opened.attributes, self.args[1])
        self.assertEqual(opened.metas, self.args[2])
        expected_order = (('c1', Continuous), ('d1', Discrete),
                          ('d2', Discrete), ('d3', Discrete),
                          ('c2', Continuous), ('d4', Discrete))
        self.assertSequenceEqual(opened.ordered_domain, expected_order)

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas, [('d1', Discrete),
                                             ('d2', Discrete)])

    def test_open_context_with_imperfect_match(self):
        """Opening a partially matching context must adapt its values.

        The stored context is created for an empty domain and its lists
        mix ``(name, type)`` pairs that do and do not occur in the fixture
        domain. After ``open_context`` the widget is expected to hold only
        the pairs found in the domain, and ``selected`` is expected to
        change from ``[2]`` to ``[1]`` to follow the filtered
        ``if_selected`` list.
        """
        self.handler.bind(SimpleWidget)
        stored = self.create_context(None, {
            'text': ('u', -2),
            'with_metas': [('d1', Discrete), ('d1', Continuous),
                           ('c1', Continuous), ('c1', Discrete)],
            'if_selected': [('c1', Discrete), ('c1', Continuous),
                            ('d1', Discrete), ('d1', Continuous)],
            'selected': [2],
        })
        # Surround the stored context with two contexts that hold no values.
        self.handler.global_contexts = [
            Mock(values={}), stored, Mock(values={})]

        widget = SimpleWidget()
        self.handler.initialize(widget)
        self.handler.open_context(widget, self.args[0])

        opened = widget.current_context
        self.assertEqual(opened.attributes, self.args[1])
        self.assertEqual(opened.metas, self.args[2])
        expected_order = (('c1', Continuous), ('d1', Discrete),
                          ('d2', Discrete), ('d3', Discrete),
                          ('c2', Continuous), ('d4', Discrete))
        self.assertSequenceEqual(opened.ordered_domain, expected_order)

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas, [('d1', Discrete),
                                             ('c1', Continuous)])
        self.assertEqual(widget.if_selected, [('c1', Continuous),
                                              ('d1', Discrete)])
        self.assertEqual(widget.selected, [1])

    def test_open_context_with_no_match(self):
        """Opening a context when none is stored must create a fresh one.

        The widget's own settings are expected to stay as they were, the
        new current context must describe the fixture domain, and the
        widget's ``text`` must be recorded in the context as ``('u', -2)``.
        """
        self.handler.bind(SimpleWidget)
        widget = SimpleWidget()
        self.handler.initialize(widget)
        widget.text = 'u'

        self.handler.open_context(widget, self.args[0])

        self.assertEqual(widget.text, 'u')
        self.assertEqual(widget.with_metas, [])

        opened = widget.current_context
        self.assertEqual(opened.attributes, self.args[1])
        self.assertEqual(opened.metas, self.args[2])
        expected_order = (('c1', Continuous), ('d1', Discrete),
                          ('d2', Discrete), ('d3', Discrete),
                          ('c2', Continuous), ('d4', Discrete))
        self.assertSequenceEqual(opened.ordered_domain, expected_order)
        self.assertEqual(opened.values['text'], ('u', -2))

    def create_context(self, domain, values):
        """Return a new handler context for ``domain`` carrying ``values``.

        Args:
            domain: the domain to create the context for, or ``None`` to
                use an empty ``Domain([])``.
            values: object stored on the new context as ``context.values``.

        Returns:
            The context produced by ``self.handler.new_context`` for the
            domain and the result of ``self.handler.encode_domain``.
        """
        # Compare against None explicitly: a truthiness test would also
        # replace any domain object that merely evaluates as false, instead
        # of using the domain the caller actually passed.
        if domain is None:
            domain = Domain([])

        context = self.handler.new_context(domain,
                                           *self.handler.encode_domain(domain))
        context.values = values
        return context