Example #1
0
class OWNxExplorer(OWDataProjectionWidget):
    name = "Network Explorer"
    description = "Visually explore the network and its properties."
    icon = "icons/NetworkExplorer.svg"
    priority = 6420

    class Inputs:
        # Input signals of the widget. Each one is bound to its handler method
        # further down in the class through the `@Inputs.<name>` decorator.
        node_data = Input("Node Data", Table)
        node_subset = Input("Node Subset", Table)
        # On the right-hand side `network` is still the imported module; the
        # assignment then binds a class attribute of the same name.
        network = Input("Network", network.Graph, default=True)
        node_distances = Input("Node Distances", Orange.misc.DistMatrix)

    class Outputs(OWDataProjectionWidget.Outputs):
        # Outputs added on top of those inherited from the parent widget;
        # all three are sent from send_data().
        subgraph = Output("Selected sub-network", network.Graph)
        unselected_subgraph = Output("Remaining sub-network", network.Graph)
        distances = Output("Distance matrix", Orange.misc.DistMatrix)

    UserAdviceMessages = [
        widget.Message('Double clicks select connected components',
                       widget.Message.Information),
    ]

    GRAPH_CLASS = GraphView
    graph = SettingProvider(GraphView)

    randomizePositions = Setting(True)
    mark_hops = Setting(1)
    mark_min_conn = Setting(5)
    mark_max_conn = Setting(5)
    mark_most_conn = Setting(1)

    alpha_value = 255  # Override the setting from parent

    class Warning(widget.OWWidget.Warning):
        # Non-fatal problems reported to the user.
        # Shown when the distance matrix length differs from the node count.
        distance_matrix_mismatch = widget.Msg(
            "Distance matrix size doesn't match the number of network nodes "
            "and will be ignored.")
        # Shown when node data arrives without a network.
        no_graph_found = widget.Msg(
            'Node data is given, graph data is missing')

    class Error(widget.OWWidget.Error):
        # Raised in handleNewSignals() when len(node_data) != number of nodes.
        data_size_mismatch = widget.Msg(
            'Length of the data does not match the number of nodes.')
        # Raised in set_graph() when nodes + edges exceed 30000.
        network_too_large = widget.Msg('Network is too large to visualize.')
        # NOTE(review): not raised anywhere in the visible part of this class.
        single_node_graph = widget.Msg("I don't do single-node graphs today.")

    def __init__(self):
        """Initialize widget state, the search timer and the marking mode.

        The attributes assigned before ``super().__init__()`` are referenced
        by name from the controls built in ``_add_info_box`` and
        ``_add_mark_box``, so they have to exist before the parent constructor
        runs.
        """
        # These are already needed in super().__init__()
        self.number_of_nodes = 0
        self.number_of_edges = 0
        self.nHighlighted = 0
        self.nSelected = 0
        self.nodes_per_edge = 0
        self.edges_per_node = 0

        self.mark_mode = 0
        self.mark_text = ""

        super().__init__()

        # Current inputs and the data derived from them
        self.network = None
        self.node_data = None
        self.distance_matrix = None
        self.edges = None
        self.positions = None

        # State of the background layout optimization (see relayout())
        self._optimizer = None
        self._animation_thread = None
        self._stop_optimization = False

        self.marked_nodes = None
        # Timer used to delay update_marks() while the user types a search
        # string; it is (re)started with a 300 ms timeout in _add_mark_box().
        self.searchStringTimer = QTimer(self)
        self.searchStringTimer.timeout.connect(self.update_marks)
        # Shows/hides the criterion-specific controls for the initial mode
        self.set_mark_mode()
        self.setMinimumWidth(600)

    def sizeHint(self):
        """Return the preferred size of the widget: 800 by 600."""
        preferred_width, preferred_height = 800, 600
        return QSize(preferred_width, preferred_height)

    def _add_controls(self):
        """Build the control area and the marking box in the main area.

        The calls below are made in the order in which the boxes are created:
        info box, point properties, effects box, plot properties, a stretch,
        zoom/select box and the commit controls.
        """
        self.gui = OWPlotGUI(self)
        self._add_info_box()
        self.gui.point_properties_box(self.controlArea)
        self._add_effects_box()
        self.gui.plot_properties_box(self.controlArea)
        gui.rubber(self.controlArea)
        self.gui.box_zoom_select(self.controlArea)
        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")
        # The marking box is placed in the main area, not in the control area
        self._add_mark_box()
        # Label-based marks depend on the chosen label attribute
        self.controls.attr_label.activated.connect(self.on_change_label_attr)

    def _add_info_box(self):
        """Create the box with node/edge statistics and the layout controls."""
        info_box = gui.vBox(self.controlArea, True)

        # The label texts reference widget attributes by name.
        nodes_text = (
            "Nodes: %(number_of_nodes)i (%(nodes_per_edge).2f per edge); "
            "%(nSelected)i selected")
        edges_text = (
            "Edges: %(number_of_edges)i (%(edges_per_node).2f per node)")
        gui.label(info_box, self, nodes_text)
        gui.label(info_box, self, edges_text)

        # 'Re-layout' and 'Stop' share a row; 'Stop' starts out hidden and the
        # two are swapped by set_buttons().
        layout_row = gui.hBox(info_box)
        self.relayout_button = gui.button(
            layout_row, self, 'Re-layout',
            callback=self.relayout, autoDefault=False)
        self.stop_button = gui.button(
            layout_row, self, 'Stop',
            callback=self.stop_relayout, autoDefault=False, hidden=True)
        self.randomize_cb = gui.checkBox(
            layout_row, self, "randomizePositions", "Randomize positions")

    def _add_effects_box(self):
        """Create the box with node size, edge width and edge label options.

        Keeps references to the created check boxes:
        ``checkbox_relative_edges``, ``checkbox_show_weights`` and
        ``checkbox_label_selected_edges``.
        """
        gbox = self.gui.create_gridbox(self.controlArea, True)
        self.gui.add_widget(self.gui.PointSize, gbox)
        # Relabel the point size control added just above
        gbox.layout().itemAtPosition(1, 0).widget().setText("Node Size:")
        self.gui.add_control(gbox,
                             gui.hSlider,
                             "Edge width:",
                             master=self,
                             value='graph.edge_width',
                             minValue=1,
                             maxValue=10,
                             step=1,
                             callback=self.graph.update_edges)
        box = gui.vBox(None)
        gbox.layout().addWidget(box, 3, 0, 1, 2)
        gui.separator(box)
        self.checkbox_relative_edges = gui.checkBox(
            box,
            self,
            'graph.relative_edge_widths',
            'Scale edge widths to weights',
            callback=self.graph.update_edges)
        self.checkbox_show_weights = gui.checkBox(
            box,
            self,
            'graph.show_edge_weights',
            'Show edge weights',
            callback=self.graph.update_edge_labels)
        # This check box used to be stored in `checkbox_show_weights` as well,
        # which overwrote the reference to the "Show edge weights" check box
        # and made handleNewSignals() enable/disable the wrong control.
        self.checkbox_label_selected_edges = gui.checkBox(
            box,
            self,
            'graph.label_selected_edges',
            'Label only edges of selected nodes',
            callback=self.graph.update_edge_labels)

        # This is ugly: create a slider that controls alpha_value so that
        # parent can enable and disable it - although it is never added to any
        # layout nor made visible to the user
        gui.hSlider(None, self, "graph.alpha_value")

    def _add_mark_box(self):
        """Create the marking box in the main area.

        Builds ``self.mark_criteria``, a list of
        ``(description, control or None, function)`` tuples. The function of
        the active criterion is called by ``update_marks`` and returns the
        nodes to mark (or ``None``). The optional control is shown only while
        its criterion is active (see ``set_mark_mode``).
        """
        hbox = gui.hBox(None, box=True)
        self.mainArea.layout().addWidget(hbox)
        # Despite the name, this is a horizontal box; it holds the
        # criterion-specific controls (spins and line edits)
        vbox = gui.hBox(hbox)

        def spin(value, label, minv, maxv):
            """Create a spin box bound to `value`; return its enclosing box."""
            return gui.spin(vbox,
                            self,
                            value,
                            label=label,
                            minv=minv,
                            maxv=maxv,
                            step=1,
                            alignment=Qt.AlignRight,
                            callback=self.update_marks).box

        def text_line():
            """Create a line edit bound to `mark_text`; return its box."""
            def set_search_string_timer():
                # Restart the timer on every keystroke, so that marks are
                # updated 300 ms after the last one
                self.searchStringTimer.stop()
                self.searchStringTimer.start(300)

            return gui.lineEdit(gui.hBox(vbox),
                                self,
                                "mark_text",
                                label="Text: ",
                                orientation=Qt.Horizontal,
                                minimumWidth=50,
                                callback=set_search_string_timer,
                                callbackOnType=True).box

        def mark_label_starts():
            """Indices of labels starting with the text (case-insensitive)."""
            txt = self.mark_text.lower()
            if not txt:
                return None
            labels = self.get_label_data()
            if labels is None:
                return None
            return [
                i for i, label in enumerate(labels)
                if label.lower().startswith(txt)
            ]

        def mark_label_contains():
            """Indices of labels containing the text (case-insensitive)."""
            txt = self.mark_text.lower()
            if not txt:
                return None
            labels = self.get_label_data()
            if labels is None:
                return None
            return [
                i for i, label in enumerate(labels) if txt in label.lower()
            ]

        def mark_text():
            """Indices of data instances whose values contain the text."""
            txt = self.mark_text.lower()
            if not txt or self.data is None:
                return None
            # Values are joined with NUL, so that a match cannot span two
            # adjacent values
            return [
                i for i, inst in enumerate(self.data)
                if txt in "\x00".join(map(str, inst.list)).lower()
            ]

        def mark_reachable():
            """Nodes reachable from the current selection."""
            selected = self.graph.get_selection()
            if selected is None:
                return None
            return self.get_reachable(selected)

        def mark_close():
            """Unselected nodes within `mark_hops` hops of the selection."""
            selected = self.graph.get_selection()
            if selected is None:
                return None
            neighbours = set(selected)
            last_round = list(neighbours)
            # Expand the frontier by one hop in each iteration
            for _ in range(self.mark_hops):
                next_round = set()
                for neigh in last_round:
                    next_round |= set(self.network[neigh])
                neighbours |= next_round
                last_round = next_round
            neighbours -= set(selected)
            return list(neighbours)

        def mark_from_input():
            """Indices of data instances whose id is in the subset input."""
            if self.subset_data is None or self.data is None:
                return None
            ids = set(self.subset_data.ids)
            return [i for i, ex in enumerate(self.data) if ex.id in ids]

        def mark_most_connections():
            """Nodes whose degree is among the `mark_most_conn` largest.

            Nodes tied with the n-th largest degree are all included, so more
            than n nodes may be returned.
            """
            n = self.mark_most_conn
            if n >= self.number_of_nodes:
                return np.arange(self.number_of_nodes)
            # assumes this gives an (n_nodes, 2) array of (node, degree) rows,
            # as the column indexing below requires -- TODO confirm
            degrees = np.array(self.network.degree())
            # pylint: disable=invalid-unary-operand-type
            # The n-th largest degree
            min_degree = np.partition(degrees[:, 1].flatten(), -n)[-n]
            return degrees[degrees[:, 1] >= min_degree, 0]

        # The position in this list is the value of `mark_mode`;
        # on_change_label_attr() relies on the label criteria being 1 and 2.
        self.mark_criteria = [
            ("(Select criteria for marking)", None, lambda: []),
            ("Mark nodes whose label starts with", text_line(),
             mark_label_starts),
            ("Mark nodes whose label contains", text_line(),
             mark_label_contains),
            ("Mark nodes whose data that contains", text_line(), mark_text),
            ("Mark nodes reachable from selected", None, mark_reachable),
            ("Mark nodes in vicinity of selection",
             spin("mark_hops", "Number of hops:", 1, 20), mark_close),
            ("Mark nodes from subset signal", None, mark_from_input),
            # Nodes with degree <= mark_max_conn
            ("Mark nodes with few connections",
             spin("mark_max_conn", "Max. connections:", 0, 1000), lambda: [
                 node for node, degree in self.network.degree()
                 if degree <= self.mark_max_conn
             ]),
            # Nodes with degree >= mark_min_conn
            ("Mark nodes with many connections",
             spin("mark_min_conn", "Min. connections:", 1, 1000), lambda: [
                 node for node, degree in self.network.degree()
                 if degree >= self.mark_min_conn
             ]),
            ("Mark nodes with most connections",
             spin("mark_most_conn", "Number of marked:", 1,
                  1000), mark_most_connections),
            # Nodes whose degree exceeds the degree of each neighbour;
            # a node without neighbours is compared against 0
            ("Mark nodes with more connections than any neighbour", None,
             lambda: [
                 node for node, degree in self.network.degree()
                 if degree > max(
                     (deg
                      for _, deg in self.network.degree(self.network[node])),
                     default=0)
             ]),
            # Nodes whose degree exceeds the mean degree of their neighbours;
            # a node without neighbours is compared against 0
            ("Mark nodes with more connections than average neighbour", None,
             lambda: [
                 node for node, degree in self.network.degree()
                 if degree > np.mean([
                     deg for _, deg in self.network.degree(self.network[node])
                 ] or [0])
             ])
        ]
        cb = gui.comboBox(hbox,
                          self,
                          "mark_mode",
                          items=[item for item, *_ in self.mark_criteria],
                          maximumContentsLength=-1,
                          callback=self.set_mark_mode)
        # Move the combo in front of the criterion-specific controls
        hbox.layout().insertWidget(0, cb)

        gui.rubber(hbox)
        # Buttons that transfer marks to the selection; their visibility is
        # managed by update_selection_buttons()
        self.btselect = gui.button(hbox,
                                   self,
                                   "Select",
                                   callback=self.select_marked)
        self.btadd = gui.button(hbox,
                                self,
                                "Add to Selection",
                                callback=self.select_add_marked)
        self.btgroup = gui.button(hbox,
                                  self,
                                  "Add New Group",
                                  callback=self.select_as_group)

    def set_mark_mode(self, mode=None):
        """Activate a marking criterion and refresh the marks.

        If `mode` is given, it becomes the new `mark_mode`. The control that
        belongs to the active criterion is shown and the controls of all other
        criteria are hidden.
        """
        if mode is not None:
            self.mark_mode = mode
        for index, criterion in enumerate(self.mark_criteria):
            _description, control, _function = criterion
            if not control:
                continue
            if index == self.mark_mode:
                control.show()
            else:
                control.hide()
        # Drop a pending delayed update; marks are refreshed right away
        self.searchStringTimer.stop()
        self.update_marks()

    def update_marks(self):
        """Recompute marked nodes for the active criterion and redraw them.

        Does nothing without a network. An empty or missing result of the
        criterion function is stored as ``None``.
        """
        if self.network is None:
            return
        criterion = self.mark_criteria[self.mark_mode]
        find_nodes = criterion[2]
        found = find_nodes()
        if found is not None and len(found):
            self.marked_nodes = np.asarray(found)
        else:
            self.marked_nodes = None
        view = self.graph
        view.update_marks()
        if view.label_only_selected:
            view.update_labels()
        self.update_selection_buttons()

    def update_selection_buttons(self):
        """Show the selection buttons that make sense in the current state.

        Without marked nodes all three buttons are hidden. Otherwise "Select"
        is shown, and the other two depend on the number of selection groups:
        none - both hidden; one - "Add to Selection"; more - "Add to Group"
        and "Add New Group".
        """
        if self.marked_nodes is None:
            self.btselect.hide()
            self.btadd.hide()
            self.btgroup.hide()
            return
        self.btselect.show()

        # `graph.selection` holds the group of each node (0 = not selected;
        # see send_data). `graph.get_selection()` returns node indices and can
        # be None, so its maximum says nothing about the number of groups.
        groups = self.graph.selection
        if groups is None or not len(groups):
            last_group = 0
        else:
            last_group = np.max(groups)

        if last_group == 0:
            self.btadd.hide()
            self.btgroup.hide()
        elif last_group == 1:
            self.btadd.setText("Add to Selection")
            self.btadd.show()
            self.btgroup.hide()
        else:
            self.btadd.setText("Add to Group")
            self.btadd.show()
            self.btgroup.show()

    def selection_changed(self):
        """Refresh the selection buttons and marks after a selection change."""
        super().selection_changed()
        # Called explicitly because update_marks() returns early (without
        # touching the buttons) when there is no network
        self.update_selection_buttons()
        # Some criteria (reachable, vicinity) depend on the selection
        self.update_marks()

    def select_marked(self):
        """Callback of the "Select" button: select the marked nodes."""
        self.graph.selection_select(self.marked_nodes)

    def select_add_marked(self):
        """Callback of the "Add to ..." button: append the marked nodes."""
        self.graph.selection_append(self.marked_nodes)

    def select_as_group(self):
        """Callback of "Add New Group": put marked nodes into a new group."""
        self.graph.selection_new_group(self.marked_nodes)

    def on_change_label_attr(self):
        """Recompute marks if the active criterion depends on labels."""
        # 1 and 2 are the positions of "label starts with" and "label
        # contains" in self.mark_criteria
        if self.mark_mode in (1, 2):
            self.update_marks()

    @Inputs.node_data
    def set_node_data(self, data):
        """Store the node data; it is applied in handleNewSignals()."""
        self.node_data = data

    @Inputs.node_subset
    def set_node_subset(self, data):
        """Pass the node subset to the parent's subset handler."""
        # Explicit delegation: this handler has a different name than the
        # parent's set_subset_data
        super().set_subset_data(data)

    @Inputs.node_distances
    def set_items_distance_matrix(self, matrix):
        """Store the distance matrix and invalidate the current layout."""
        self.distance_matrix = matrix
        # With positions reset, handleNewSignals() rebuilds the edges and
        # starts a new layout
        self.positions = None

    @Inputs.network
    def set_graph(self, graph):
        """Store the input network and update the node/edge statistics.

        A missing or empty network, or one with more than 30000 nodes and
        edges together, resets the widget to the "no network" state; in the
        latter case ``Error.network_too_large`` is shown. Otherwise the marking
        mode is reset and positions are invalidated, so that
        ``handleNewSignals`` computes a new layout.
        """
        def set_graph_none(error=None):
            if error is not None:
                error()
            self.network = None
            self.number_of_nodes = self.edges_per_node = 0
            self.number_of_edges = self.nodes_per_edge = 0

        def compute_stats():
            self.number_of_nodes = graph.number_of_nodes()
            self.number_of_edges = graph.number_of_edges()
            # number_of_nodes is non-zero here: empty graphs are rejected below
            self.edges_per_node = self.number_of_edges / self.number_of_nodes
            self.nodes_per_edge = \
                self.number_of_nodes / max(1, self.number_of_edges)

        # Errors refer to the previous input; clear them before any early
        # return so that, e.g., "too large" does not outlive the removal of
        # the offending network.
        self.Error.clear()

        # number_of_nodes is a method: comparing the bound method itself to 0
        # is always False and would let an empty graph through to the division
        # in compute_stats().
        if not graph or graph.number_of_nodes() == 0:
            set_graph_none()
            return
        if graph.number_of_nodes() + graph.number_of_edges() > 30000:
            set_graph_none(self.Error.network_too_large)
            return

        self.mark_text = ""
        self.set_mark_mode(0)
        self.network = graph
        compute_stats()
        self.positions = None

    def handleNewSignals(self):
        """Apply the stored inputs once all input handlers have been called.

        Stops a running layout, sets ``self.data`` from the node data (or from
        the network's items when no node data is given) and, if positions were
        invalidated, rebuilds the edge matrix, resets the graph and starts a
        new layout. Marks and selection buttons are refreshed at the end.
        """
        network = self.network

        def set_actual_data():
            # Decide what data the projection shows and report mismatches
            self.closeContext()
            self.Error.data_size_mismatch.clear()
            self.Warning.no_graph_found.clear()
            self._invalid_data = False
            if network is None:
                if self.node_data is not None:
                    self.Warning.no_graph_found()
                return
            if self.node_data is None:
                self.data = network.items()
            elif len(self.node_data) != self.number_of_nodes:
                self.Error.data_size_mismatch()
                self._invalid_data = True
                self.data = None
            else:
                self.data = self.node_data
            if self.data is not None:
                # Replicate the necessary parts of set_data
                # (the `np.bool` alias no longer exists in current NumPy;
                # the builtin `bool` is the same dtype)
                self.valid_data = np.full(len(self.data), True, dtype=bool)
                self.init_attr_values()
                self.openContext(self.data)
                self.cb_class_density.setEnabled(self.can_draw_density())

        def set_actual_edges():
            # Build self.edges, a sparse COO matrix of edge weights
            def set_checkboxes(value):
                self.checkbox_show_weights.setEnabled(value)
                self.checkbox_relative_edges.setEnabled(value)

            self.Warning.distance_matrix_mismatch.clear()

            if network is None:
                self.edges = None
                set_checkboxes(False)
                return

            set_checkboxes(True)
            edges = network.edges(data='weight')
            if edges:
                row, col, data = zip(*edges)
                # Unweighted network: give every edge the weight 1
                if all(w is None for w in data):
                    data = np.ones((len(data), ), dtype=float)
                self.edges = sp.coo_matrix((data, (row, col)))
            else:
                self.edges = sp.coo_matrix((0, 3))
            if self.distance_matrix is not None:
                if len(self.distance_matrix) != self.number_of_nodes:
                    self.Warning.distance_matrix_mismatch()
                else:
                    # Replace weights with distances between the end nodes.
                    # Kept as floats, like the weights above: an integer dtype
                    # would truncate fractional distances (often to 0).
                    self.edges.data = np.fromiter(
                        (self.distance_matrix[u, v]
                         for u, v in zip(self.edges.row, self.edges.col)),
                        dtype=float,
                        count=len(self.edges.row))
            # Weight-related options make no sense if all weights are equal
            if np.allclose(self.edges.data, 0):
                self.edges.data[:] = 1
                set_checkboxes(False)
            elif len(set(self.edges.data)) == 1:
                set_checkboxes(False)

        self.stop_optimization_and_wait()
        set_actual_data()
        if self.positions is None:
            # Network or distances changed: start from scratch
            set_actual_edges()
            self.set_random_positions()
            self.graph.reset_graph()
            self.relayout()
        else:
            # Only node data or subset changed: keep the layout
            self.graph.update_point_props()
        self.update_marks()
        self.update_selection_buttons()

    def set_random_positions(self):
        """Assign each node a random position with coordinates in [0, 1)."""
        # One (x, y) row per node
        self.positions = np.random.uniform(size=(self.number_of_nodes, 2))

    def get_reachable(self, initial):
        """Return the nodes reachable from `initial`, including `initial`.

        The network is explored breadth-first by following
        ``self.network[node]``. The result is a list in no particular order.
        """
        queue = list(initial)
        seen = set(queue)
        position = 0
        # The queue grows while it is being consumed
        while position < len(queue):
            current = queue[position]
            position += 1
            fresh = set(self.network[current]) - seen
            queue.extend(fresh)
            seen.update(fresh)
        return list(seen)

    def send_data(self):
        """Send the parent's outputs plus sub-networks and distances.

        Without a selection, the selected sub-network and distances are
        ``None`` and the whole network is sent as the remaining sub-network.
        """
        super().send_data()

        outputs = self.Outputs
        picked = self.graph.get_selection()
        if picked is None or len(picked) == 0:
            outputs.subgraph.send(None)
            outputs.unselected_subgraph.send(self.network)
            outputs.distances.send(None)
            return

        # Selection group of each node; 0 marks unselected nodes
        groups = self.graph.selection
        selected_net = self.network.subgraph(picked)
        selected_net.set_items(
            self._get_selected_data(self.data, picked, groups))
        outputs.subgraph.send(selected_net)

        unselected = np.flatnonzero(groups == 0)
        outputs.unselected_subgraph.send(self.network.subgraph(unselected))

        matrix = self.distance_matrix
        if matrix is not None:
            outputs.distances.send(matrix.submatrix(sorted(picked)))
        else:
            outputs.distances.send(None)

    def get_coordinates_data(self):
        """Return the transposed positions, or ``(None, None)`` if unset."""
        if self.positions is None:
            return None, None
        return self.positions.T

    def get_embedding(self):
        """Return the current node positions (``None`` if not set)."""
        return self.positions

    def get_subset_mask(self):
        """Return the parent's subset mask, or ``None`` without data."""
        if self.data is None:
            return None
        return super().get_subset_mask()

    def get_edges(self):
        """Return the edge matrix built in handleNewSignals(), or ``None``."""
        return self.edges

    def get_marked_nodes(self):
        """Return the array of marked nodes, or ``None`` if none are marked."""
        return self.marked_nodes

    def set_buttons(self, running):
        """Show 'Stop' while the layout is running, 'Re-layout' otherwise."""
        self.stop_button.setHidden(not running)
        self.relayout_button.setHidden(running)

    def stop_relayout(self):
        """Callback of the 'Stop' button: request the layout to stop."""
        # The flag is read by the optimizer's send_update() in relayout()
        self._stop_optimization = True
        self.set_buttons(running=False)

    # TODO: Stop relayout if new data is received
    def relayout(self):
        """Run the Fruchterman-Reingold layout in a background thread.

        Does nothing if there are no edges. Positions are first randomized if
        `randomizePositions` is set. While the optimization runs, the plot is
        simplified and the 'Stop' button replaces 'Re-layout'.
        """
        if self.edges is None:
            return
        if self.randomizePositions:
            self.set_random_positions()
        self.progressbar = gui.ProgressBar(self, FR_ITERATIONS)
        self.set_buttons(running=True)
        self._stop_optimization = False

        # Simplify drawing during the animation: density is always off;
        # labels, edge labels and edges are dropped above the given sizes
        # (each flag is multiplied by a boolean, i.e. by 0 or 1).
        Simplifications = self.graph.Simplifications
        self.graph.set_simplifications(Simplifications.NoDensity +
                                       Simplifications.NoLabels *
                                       (len(self.graph.labels) > 20) +
                                       Simplifications.NoEdgeLabels *
                                       (len(self.graph.edge_labels) > 20) +
                                       Simplifications.NoEdges *
                                       (self.number_of_edges > 1000))

        # Large graphs get fewer iterations and no intermediate redraws
        large_graph = self.number_of_nodes + self.number_of_edges > 20000
        iterations = 5 if large_graph else FR_ITERATIONS

        class LayoutOptimizer(QObject):
            """Worker object that runs the layout and reports via signals."""
            update = Signal(np.ndarray, float)
            done = Signal(np.ndarray)
            stopped = Signal()

            def __init__(self, widget):
                super().__init__()
                self.widget = widget

            def send_update(self, positions, progress):
                """Callback passed to fruchterman_reingold.

                Returns False once the widget has requested a stop;
                presumably the layout routine stops iterating on False --
                TODO confirm against fruchterman_reingold.
                """
                if not large_graph:
                    self.update.emit(np.array(positions), progress)
                return not self.widget._stop_optimization

            def run(self):
                """Compute the layout; emit `done` and then `stopped`."""
                widget = self.widget
                edges = widget.edges
                positions = np.array(
                    fruchterman_reingold(
                        edges.data,
                        edges.row,
                        edges.col,
                        1 / np.sqrt(widget.number_of_nodes),  # k
                        widget.positions,
                        np.array([], dtype=np.int32),  # fixed
                        iterations,
                        0.1,  # sample ratio
                        self.send_update,
                        0.25))
                self.done.emit(positions)
                self.stopped.emit()

        def update(positions, progress):
            # Intermediate result: advance the progress bar and redraw
            self.progressbar.advance(progress)
            self.positions = positions
            self.graph.update_coordinates()

        def done(positions):
            # Final result: restore buttons and full drawing
            self.positions = positions
            self.set_buttons(running=False)
            self.graph.set_simplifications(
                self.graph.Simplifications.NoSimplifications)
            self.graph.update_coordinates()
            self.progressbar.finish()

        def thread_finished():
            # Drop the references once the thread has finished
            self._optimizer = None
            self._animation_thread = None

        self._optimizer = LayoutOptimizer(self)
        self._animation_thread = QThread()
        self._optimizer.update.connect(update)
        self._optimizer.done.connect(done)
        # `stopped` ends the thread's event loop, which triggers `finished`
        self._optimizer.stopped.connect(self._animation_thread.quit)
        self._optimizer.moveToThread(self._animation_thread)
        self._animation_thread.started.connect(self._optimizer.run)
        self._animation_thread.finished.connect(thread_finished)
        self._animation_thread.start()

    def stop_optimization_and_wait(self):
        """Stop a running layout, if any, and block until its thread ends."""
        if self._animation_thread is not None:
            # Ask the optimizer to stop (see send_update in relayout()), end
            # the thread's event loop and wait for the thread to finish
            self._stop_optimization = True
            self._animation_thread.quit()
            self._animation_thread.wait()
            self._animation_thread = None

    def onDeleteWidget(self):
        """Stop the layout thread before the widget is deleted."""
        self.stop_optimization_and_wait()
        super().onDeleteWidget()

    def send_report(self):
        """Add graph statistics, data, visual settings and plot to the report.

        Nothing is reported when the widget has no network.
        """
        network = self.network
        if network is None:
            # Without this guard the attribute accesses below would raise
            return
        self.report_items('Graph info', [
            ("Number of vertices", network.number_of_nodes()),
            ("Number of edges", network.number_of_edges()),
            ("Vertices per edge", round(self.nodes_per_edge, 3)),
            ("Edges per vertex", round(self.edges_per_node, 3)),
        ])
        self.report_data("Data", network.items())
        # Visual settings are reported only if at least one attribute is set
        if any((self.attr_color, self.attr_shape, self.attr_size,
                self.attr_label)):
            self.report_items(
                "Visual settings",
                [("Color", self._get_caption_var_name(self.attr_color)),
                 ("Label", self._get_caption_var_name(self.attr_label)),
                 ("Shape", self._get_caption_var_name(self.attr_shape)),
                 ("Size", self._get_caption_var_name(self.attr_size))])
        self.report_plot()
class OWInterpolate(widget.OWWidget):
    """Widget that sets the interpolation method of a time series.

    Outputs the time series with the chosen interpolation settings and,
    separately, the interpolated time series.
    """
    name = 'Interpolate'
    description = 'Induce missing values (nan) in the time series by interpolation.'
    icon = 'icons/Interpolate.svg'
    priority = 15

    inputs = [("Time series", Table, 'set_data')]
    outputs = [
        (Output.TIMESERIES, Timeseries),
        (Output.INTERPOLATED, Timeseries),  # TODO
        # (Output.INTERPOLATOR, Model)     # TODO
    ]

    want_main_area = False
    resizing_enabled = False

    # Stored settings; `interpolation` is one of the combo box items below
    interpolation = settings.Setting('linear')
    multivariate = settings.Setting(False)
    autoapply = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            'While you can freely choose the interpolation method '
            'for continuous variables, discrete variables can only '
            'be interpolated with the <i>nearest</i> method or '
            'their mode (i.e. the most frequent value).', 'discrete-interp',
            widget.Message.Warning)
    ]

    def __init__(self):
        """Build the control area: method combo, check box, commit button."""
        # Call the parent constructor like the other widgets do
        super().__init__()
        self.data = None
        box = gui.vBox(self.controlArea, 'Interpolation Parameters')
        gui.comboBox(box,
                     self,
                     'interpolation',
                     callback=self.on_changed,
                     label='Interpolation of missing values:',
                     sendSelectedValue=True,
                     orientation=Qt.Horizontal,
                     items=('linear', 'cubic', 'nearest', 'mean'))
        gui.checkBox(box,
                     self,
                     'multivariate',
                     label='Multi-variate interpolation',
                     callback=self.on_changed)
        gui.auto_commit(box, self, 'autoapply', 'Apply')

    def set_data(self, data):
        """Input handler: convert the table to a Timeseries and commit."""
        self.data = None if data is None else Timeseries.from_data_table(data)
        self.on_changed()

    def on_changed(self):
        """Callback of the controls: commit the current settings."""
        self.commit()

    def commit(self):
        """Send the time series and its interpolated version.

        Both outputs are ``None`` when there is no input data. The
        interpolated output is also ``None`` when the result of ``try_`` is
        falsy.
        """
        data = self.data
        interpolated = None
        if data is not None:
            # Work on a copy so that the input is not modified
            data = data.copy()
            data.set_interpolation(self.interpolation, self.multivariate)
            # Interpolate only when there is data, instead of relying on
            # try_ to hide the AttributeError raised by None.interp()
            interpolated = try_(lambda: data.interp()) or None
        self.send(Output.TIMESERIES, data)
        self.send(Output.INTERPOLATED, interpolated)
Example #3
0
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget.

    Shows a confusion matrix for one of the learners in the evaluation
    results on input and outputs the data instances that correspond to the
    selected cells.
    """

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001
    keywords = []

    class Inputs:
        evaluation_results = Input("Evaluation Results",
                                   Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data",
                               Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    # Labels of the "Show" combo box; `selected_quantity` is an index into
    # this list (0: counts, 1: normalized by column sums, 2: by row sums,
    # see `_update`).
    quantities = [
        "Number of instances", "Proportion of predicted",
        "Proportion of actual"
    ]

    settings_version = 1
    settingsHandler = ClassValuesContextHandler()

    # Selected row of the learners list box (a one-element list).
    selected_learner = Setting([0], schema_only=True)
    # Selected cells as (row, column) pairs, offset so that the first data
    # cell is (0, 0); see `_invalidate` and `_set_selection`.
    selection = ContextSetting(set())
    selected_quantity = Setting(0)
    append_predictions = Setting(True)
    append_probabilities = Setting(False)
    autocommit = Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances", "click_cell")
    ]

    class Error(widget.OWWidget.Error):
        no_regression = Msg("Confusion Matrix cannot show regression results.")
        invalid_values = Msg(
            "Evaluation Results input contains invalid values")
        empty_input = widget.Msg("Empty result on input. Nothing to display.")

    def __init__(self):
        """Build the GUI: learner list, output options and the matrix view."""
        super().__init__()

        # State filled in by `set_results`.
        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        # List of learners; the selected row is stored in `selected_learner`.
        self.learners_box = gui.listBox(self.controlArea,
                                        self,
                                        "selected_learner",
                                        "learners",
                                        box='Learners',
                                        callback=self._learner_changed)

        # Options controlling which extra columns are added to the output.
        self.outputbox = gui.vBox(self.buttonsArea)
        box = gui.hBox(self.outputbox)
        gui.checkBox(box,
                     self,
                     "append_predictions",
                     "Predictions",
                     callback=self._invalidate)
        gui.checkBox(box,
                     self,
                     "append_probabilities",
                     "Probabilities",
                     callback=self._invalidate)

        gui.auto_apply(self.outputbox, self, "autocommit", box=False)

        box = gui.vBox(self.mainArea, box=True)

        # Combo box choosing which quantity (see `quantities`) is displayed.
        sbox = gui.hBox(box)
        gui.rubber(sbox)
        gui.comboBox(sbox,
                     self,
                     "selected_quantity",
                     items=self.quantities,
                     label="Show: ",
                     orientation=Qt.Horizontal,
                     callback=self._update)

        # Table view showing the matrix; both headers are hidden because the
        # labels are drawn as ordinary cells (see `_init_table`).
        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        # Any change of the selection updates `selection` and the outputs.
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        # Clicks on label/sum cells select whole rows, columns or the matrix.
        view.clicked.connect(self.cell_clicked)
        box.layout().addWidget(view)

        # Shortcut buttons for common selections.
        selbox = gui.hBox(box)
        gui.button(selbox,
                   self,
                   "Select Correct",
                   callback=self.select_correct,
                   autoDefault=False)
        gui.button(selbox,
                   self,
                   "Select Misclassified",
                   callback=self.select_wrong,
                   autoDefault=False)
        gui.button(selbox,
                   self,
                   "Clear Selection",
                   callback=self.select_none,
                   autoDefault=False)

    @staticmethod
    def sizeHint():
        """Return the initial widget size (750 x 340)."""
        width, height = 750, 340
        return QSize(width, height)

    def _item(self, i, j):
        """Return the model item at row ``i``, column ``j``.

        A new, not yet inserted ``QStandardItem`` is returned when the model
        has no (truthy) item at that position.
        """
        existing = self.tablemodel.item(i, j)
        if existing:
            return existing
        return QStandardItem()

    def _set_item(self, i, j, item):
        """Put ``item`` into the table model at row ``i``, column ``j``."""
        model = self.tablemodel
        model.setItem(i, j, item)

    def _init_table(self, nclasses):
        """Set up the static cells of the table for ``nclasses`` classes.

        Creates the "Predicted" and "Actual" captions, the four disabled
        corner cells and the bold row/column labels taken from
        ``self.headers``, then sizes the model to ``nclasses + 3`` rows and
        columns.
        """
        view = self.tableview
        model = self.tablemodel

        # Caption above the columns.
        predicted_caption = self._item(0, 2)
        predicted_caption.setData("Predicted", Qt.DisplayRole)
        predicted_caption.setTextAlignment(Qt.AlignCenter)
        predicted_caption.setFlags(Qt.NoItemFlags)
        self._set_item(0, 2, predicted_caption)

        # Caption left of the rows, drawn by a vertical delegate.
        actual_caption = self._item(2, 0)
        actual_caption.setData("Actual", Qt.DisplayRole)
        actual_caption.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        actual_caption.setFlags(Qt.NoItemFlags)
        view.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, actual_caption)

        view.setSpan(0, 2, 1, nclasses)
        view.setSpan(2, 0, nclasses, 1)

        header_font = QFont(model.invisibleRootItem().font())
        header_font.setBold(True)

        # The 2 x 2 block in the top-left corner is inert.
        for row, col in ((0, 0), (0, 1), (1, 0), (1, 1)):
            corner = self._item(row, col)
            corner.setFlags(Qt.NoItemFlags)
            self._set_item(row, col, corner)

        # Each header label appears once in row 1 and once in column 1.
        last_pos = len(self.headers) - 1
        for pos, label in enumerate(self.headers):
            for row, col in ((1, pos + 2), (pos + 2, 1)):
                header = self._item(row, col)
                header.setData(label, Qt.DisplayRole)
                header.setFont(header_font)
                header.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                header.setFlags(Qt.ItemIsEnabled)
                if pos < last_pos:
                    # "r" for labels in column 1, "b" for labels in row 1.
                    header.setData("r" if col == 1 else "b", BorderRole)
                    header.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(row, col, header)

        horizontal = view.horizontalHeader()
        if len(' '.join(self.headers)) >= 120:
            horizontal.setDefaultSectionSize(60)
        else:
            horizontal.setSectionResizeMode(QHeaderView.ResizeToContents)

        side = nclasses + 3
        model.setRowCount(side)
        model.setColumnCount(side)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Set the input results.

        Resets the widget, validates ``results`` and, when they are usable,
        rebuilds the table, restores the learner and cell selection and
        commits the outputs.

        Results are rejected (with the corresponding error shown) when the
        data has no discrete class, when ``results.actual`` is empty, or when
        the actual or predicted values contain NaN.

        Raises:
            NotImplementedError: if the results are valid but carry no data.
        """
        # false positive, pylint: disable=no-member
        # Remember the learner selection; it is restored below if still valid.
        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        self.Error.no_regression.clear()
        self.Error.empty_input.clear()
        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        elif results is not None and not results.actual.size:
            self.Error.empty_input()
            data = results = None

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            if np.any(np.isnan(results.actual)) or \
                    np.any(np.isnan(results.predicted)):
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        # Shows the error when NaNs were found and clears it otherwise.
        self.Error.invalid_values(shown=nan_values)

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            # Results without data are not supported.
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
            return

        self.report_button.setDisabled(False)

        nmodels = results.predicted.shape[0]
        # Labels for rows/columns: class values followed by a summation sign.
        self.headers = class_values + \
                       (unicodedata.lookup("N-ARY SUMMATION"), )

        # NOTE: The 'learner_names' is set in 'Test Learners' widget.
        self.learners = getattr(results, "learner_names",
                                [f"Learner #{i + 1}" for i in range(nmodels)])

        self._init_table(len(class_values))
        self.openContext(data.domain.class_var)
        # Fall back to the first learner when the previous selection is
        # empty or out of range for the new list of learners.
        if not prev_sel_learner or prev_sel_learner[0] >= len(self.learners):
            if self.learners:
                self.selected_learner[:] = [0]
        else:
            self.selected_learner[:] = prev_sel_learner
        self._update()
        self._set_selection()
        self.commit.now()

    def clear(self):
        """Reset the widget, clear controls.

        Drops the stored results and data, empties the table model and the
        header labels, and finally empties the list of learners.
        """
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Replace the selection with the diagonal cells of the matrix.

        The cells ``(i, i)`` for every ``i`` from 2 up to the last row of the
        model are put into the new selection.
        """
        model = self.tablemodel
        diagonal = QItemSelection()
        for pos in range(2, model.rowCount()):
            cell = model.index(pos, pos)
            diagonal.select(cell, cell)
        self.tableview.selectionModel().select(
            diagonal, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Replace the selection with the off-diagonal cells of the matrix.

        For every pair ``i < j`` of row numbers starting at 2, both
        ``(i, j)`` and its mirror ``(j, i)`` are put into the new selection.
        """
        model = self.tablemodel
        count = model.rowCount()
        off_diagonal = QItemSelection()
        for upper in range(2, count):
            for lower in range(upper + 1, count):
                for row, col in ((upper, lower), (lower, upper)):
                    cell = model.index(row, col)
                    off_diagonal.select(cell, cell)
        self.tableview.selectionModel().select(
            off_diagonal, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Clear the selection in the table view."""
        selection_model = self.tableview.selectionModel()
        selection_model.clear()

    def cell_clicked(self, model_index):
        """Select a whole row, column or the matrix after a click on an edge.

        Clicks in row 0 or column 0 are ignored. A click on a cell in row or
        column 1 or in the last row or column selects the matching column,
        row, or (for the two corner cells) everything from ``(2, 2)`` to the
        last cell. Clicks elsewhere leave the selection to the view.
        """
        row, col = model_index.row(), model_index.column()
        if row == 0 or col == 0:
            return

        model = self.tablemodel
        last = model.rowCount() - 1
        edges = (1, last)

        if row == col and row in edges:
            top, left, bottom, right = 2, 2, last, last
        elif row in edges:
            top, left, bottom, right = 2, col, last, col
        elif col in edges:
            top, left, bottom, right = row, 2, row, last
        else:
            return

        region = QItemSelection(model.index(top, left),
                                model.index(bottom, right))
        self.tableview.selectionModel().select(
            region, QItemSelectionModel.ClearAndSelect)

    def _prepare_data(self):
        """Build the output tables for the current cell selection.

        The data is extended with meta columns for the selected learner's
        predictions and/or class probabilities, depending on the widget
        options. An instance is selected when its (actual, predicted) pair
        matches one of the selected cells.

        Returns:
            A pair ``(data, annotated_data)``: the selected instances
            (``None`` if nothing is selected) and the annotated table.
        """
        learner_idx = self.selected_learner[0]
        chosen_cells = {(ind.row() - 2, ind.column() - 2)
                        for ind in self.tableview.selectedIndexes()}
        learner_name = self.learners[learner_idx]
        predicted = self.results.predicted[learner_idx]
        pairs = zip(self.results.actual, predicted)
        selected = [row for row, pair in enumerate(pairs)
                    if pair in chosen_cells]

        source_domain = self.data.domain
        class_var = source_domain.class_var
        metas = source_domain.metas
        taken_names = [
            var.name
            for var in chain(metas, [class_var], source_domain.attributes)
        ]

        extra = []
        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            proposed = "{}({})".format(class_var.name, learner_name)
            unique_name = get_unique_names(taken_names, proposed)
            prediction_var = Orange.data.DiscreteVariable(
                unique_name, class_var.values)
            metas = metas + (prediction_var, )

        if self.append_probabilities \
                and self.results.probabilities is not None:
            probs = self.results.probabilities[learner_idx]
            extra.append(np.array(probs, dtype=object))
            metas = metas + tuple(
                Orange.data.ContinuousVariable("p({})".format(value))
                for value in class_var.values)

        domain = Orange.data.Domain(source_domain.attributes,
                                    source_domain.class_vars, metas)
        data = self.data.transform(domain)
        if extra:
            # The new columns follow the original metas.
            first_new = len(source_domain.metas)
            with data.unlocked(data.metas):
                data.metas[:, first_new:] = np.hstack(tuple(extra))
        data.name = learner_name

        # `selected` is an empty list when nothing matches.
        annotated_data = create_annotated_table(data, selected)
        data = data[selected] if selected else None
        return data, annotated_data

    @gui.deferred
    def commit(self):
        """Send the instances matching the selected cells to the outputs.

        Both outputs receive ``None`` unless results, data and a selected
        learner are all available.
        """
        data = annotated_data = None
        ready = self.results is not None and self.data is not None \
            and self.selected_learner
        if ready:
            data, annotated_data = self._prepare_data()

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        """Store the view's selected cells in ``selection`` and commit.

        Cells are stored as (row, column) pairs shifted by two, so the first
        data cell is ``(0, 0)``.
        """
        self.selection = {
            (index.row() - 2, index.column() - 2)
            for index in self.tableview.selectedIndexes()
        }
        self.commit.deferred()

    def _set_selection(self):
        """Apply the cells stored in ``selection`` to the table view."""
        model = self.tableview.model()
        restored = QItemSelection()
        for row, col in self.selection:
            # Stored coordinates are shifted by two (see `_invalidate`).
            cell = model.index(row + 2, col + 2)
            restored.select(cell, cell)
        self.tableview.selectionModel().select(
            restored, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        """Redraw the matrix, re-apply the selection and schedule a commit."""
        self._update()
        self._set_selection()
        commit = self.commit
        commit.deferred()

    def _update(self):
        """Fill the table with the confusion matrix of the selected learner.

        Does nothing when there are no results or no learner is selected.
        Depending on ``selected_quantity`` the cells show counts (0),
        percentages of column sums (1) or percentages of row sums (other
        values). Cell backgrounds are shaded by the relative size of the
        value, and row and column sums are written to the last column and
        row.
        """
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner:
            cmatrix = confusion_matrix(self.results, self.selected_learner[0])
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = np.diag_indices(n)

            # Shading intensities; off-diagonal cells are scaled by the
            # largest off-diagonal value (overall, per column or per row).
            colors = cmatrix.astype(np.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                normalized = cmatrix.astype(int)
                formatstr = "{}"
                div = np.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    normalized = 100 * cmatrix / rowsum[:, np.newaxis]
                    div = colors.max(axis=1)[:, np.newaxis]
                formatstr = "{:2.1f} %"
            # Avoid division by zero where there are no off-diagonal counts.
            div[div == 0] = 1
            colors /= div
            # Diagonal cells are scaled by the largest diagonal value shown.
            maxval = normalized[diag].max()
            if maxval > 0:
                colors[diag] = normalized[diag] / maxval

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    # Hue 240 on the diagonal, hue 0 elsewhere; lightness
                    # decreases from 255 as the intensity grows.
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val) else int(255 -
                                                            30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    # bkcolor is light-ish so use a black text
                    item.setData(QBrush(Qt.black), Qt.ForegroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                # Bold, enabled but not selectable cell used for the sums.
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            # Column sums go to the last row, row sums to the last column,
            # and the grand total to the bottom-right corner.
            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Add the displayed table to the report.

        Nothing is reported without results or without a selected learner.
        """
        if self.results is None or not self.selected_learner:
            return
        learner_name = self.learners[self.selected_learner[0]]
        quantity = self.quantities[self.selected_quantity].lower()
        caption = "Confusion matrix for {} (showing {})".format(
            learner_name, quantity)
        self.report_table(caption, self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        """Upgrade unversioned settings in place.

        For some period of time the 'selected_learner' property was changed
        from List[int] -> int
        (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back again
        (commit 8a492d79a2e17154a0881e24a05843406c8892c0). A stored int is
        therefore wrapped into a one-element list.
        """
        if version:
            return
        key = "selected_learner"
        if key not in settings:
            return
        stored = settings[key]
        if isinstance(stored, int):
            settings[key] = [stored]
Example #4
0
class OWPreprocess(OWWidget):
    """Widget for configuring and running a text pre-processing pipeline."""

    name = 'Preprocess Text'
    description = 'Construct a text pre-processing pipeline.'
    icon = 'icons/TextPreprocess.svg'
    priority = 200

    class Inputs:
        corpus = Input("Corpus", Corpus)

    class Outputs:
        corpus = Output("Corpus", Corpus)

    autocommit = settings.Setting(True)

    # Pipeline stages; `__init__` instantiates one module widget for each
    # entry, in this order.
    preprocessors = [
        TransformationModule,
        TokenizerModule,
        NormalizationModule,
        FilteringModule,
        NgramsModule,
        POSTaggingModule,
    ]

    # Setting providers for the module widgets listed above.
    transformers = settings.SettingProvider(TransformationModule)
    tokenizer = settings.SettingProvider(TokenizerModule)
    normalizer = settings.SettingProvider(NormalizationModule)
    filters = settings.SettingProvider(FilteringModule)
    ngrams_range = settings.SettingProvider(NgramsModule)
    pos_tagger = settings.SettingProvider(POSTaggingModule)

    # Fixed width (in pixels) used for the info box and the buttons.
    control_area_width = 180
    buttons_area_orientation = Qt.Vertical

    UserAdviceMessages = [
        widget.Message(
            "Some preprocessing methods require data (like word relationships, stop words, "
            "punctuation rules etc.) from the NLTK package. This data was downloaded "
            "to: {}".format(nltk_data_dir()), "nltk_data")
    ]

    class Error(OWWidget.Error):
        stanford_tagger = Msg("Problem while loading Stanford POS Tagger\n{}")
        stopwords_encoding = Msg(
            "Invalid stopwords file encoding. Please save the file as UTF-8 and try again."
        )
        lexicon_encoding = Msg(
            "Invalid lexicon file encoding. Please save the file as UTF-8 and try again."
        )
        error_reading_stopwords = Msg("Error reading file: {}")
        error_reading_lexicon = Msg("Error reading file: {}")

    class Warning(OWWidget.Warning):
        no_token_left = Msg(
            'No tokens on output! Please, change configuration.')
        udpipe_offline = Msg(
            'No internet connection! UDPipe now only works with local models.')
        udpipe_offline_no_models = Msg(
            'No internet connection and no local UDPipe models are available.')

    def __init__(self, parent=None):
        """Build the info box, the scrollable list of stages and the buttons."""
        super().__init__(parent)
        self.corpus = None
        self.initial_ngram_range = None  # initial range of input corpus — used for inplace
        self.preprocessor = preprocess.Preprocessor()

        # -- INFO --
        info_box = gui.widgetBox(self.controlArea, 'Info')
        info_box.setFixedWidth(self.control_area_width)
        self.controlArea.layout().addStretch()
        self.info_label = gui.label(info_box, self, '')
        self.update_info()

        # -- PIPELINE --
        frame = QFrame()
        frame.setContentsMargins(0, 0, 0, 0)
        frame.setFrameStyle(QFrame.Box)
        frame.setStyleSheet('.QFrame { border: 1px solid #B3B3B3; }')
        frame_layout = QVBoxLayout()
        frame_layout.setContentsMargins(0, 0, 0, 0)
        frame_layout.setSpacing(0)
        frame.setLayout(frame_layout)

        # One module widget per stage. Each is also stored on `self` under
        # the attribute name declared by its class, and any change in it
        # triggers `settings_invalidated`.
        # NOTE(review): the loop variable `widget` shadows any module-level
        # `widget` name within this method.
        self.stages = []
        for stage in self.preprocessors:
            widget = stage(self)
            self.stages.append(widget)
            setattr(self, stage.attribute, widget)
            frame_layout.addWidget(widget)
            widget.change_signal.connect(self.settings_invalidated)

        frame_layout.addStretch()
        self.scroll = QScrollArea()
        self.scroll.setWidget(frame)
        self.scroll.setWidgetResizable(True)
        self.scroll.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.scroll.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.scroll.resize(frame_layout.sizeHint())
        self.scroll.setMinimumHeight(500)
        self.set_minimal_width()
        # NOTE(review): the result of this call is discarded — confirm
        # whether it is needed.
        self.mainArea.layout().sizeHint()
        self.mainArea.layout().addWidget(self.scroll)

        # Buttons area
        self.report_button.setFixedWidth(self.control_area_width)

        commit_button = gui.auto_commit(self.buttonsArea,
                                        self,
                                        'autocommit',
                                        'Commit',
                                        box=False)
        commit_button.setFixedWidth(self.control_area_width)

        self.buttonsArea.layout().addWidget(commit_button)

    @Inputs.corpus
    def set_data(self, data=None):
        """Store a copy of the input corpus and its n-gram range, then commit.

        With ``data=None`` both stored values are reset to ``None``.
        """
        if data is None:
            self.corpus = None
            self.initial_ngram_range = None
        else:
            self.corpus = data.copy()
            self.initial_ngram_range = data.ngram_range
        self.commit()

    def update_info(self, corpus=None):
        """Show document, token and type counts of ``corpus`` in the info box.

        Without a corpus the label reads 'No corpus.'.
        """
        if corpus is None:
            self.info_label.setText('No corpus.')
            return
        template = ('Document count: {}\n'
                    'Total tokens: {}\n'
                    'Total types: {}')
        n_documents = len(corpus)
        n_tokens = sum(map(len, corpus.tokens))
        n_types = len(corpus.dictionary)
        self.info_label.setText(
            template.format(n_documents, n_tokens, n_types))

    def commit(self):
        """Run the pipeline on the stored corpus, or clear the output.

        The 'no tokens' warning is cleared first in either case.
        """
        self.Warning.no_token_left.clear()
        if self.corpus is None:
            self.update_info()
            self.Outputs.corpus.send(None)
            return
        self.apply()

    def apply(self):
        """Start preprocessing of the stored corpus."""
        run = self.preprocess
        run()

    @asynchronous
    def preprocess(self):
        """Configure the preprocessor from the stages and run it in place.

        Every stage's value is copied onto the preprocessor under the stage's
        attribute name. The corpus' POS tags and n-gram range are reset
        before the preprocessor is called.

        Returns:
            Whatever the preprocessor returns for the stored corpus.
        """
        pipeline = self.preprocessor
        for stage in self.stages:
            setattr(pipeline, stage.attribute, stage.value)
        # reset pos_tags and ngrams_range
        self.corpus.pos_tags = None
        self.corpus.ngram_range = self.initial_ngram_range
        return pipeline(self.corpus,
                        inplace=True,
                        on_progress=self.on_progress)

    @preprocess.on_start
    def on_start(self):
        """Initialize the progress bar; registered as the start handler."""
        self.progressBarInit()

    @preprocess.callback
    def on_progress(self, i):
        """Set the progress bar to ``i``; passed to the preprocessor."""
        self.progressBarSet(i)

    @preprocess.on_result
    def on_result(self, result):
        """Publish the preprocessed corpus and finish the progress bar.

        The info box is updated with ``result``. A result with an empty
        dictionary raises the 'no tokens' warning and ``None`` is sent
        instead.
        """
        self.update_info(result)
        no_tokens = result is not None and len(result.dictionary) == 0
        if no_tokens:
            self.Warning.no_token_left()
        self.Outputs.corpus.send(None if no_tokens else result)
        self.progressBarFinished()

    def set_minimal_width(self):
        """Fit the scroll area's minimum width to the enabled stages.

        The width is the largest size-hint width among enabled stages, but at
        least 250, plus 20.
        """
        hint_widths = [
            stage.sizeHint().width() for stage in self.stages if stage.enabled
        ]
        widest = max([250] + hint_widths)
        self.scroll.setMinimumWidth(widest + 20)

    @pyqtSlot()
    def settings_invalidated(self):
        """Handle a changed stage: refit the width, then commit."""
        self.set_minimal_width()
        self.commit()

    def send_report(self):
        """Add the preprocessor's own report to the widget report."""
        summary = self.preprocessor.report()
        self.report_items('Preprocessor', summary)
Example #5
0
class OWMap(widget.OWWidget):
    name = 'Geo Map'
    description = 'Show data points on a world map.'
    icon = "icons/GeoMap.svg"
    priority = 100

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)
        learner = Input("Learner", Learner)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    replaces = [
        "Orange.widgets.visualize.owmap.OWMap",
    ]

    settingsHandler = settings.DomainContextHandler()

    want_main_area = True

    autocommit = settings.Setting(True)
    tile_provider = settings.Setting('Black and white')
    lat_attr = settings.ContextSetting('')
    lon_attr = settings.ContextSetting('')
    class_attr = settings.ContextSetting('(None)')
    color_attr = settings.ContextSetting('')
    label_attr = settings.ContextSetting('')
    shape_attr = settings.ContextSetting('')
    size_attr = settings.ContextSetting('')
    opacity = settings.Setting(100)
    zoom = settings.Setting(100)
    jittering = settings.Setting(0)
    cluster_points = settings.Setting(False)
    show_legend = settings.Setting(True)

    TILE_PROVIDERS = OrderedDict((
        ('Black and white', 'OpenStreetMap.BlackAndWhite'),
        ('OpenStreetMap', 'OpenStreetMap.Mapnik'),
        ('Topographic', 'Thunderforest.OpenCycleMap'),
        ('Topographic 2', 'Thunderforest.Outdoors'),
        ('Satellite', 'Esri.WorldImagery'),
        ('Print', 'Stamen.TonerLite'),
        ('Dark', 'CartoDB.DarkMatter'),
        ('Watercolor', 'Stamen.Watercolor'),
    ))

    class Error(widget.OWWidget.Error):
        model_error = widget.Msg("Error predicting: {}")
        learner_error = widget.Msg("Error modelling: {}")

    class Warning(widget.OWWidget.Warning):
        all_nan_slice = widget.Msg(
            'Latitude and/or longitude has no defined values (is all-NaN)')

    UserAdviceMessages = [
        widget.Message(
            'Select markers by holding <b><kbd>Shift</kbd></b> key and dragging '
            'a rectangle around them. Clear the selection by clicking anywhere.',
            'shift-selection')
    ]

    graph_name = "map"

    def __init__(self):
        super().__init__()
        self.map = map = LeafletMap(self)  # type: LeafletMap
        self.mainArea.layout().addWidget(map)
        self.selection = None
        self.data = None
        self.learner = None

        def selectionChanged(indices):
            self.selection = self.data[
                indices] if self.data is not None and indices else None
            self._indices = indices
            self.commit()

        map.selectionChanged.connect(selectionChanged)

        def _set_map_provider():
            map.set_map_provider(self.TILE_PROVIDERS[self.tile_provider])

        box = gui.vBox(self.controlArea, 'Map')
        gui.comboBox(box,
                     self,
                     'tile_provider',
                     orientation=Qt.Horizontal,
                     label='Map:',
                     items=tuple(self.TILE_PROVIDERS.keys()),
                     sendSelectedValue=True,
                     callback=_set_map_provider)

        self._latlon_model = DomainModel(parent=self,
                                         valid_types=ContinuousVariable)
        self._class_model = DomainModel(parent=self,
                                        placeholder='(None)',
                                        valid_types=DomainModel.PRIMITIVE)
        self._color_model = DomainModel(parent=self,
                                        placeholder='(Same color)',
                                        valid_types=DomainModel.PRIMITIVE)
        self._shape_model = DomainModel(parent=self,
                                        placeholder='(Same shape)',
                                        valid_types=DiscreteVariable)
        self._size_model = DomainModel(parent=self,
                                       placeholder='(Same size)',
                                       valid_types=ContinuousVariable)
        self._label_model = DomainModel(parent=self, placeholder='(No labels)')

        def _set_lat_long():
            self.map.set_data(self.data, self.lat_attr, self.lon_attr)
            self.train_model()

        self._combo_lat = combo = gui.comboBox(box,
                                               self,
                                               'lat_attr',
                                               orientation=Qt.Horizontal,
                                               label='Latitude:',
                                               sendSelectedValue=True,
                                               callback=_set_lat_long)
        combo.setModel(self._latlon_model)
        self._combo_lon = combo = gui.comboBox(box,
                                               self,
                                               'lon_attr',
                                               orientation=Qt.Horizontal,
                                               label='Longitude:',
                                               sendSelectedValue=True,
                                               callback=_set_lat_long)
        combo.setModel(self._latlon_model)

        def _toggle_legend():
            self.map.toggle_legend(self.show_legend)

        gui.checkBox(box,
                     self,
                     'show_legend',
                     label='Show legend',
                     callback=_toggle_legend)

        box = gui.vBox(self.controlArea, 'Overlay')
        self._combo_class = combo = gui.comboBox(box,
                                                 self,
                                                 'class_attr',
                                                 orientation=Qt.Horizontal,
                                                 label='Target:',
                                                 sendSelectedValue=True,
                                                 callback=self.train_model)
        self.controls.class_attr.setModel(self._class_model)
        self.set_learner(self.learner)

        box = gui.vBox(self.controlArea, 'Points')
        self._combo_color = combo = gui.comboBox(
            box,
            self,
            'color_attr',
            orientation=Qt.Horizontal,
            label='Color:',
            sendSelectedValue=True,
            callback=lambda: self.map.set_marker_color(self.color_attr))
        combo.setModel(self._color_model)
        self._combo_label = combo = gui.comboBox(
            box,
            self,
            'label_attr',
            orientation=Qt.Horizontal,
            label='Label:',
            sendSelectedValue=True,
            callback=lambda: self.map.set_marker_label(self.label_attr))
        combo.setModel(self._label_model)
        self._combo_shape = combo = gui.comboBox(
            box,
            self,
            'shape_attr',
            orientation=Qt.Horizontal,
            label='Shape:',
            sendSelectedValue=True,
            callback=lambda: self.map.set_marker_shape(self.shape_attr))
        combo.setModel(self._shape_model)
        self._combo_size = combo = gui.comboBox(
            box,
            self,
            'size_attr',
            orientation=Qt.Horizontal,
            label='Size:',
            sendSelectedValue=True,
            callback=lambda: self.map.set_marker_size(self.size_attr))
        combo.setModel(self._size_model)

        def _set_opacity():
            map.set_marker_opacity(self.opacity)

        def _set_zoom():
            map.set_marker_size_coefficient(self.zoom)

        def _set_jittering():
            map.set_jittering(self.jittering)

        def _set_clustering():
            map.set_clustering(self.cluster_points)

        self._opacity_slider = gui.hSlider(box,
                                           self,
                                           'opacity',
                                           None,
                                           1,
                                           100,
                                           5,
                                           label='Opacity:',
                                           labelFormat=' %d%%',
                                           callback=_set_opacity)
        self._zoom_slider = gui.valueSlider(box,
                                            self,
                                            'zoom',
                                            None,
                                            values=(20, 50, 100, 200, 300, 400,
                                                    500, 700, 1000),
                                            label='Symbol size:',
                                            labelFormat=' %d%%',
                                            callback=_set_zoom)
        self._jittering = gui.valueSlider(box,
                                          self,
                                          'jittering',
                                          label='Jittering:',
                                          values=(0, .5, 1, 2, 5),
                                          labelFormat=' %.1f%%',
                                          ticks=True,
                                          callback=_set_jittering)
        self._clustering_check = gui.checkBox(box,
                                              self,
                                              'cluster_points',
                                              label='Cluster points',
                                              callback=_set_clustering)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send Selection')

        QTimer.singleShot(0, _set_map_provider)
        QTimer.singleShot(0, _toggle_legend)
        QTimer.singleShot(0, _set_opacity)
        QTimer.singleShot(0, _set_zoom)
        QTimer.singleShot(0, _set_jittering)
        QTimer.singleShot(0, _set_clustering)

    # Widget setting referenced by name in the gui.auto_commit(...) call that
    # builds the 'Send Selection' control at the end of __init__.
    autocommit = settings.Setting(True)

    def __del__(self):
        """Finish the progress bar and drop the reference to the map."""
        self.progressBarFinished(None)
        self.map = None

    def commit(self):
        """Send the current selection and the annotated input table."""
        outputs = self.Outputs
        outputs.selected_data.send(self.selection)
        # The annotated table is built from the whole input and the indices
        # stored in ``self._indices``.
        annotated = create_annotated_table(self.data, self._indices)
        outputs.annotated_data.send(annotated)

    @Inputs.data
    def set_data(self, data):
        """Handle a new table on the data input.

        The settings context is closed, every attribute model is pointed to
        the new domain, latitude/longitude and the marker attributes are
        preselected, the context is reopened and the result is pushed to the
        map. Missing or empty data clears the widget instead.

        Parameters
        ----------
        data : table or None
            The new input data.
        """
        self.data = data

        self.closeContext()

        if data is None or not len(data):
            return self.clear()

        # ``data`` is known to be non-None past the guard above.
        domain = data.domain
        for model in (self._latlon_model, self._class_model, self._color_model,
                      self._shape_model, self._size_model, self._label_model):
            model.set_domain(domain)

        lat, lon = find_lat_lon(data)
        if lat or lon:
            # Treat each coordinate on its own: the longitude combo must be
            # reset when *lon* (not lat) is missing, and ``.name`` may only
            # be read from a variable that was actually found.
            self._combo_lat.setCurrentIndex(
                -1 if lat is None else self._latlon_model.indexOf(lat))
            self._combo_lon.setCurrentIndex(
                -1 if lon is None else self._latlon_model.indexOf(lon))
            if lat is not None:
                self.lat_attr = lat.name
            if lon is not None:
                self.lon_attr = lon.name

        # Defaults for the marker attributes; an opened context may override
        # them below.
        if data.domain.class_var:
            self.color_attr = data.domain.class_var.name
        elif len(self._color_model):
            self._combo_color.setCurrentIndex(0)
        if len(self._shape_model):
            self._combo_shape.setCurrentIndex(0)
        if len(self._size_model):
            self._combo_size.setCurrentIndex(0)
        if len(self._label_model):
            self._combo_label.setCurrentIndex(0)
        if len(self._class_model):
            self._combo_class.setCurrentIndex(0)

        self.openContext(data)

        # Only the last call asks the map to update, so the markers are
        # refreshed once for all four properties.
        self.map.set_data(self.data, self.lat_attr, self.lon_attr)
        self.map.set_marker_color(self.color_attr, update=False)
        self.map.set_marker_label(self.label_attr, update=False)
        self.map.set_marker_shape(self.shape_attr, update=False)
        self.map.set_marker_size(self.size_attr, update=True)

    @Inputs.data_subset
    def set_subset(self, subset):
        """Pass the ids of the subset rows, or an empty array, to the map."""
        if subset is None:
            subset_ids = np.array([])
        else:
            subset_ids = subset.ids
        self.map.set_subset_ids(subset_ids)

    def handleNewSignals(self):
        """Run the base-class handling, then retrain the model."""
        super().handleNewSignals()
        self.train_model()

    @Inputs.learner
    def set_learner(self, learner):
        """Store the learner and enable the target combo only if one is given.

        Without a learner the combo is disabled and gets a tooltip explaining
        why; with a learner the tooltip is cleared.
        """
        self.learner = learner
        has_learner = learner is not None
        target_combo = self.controls.class_attr
        target_combo.setEnabled(has_learner)
        if has_learner:
            tooltip = ''
        else:
            tooltip = 'Needs a Learner input for modelling.'
        target_combo.setToolTip(tooltip)

    def train_model(self):
        """Fit the learner on the coordinates and hand the model to the map.

        Training happens only when data, a learner, a target other than
        '(None)' and both coordinate attributes are available and the target
        is in the data domain. In every other case, and when the learner
        raises, the map receives ``None``.
        """
        self.Error.clear()
        fitted = None
        if self.data and self.learner and self.class_attr != '(None)':
            full_domain = self.data.domain
            coords_chosen = self.lat_attr and self.lon_attr
            if coords_chosen and self.class_attr in full_domain:
                # Reduced domain: latitude and longitude as the features,
                # the chosen target as the class.
                features = [full_domain[self.lat_attr],
                            full_domain[self.lon_attr]]
                target = [full_domain[self.class_attr]]
                train = Table.from_table(Domain(features, target), self.data)
                try:
                    fitted = self.learner(train)
                except Exception as err:
                    # Report the failure in the widget instead of raising.
                    self.Error.learner_error(err)
        self.map.set_model(fitted)

    def disable_some_controls(self, disabled):
        """Disable or enable the label, shape and clustering controls.

        When disabling, each control gets a tooltip naming the point limit
        (``self.map.N_POINTS_PER_ITER``); when enabling, the tooltip is
        cleared.
        """
        if disabled:
            tooltip = ("Available when the zoom is close enough to have "
                       "<{} points in the viewport.").format(
                           self.map.N_POINTS_PER_ITER)
        else:
            tooltip = ''
        affected = (self._combo_label, self._combo_shape,
                    self._clustering_check)
        for control in affected:
            control.setDisabled(disabled)
            control.setToolTip(tooltip)

    def clear(self):
        """Reset the map, all attribute models and the chosen attributes."""
        self.map.set_data(None, '', '')
        attribute_models = (self._latlon_model, self._class_model,
                            self._color_model, self._shape_model,
                            self._size_model, self._label_model)
        for attr_model in attribute_models:
            attr_model.set_domain(None)
        # Same assignment order as a chained ``a = b = ... = None``.
        for attr_name in ('lat_attr', 'lon_attr', 'class_attr', 'color_attr',
                          'label_attr', 'shape_attr', 'size_attr'):
            setattr(self, attr_name, None)
Example #6
0
class OWMergeData(widget.OWWidget):
    """Widget that merges two tables by matching rows on selected columns.

    Three merging modes are supported (see ``OptionNames``): appending
    columns from the extra table, keeping only matched pairs of rows, and
    concatenating both tables with matched rows merged into one.
    """
    name = "Merge Data"
    description = "Merge datasets based on the values of selected features."
    icon = "icons/MergeData.svg"
    priority = 1110
    keywords = ["join"]

    class Inputs:
        data = Input("Data",
                     Orange.data.Table,
                     default=True,
                     replaces=["Data A"])
        extra_data = Input("Extra Data",
                           Orange.data.Table,
                           replaces=["Data B"])

    class Outputs:
        data = Output(
            "Data",
            Orange.data.Table,
            replaces=["Merged Data A+B", "Merged Data B+A", "Merged Data"])

    # Indices into OptionNames, OptionDescriptions and _merge_methods
    LeftJoin, InnerJoin, OuterJoin = range(3)
    OptionNames = ("Append columns from Extra data",
                   "Find matching pairs of rows", "Concatenate tables")
    OptionDescriptions = (
        "The first table may contain, for instance, city names,\n"
        "and the second would be a list of cities and their coordinates.\n"
        "Columns with coordinates would then be appended to the output.",
        "Input tables contain different features describing the same data "
        "instances.\n"
        "Output contains matched instances. Rows without matches are removed.",
        "Input tables contain different features describing the same data "
        "instances.\n"
        "Output contains all instances. Data from merged instances is "
        "merged into single rows.")

    UserAdviceMessages = [
        widget.Message("Confused about merging options?\nSee the tooltips!",
                       "merging_types")
    ]

    settingsHandler = MergeDataContextHandler()
    attr_pairs = ContextSetting(None, schema_only=True)
    merging = Setting(LeftJoin)
    auto_apply = Setting(True)
    settings_version = 2

    want_main_area = False
    resizing_enabled = False

    class Warning(widget.OWWidget.Warning):
        renamed_vars = Msg("Some variables have been renamed "
                           "to avoid duplicates.\n{}")

    class Error(widget.OWWidget.Error):
        matching_numeric_with_nonnum = Msg(
            "Numeric and non-numeric columns ({} and {}) cannot be matched.")
        matching_index_with_sth = Msg("Row index cannot be matched with {}.")
        matching_id_with_sth = Msg("Instance cannot be matched with {}.")
        nonunique_left = Msg(
            "Some combinations of values on the left appear in multiple rows.\n"
            "For this type of merging, every possible combination of values "
            "on the left should appear at most once.")
        nonunique_right = Msg(
            "Some combinations of values on the right appear in multiple rows."
            "\n"
            "Every possible combination of values on the right should appear "
            "at most once.")

    def __init__(self):
        """Build the models and the GUI, and connect the signals."""
        super().__init__()

        self.data = None
        self.extra_data = None

        content = [
            INDEX, INSTANCEID, DomainModel.ATTRIBUTES, DomainModel.CLASSES,
            DomainModel.METAS
        ]
        self.model = DomainModelWithTooltips(content)
        self.extra_model = DomainModelWithTooltips(content)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        gui.radioButtons(self.controlArea,
                         self,
                         "merging",
                         box="Merging",
                         btnLabels=self.OptionNames,
                         tooltips=self.OptionDescriptions,
                         callback=self.change_merging)

        self.attr_boxes = ConditionBox(self, self.model, self.extra_model, "",
                                       "matches")
        self.attr_boxes.add_row()
        box = gui.vBox(self.controlArea, box="Row matching")
        box.layout().addWidget(self.attr_boxes)

        gui.auto_apply(self.buttonsArea, self)
        # connect after wrapping self.commit with gui.auto_apply!
        self.attr_boxes.vars_changed.connect(self.commit)
        self.attr_boxes.vars_changed.connect(self.store_combo_state)
        self.settingsAboutToBePacked.connect(self.store_combo_state)

    def change_merging(self):
        """Callback of the merging radio buttons: commit."""
        self.commit()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Store the primary table and update the model of its variables."""
        self.data = data
        self.model.set_domain(data.domain if data else None)

    @Inputs.extra_data
    @check_sql_input
    def set_extra_data(self, data):
        """Store the extra table and update the model of its variables."""
        self.extra_data = data
        self.extra_model.set_domain(data.domain if data else None)

    def store_combo_state(self):
        """Copy the current state of the matching rows into the setting."""
        self.attr_pairs = self.attr_boxes.current_state()

    def handleNewSignals(self):
        """Reopen the context, refresh the input summary and commit."""
        self.closeContext()
        # Default pair; an opened context may replace it
        self.attr_pairs = [self._find_best_match()]
        self.openContext(self.data and self.data.domain, self.extra_data
                         and self.extra_data.domain)
        self.attr_boxes.set_state(self.attr_pairs)

        summary, details, kwargs = self.info.NoInput, "", {}
        if self.data or self.extra_data:
            n_data = len(self.data) if self.data else 0
            n_extra_data = len(self.extra_data) if self.extra_data else 0
            summary = f"{self.info.format_number(n_data)}, " \
                      f"{self.info.format_number(n_extra_data)}"
            kwargs = {"format": Qt.RichText}
            details = format_multiple_summaries([("Data", self.data),
                                                 ("Extra data",
                                                  self.extra_data)])
        self.info.set_input_summary(summary, details, **kwargs)

        self.unconditional_commit()

    def _find_best_match(self):
        """Return the pair of string variables sharing the most values.

        Every pair of string variables from the two models is compared by
        the number of distinct common values. If no pair has a non-empty
        intersection, (INDEX, INDEX) is returned.
        """
        def get_unique_str_metas_names(model_):
            return [m for m in model_ if isinstance(m, StringVariable)]

        attr, extra_attr, n_max_intersect = INDEX, INDEX, 0
        str_metas = get_unique_str_metas_names(self.model)
        extra_str_metas = get_unique_str_metas_names(self.extra_model)
        for m_a, m_b in product(str_metas, extra_str_metas):
            col = self.data[:, m_a].metas
            extra_col = self.extra_data[:, m_b].metas
            if col.size and extra_col.size \
                    and isinstance(col[0][0], str) \
                    and isinstance(extra_col[0][0], str):
                n_inter = len(np.intersect1d(col, extra_col))
                if n_inter > n_max_intersect:
                    n_max_intersect, attr, extra_attr = n_inter, m_a, m_b
        return attr, extra_attr

    def commit(self):
        """Merge the tables (if both are present) and send the result."""
        self.clear_messages()
        merged = self.merge() if self.data and self.extra_data else None
        self.Outputs.data.send(merged)
        details = format_summary_details(merged) if merged else ""
        summary = len(merged) if merged else self.info.NoOutput
        self.info.set_output_summary(summary, details)

    def send_report(self):
        """Report the merging type and the matched pairs of columns."""
        # pylint: disable=invalid-sequence-index
        self.report_items(
            (("Merging", self.OptionNames[self.merging]), ("Match", ", ".join(
                f"{self._get_col_name(left)} with {self._get_col_name(right)}"
                for left, right in self.attr_boxes.current_state()))))

    def merge(self):
        """Return the merged table, or None if the pairs are not valid.

        The chosen pairs are checked for compatible types and the key values
        for uniqueness; then the join method selected by ``self.merging``
        computes row indices, from which the output table is assembled.
        """
        # pylint: disable=invalid-sequence-index
        pairs = self.attr_boxes.current_state()
        if not self._check_pair_types(pairs):
            return None
        left_vars, right_vars = zip(*pairs)
        # The masks are updated in place by _values: rows with missing key
        # values are marked False
        left_mask = np.full(len(self.data), True)
        left = np.vstack(
            tuple(
                self._values(self.data, var, left_mask)
                for var in left_vars)).T
        right_mask = np.full(len(self.extra_data), True)
        right = np.vstack(
            tuple(
                self._values(self.extra_data, var, right_mask)
                for var in right_vars)).T
        if not self._check_uniqueness(left, left_mask, right, right_mask):
            return None
        method = self._merge_methods[self.merging]
        lefti, righti, rightu = method(self, left, left_mask, right,
                                       right_mask)
        reduced_extra_data = \
            self._compute_reduced_extra_data(right_vars, lefti, righti, rightu)
        return self._join_table_by_indices(reduced_extra_data, lefti, righti,
                                           rightu)

    def _check_pair_types(self, pairs):
        """Return True if all pairs can be matched; otherwise show an error.

        A pair is rejected if exactly one side is a continuous variable, or
        if row index or instance id is paired with anything but itself.
        """
        for left, right in pairs:
            if isinstance(left, ContinuousVariable) \
                    != isinstance(right, ContinuousVariable):
                self.Error.matching_numeric_with_nonnum(left, right)
                return False
            if INDEX in (left, right) and left != right:
                self.Error.matching_index_with_sth(
                    self._get_col_name(({left, right} - {INDEX}).pop()))
                return False
            if INSTANCEID in (left, right) and left != right:
                self.Error.matching_id_with_sth(
                    self._get_col_name(({left, right} - {INSTANCEID}).pop()))
                return False
        return True

    @staticmethod
    def _get_col_name(obj):
        """Return a quoted variable name, or a lower-cased special label."""
        return f"'{obj.name}'" if isinstance(obj, Variable) else obj.lower()

    def _check_uniqueness(self, left, left_mask, right, right_mask):
        """Return True if the unmasked key combinations are unique.

        Keys on the right must always be unique; keys on the left must be
        unique for every merging type except the left join. The corresponding
        errors are shown for each violated condition.
        """
        ok = True
        masked_right = right[right_mask]
        if len(set(map(tuple, masked_right))) != len(masked_right):
            self.Error.nonunique_right()
            ok = False
        if self.merging != self.LeftJoin:
            masked_left = left[left_mask]
            if len(set(map(tuple, masked_left))) != len(masked_left):
                self.Error.nonunique_left()
                ok = False
        return ok

    def _compute_reduced_extra_data(self, right_match_vars, lefti, righti,
                                    rightu):
        """Prepare a table with extra columns that will appear in the merged
        table"""
        domain = self.data.domain
        extra_domain = self.extra_data.domain

        def var_needed(var):
            # Unmatched rows from the extra table are appended, so all
            # columns are kept
            if rightu is not None and rightu.size:
                return True
            if var in right_match_vars and self.merging != self.OuterJoin:
                return False
            if var not in domain:
                return True
            # The variable is in both tables: it is dropped only if the
            # values in all matched rows are the same in both tables
            both_defined = (lefti != -1) * (righti != -1)
            left_col = \
                self.data.get_column_view(var)[0][lefti[both_defined]]
            right_col = \
                self.extra_data.get_column_view(var)[0][righti[both_defined]]
            if var.is_primitive():
                left_col = left_col.astype(float)
                right_col = right_col.astype(float)
                mask_left = np.isfinite(left_col)
                mask_right = np.isfinite(right_col)
                return not (np.all(mask_left == mask_right) and np.all(
                    left_col[mask_left] == right_col[mask_right]))
            else:
                return not np.all(left_col == right_col)

        extra_vars = [
            var for var in chain(extra_domain.variables, extra_domain.metas)
            if var_needed(var)
        ]
        return self.extra_data[:, extra_vars]

    @staticmethod
    def _values(data, var, mask):
        """Return an array with key values for rows of the table.

        `mask` is modified in place: elements corresponding to rows with
        missing values of `var` are set to False.
        """
        if var == INDEX:
            return np.arange(len(data))
        if var == INSTANCEID:
            # The `np.int` alias was removed in NumPy 1.24; the builtin
            # `int` is the same dtype
            return np.fromiter((inst.id for inst in data),
                               count=len(data),
                               dtype=int)
        col = data.get_column_view(var)[0]
        if var.is_primitive():
            col = col.astype(float, copy=False)
            nans = np.isnan(col)
            mask *= ~nans
            if var.is_discrete:
                # Replace indices with value names; missing values index
                # the extra nan appended to the tuple of values
                col = col.astype(int)
                col[nans] = len(var.values)
                col = np.array(var.values + (np.nan, ))[col]
        else:
            col = col.copy()
            defined = col.astype(bool)
            mask *= defined
            col[~mask] = np.nan
        return col

    def _left_join_indices(self, left, left_mask, right, right_mask):
        """Compute indices for the left join; return a triple with
        - indices of all rows of the primary table,
        - indices of the matching rows in the extra table, or -1,
        - None (this join adds no unmatched rows from the extra table)"""
        data = self.data
        # Don't match nans. This is needed since numpy may change nan to string
        # nan, so nan's will match each other
        indices = np.arange(len(right))
        indices[~right_mask] = -1
        if right.shape[1] == 1:
            # The more common case can be handled faster
            right_map = dict(zip(right.flatten(), indices))
            righti = (right_map.get(val, -1) for val in left.flatten())
        else:
            right_map = dict(zip(map(tuple, right), indices))
            righti = (right_map.get(tuple(val), -1) for val in left)
        righti = np.fromiter(righti, dtype=np.int64, count=len(data))
        lefti = np.arange(len(data), dtype=np.int64)
        righti[lefti[~left_mask]] = -1
        return lefti, righti, None

    def _inner_join_indices(self, left, left_mask, right, right_mask):
        """Use _left_join_indices to compute the arrays of indices,
        then remove those with no match in the second table"""
        lefti, righti, _ = \
            self._left_join_indices(left, left_mask, right, right_mask)
        mask = righti != -1
        return lefti[mask], righti[mask], None

    def _outer_join_indices(self, left, left_mask, right, right_mask):
        """Use _left_join_indices to compute the arrays of indices,
        then add rows in the second table without a match in the first"""
        lefti, righti, _ = \
            self._left_join_indices(left, left_mask, right, right_mask)
        unused = np.full(len(right), True)
        # Skip -1 (no match): as an index it would refer to the last row
        unused[righti[righti != -1]] = False
        return lefti, righti, np.nonzero(unused)[0]

    # Indexed by self.merging, in the order LeftJoin, InnerJoin, OuterJoin
    _merge_methods = [
        _left_join_indices, _inner_join_indices, _outer_join_indices
    ]

    def _join_table_by_indices(self, reduced_extra, lefti, righti, rightu):
        """Join (horizontally) self.data and reduced_extra, taking the pairs
        of rows given in indices; return None if there are no rows.

        If `rightu` is not None, rows of the extra table with these indices
        are appended at the bottom."""
        if not lefti.size:
            return None
        lt_dom = self.data.domain
        xt_dom = reduced_extra.domain
        domain = self._domain_rename_duplicates(
            lt_dom.attributes + xt_dom.attributes,
            lt_dom.class_vars + xt_dom.class_vars, lt_dom.metas + xt_dom.metas)
        X = self._join_array_by_indices(self.data.X, reduced_extra.X, lefti,
                                        righti)
        Y = self._join_array_by_indices(np.c_[self.data.Y],
                                        np.c_[reduced_extra.Y], lefti, righti)
        string_cols = [
            i for i, var in enumerate(domain.metas) if var.is_string
        ]
        metas = self._join_array_by_indices(self.data.metas,
                                            reduced_extra.metas, lefti, righti,
                                            string_cols)
        if rightu is not None:
            # This domain is used for transforming the extra rows for outer join
            # It must use the original - not renamed - variables from right, so
            # values are copied,
            # but new domain for the left, so renamed values are *not* copied
            right_domain = Orange.data.Domain(
                domain.attributes[:len(lt_dom.attributes)] + xt_dom.attributes,
                domain.class_vars[:len(lt_dom.class_vars)] + xt_dom.class_vars,
                domain.metas[:len(lt_dom.metas)] + xt_dom.metas)
            extras = self.extra_data[rightu].transform(right_domain)
            X = np.vstack((X, extras.X))
            extras_Y = extras.Y
            if extras_Y.ndim == 1:
                extras_Y = extras_Y.reshape(-1, 1)
            Y = np.vstack((Y, extras_Y))
            metas = np.vstack((metas, extras.metas))
        table = Orange.data.Table.from_numpy(domain, X, Y, metas)
        table.name = getattr(self.data, 'name', '')
        table.attributes = getattr(self.data, 'attributes', {})
        if rightu is not None:
            table.ids = np.hstack((self.data.ids, self.extra_data.ids[rightu]))
        else:
            table.ids = self.data.ids[lefti]

        return table

    def _domain_rename_duplicates(self, attributes, class_vars, metas):
        """Check for duplicate variable names in domain. If any, rename
        the variables, by replacing them with new ones (names are
        appended a number). """
        attrs, cvars, mets = [], [], []
        n_attrs, n_cvars, n_metas = len(attributes), len(class_vars), len(
            metas)
        # For each variable, the list (role) into which it will be put
        lists = [attrs] * n_attrs + [cvars] * n_cvars + [mets] * n_metas

        all_vars = attributes + class_vars + metas
        proposed_names = [m.name for m in all_vars]
        unique_names = get_unique_names_duplicates(proposed_names)
        duplicates = set()
        for p_name, u_name, var, c in zip(proposed_names, unique_names,
                                          all_vars, lists):
            if p_name != u_name:
                duplicates.add(p_name)
                var = var.copy(name=u_name)
            c.append(var)
        if duplicates:
            self.Warning.renamed_vars(", ".join(duplicates))
        return Orange.data.Domain(attrs, cvars, mets)

    @staticmethod
    def _join_array_by_indices(left, right, lefti, righti, string_cols=None):
        """Join (horizontally) two arrays, taking pairs of rows given in indices

        Rows with index -1 are filled with nan, or with empty strings in
        columns whose (joint) indices are listed in `string_cols`.
        """
        def prepare(arr, inds, str_cols):
            try:
                newarr = arr[inds]
            except IndexError:
                newarr = np.full_like(arr, np.nan)
            else:
                empty = np.full(arr.shape[1], np.nan)
                if str_cols:
                    assert arr.dtype == object
                    empty = empty.astype(object)
                    empty[str_cols] = ''
                newarr[inds == -1] = empty
            return newarr

        left_width = left.shape[1]
        # Split the joint column indices into those of the left and of the
        # right array
        str_left = [i for i in string_cols or () if i < left_width]
        str_right = [
            i - left_width for i in string_cols or () if i >= left_width
        ]
        res = hstack((prepare(left, lefti,
                              str_left), prepare(right, righti, str_right)))
        return res

    @staticmethod
    def migrate_settings(settings, version=None):
        """Migrate settings stored by older versions of the widget.

        Settings without a version are first converted to the form with
        "attr_pairs"; "attr_pairs" from versions before 2 are then moved
        into a context stored under "context_settings".
        """
        def mig_value(x):
            if x == "Position (index)":
                return INDEX
            if x == "Source position (index)":
                return INSTANCEID
            return x

        if not version:
            operations = ("augment", "merge", "combine")
            oper = operations[settings["merging"]]
            settings["attr_pairs"] = (True, True, [
                (mig_value(settings[f"attr_{oper}_data"]),
                 mig_value(settings[f"attr_{oper}_extra"]))
            ])
            for oper in operations:
                del settings[f"attr_{oper}_data"]
                del settings[f"attr_{oper}_extra"]

        # Parentheses only spell out the operator precedence; unversioned
        # settings got their "attr_pairs" in the block above
        if not version or (version < 2 and "attr_pairs" in settings):
            data_exists, extra_exists, attr_pairs = settings.pop("attr_pairs")
            if not (data_exists and extra_exists):
                settings["context_settings"] = []
                return

            mapper = {0: (INDEX, 100), 1: (INSTANCEID, 100)}
            context = ContextHandler().new_context()
            context.values["attr_pairs"] = [
                tuple(mapper.get(var, (var, 100)) for var in pair)
                for pair in attr_pairs
            ]
            context.variables1 = {}
            context.variables2 = {}
            settings["context_settings"] = [context]
Example #7
0
class OWTestLearners(OWWidget):
    """
    Widget that evaluates the connected learners on the input data.

    The resampling scheme is chosen in the control area; the scores of each
    learner are shown in a table and the merged results and predictions are
    sent to the outputs.
    """
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    class Inputs:
        train_data = Input("Data", Table, default=True)
        test_data = Input("Test Data", Table)
        learner = Input("Learner", Learner, multiple=True)
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        predictions = Output("Predictions", Table)
        evaluations_results = Output("Evaluation Results", Results)

    # Bumped whenever stored settings change shape; see migrate_settings.
    settings_version = 3
    UserAdviceMessages = [
        widget.Message("Click on the table header to select shown columns",
                       "click_header")
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Resampling/testing types
    KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \
        = 0, 1, 2, 3, 4, 5
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    #: (stored as an index into NFolds)
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    #: (stored as an index into NRepeats)
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size (stored as an index into SampleSizes)
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)
    # CV where nr. of feature values determines nr. of folds
    fold_feature = settings.ContextSetting(None)
    # Remembers per context whether FeatureFold was the chosen resampling
    fold_feature_selected = settings.ContextSetting(False)

    # Combo entry meaning "do not score against a single target class"
    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    # Preferred column order of the built-in scores, per target variable type
    BUILTIN_ORDER = {
        DiscreteVariable: ("AUC", "CA", "F1", "Precision", "Recall"),
        ContinuousVariable: ("MSE", "RMSE", "MAE", "R2")
    }

    # Names of the score columns that are currently visible in the table
    shown_scores = \
        settings.Setting(set(chain(*BUILTIN_ORDER.values())))

    class Error(OWWidget.Error):
        train_data_empty = Msg("Train dataset is empty.")
        test_data_empty = Msg("Test dataset is empty.")
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg(
            "Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train datasets "
                                 "have different target variables.")
        memory_error = Msg("Not enough memory.")
        no_class_values = Msg("Target variable has no values.")
        only_one_class_var_value = Msg("Target variable has only one value.")

    class Warning(OWWidget.Warning):
        # The placeholder is filled with " ", " train " or " test "
        # (see _which_missing_data), hence no spaces around it.
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")

    def __init__(self):
        """Initialise the widget state and build the controls and score table."""
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, Input]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[Task]
        self.__executor = ThreadExecutor()

        # --- Sampling box: one radio button per resampling type, in the
        # order of the KFold ... TestOnTest constants.
        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_folds",
                     label="Number of folds: ",
                     items=[str(x) for x in self.NFolds],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.kfold_changed)
        gui.checkBox(ibox,
                     self,
                     "cv_stratified",
                     "Stratified",
                     callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        # Only discrete meta attributes are offered as the fold feature.
        self.feature_model = DomainModel(order=DomainModel.METAS,
                                         valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(ibox,
                                           self,
                                           "fold_feature",
                                           model=self.feature_model,
                                           orientation=Qt.Horizontal,
                                           callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_repeats",
                     label="Repeat train/test: ",
                     items=[str(x) for x in self.NRepeats],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.comboBox(ibox,
                     self,
                     "sample_size",
                     label="Training set size: ",
                     items=["{} %".format(x) for x in self.SampleSizes],
                     maximumContentsLength=5,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.checkBox(ibox,
                     self,
                     "shuffle_stratified",
                     "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        # --- Target class box; its items are filled in by
        # _update_class_selection once data is available.
        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        # --- Score table; a right click on its header opens the column
        # chooser.
        self.view = gui.TableView(wordWrap=True, )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)
        header.setContextMenuPolicy(Qt.CustomContextMenu)
        header.customContextMenuRequested.connect(self.show_column_chooser)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the preferred widget size: 780 wide with a nominal height of 1."""
        return QSize(780, 1)

    def _update_controls(self):
        self.fold_feature = None
        self.feature_model.set_domain(None)
        if self.data:
            self.feature_model.set_domain(self.data.domain)
            if self.fold_feature is None and self.feature_model:
                self.fold_feature = self.feature_model[0]
        enabled = bool(self.feature_model)
        self.controls.resampling.buttons[
            OWTestLearners.FeatureFold].setEnabled(enabled)
        self.features_combo.setEnabled(enabled)
        if self.resampling == OWTestLearners.FeatureFold and not enabled:
            self.resampling = OWTestLearners.KFold

    @Inputs.learner
    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        Parameters
        ----------
        learner : Optional[Orange.base.Learner]
        key : Any
        """
        if key in self.learners and learner is None:
            # Removed
            self._invalidate([key])
            del self.learners[key]
        else:
            self.learners[key] = InputLearner(learner, None, None)
            self._invalidate([key])

    @Inputs.train_data
    def set_train_data(self, data):
        """
        Set the input training dataset.

        The data is rejected (treated as `None`, with an error shown) when it
        is empty or its target is unusable. The context is then reopened for
        the new domain and all learner results are invalidated.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        self.Error.class_required.clear()
        self.Error.too_many_classes.clear()
        self.Error.no_class_values.clear()
        self.Error.only_one_class_var_value.clear()
        if data is not None and not len(data):
            self.Error.train_data_empty()
            data = None
        if data:
            # conds and errors are parallel lists; only the first condition
            # that holds is reported.
            conds = [
                not data.domain.class_vars,
                len(data.domain.class_vars) > 1,
                np.isnan(data.Y).all(), data.domain.has_discrete_class
                and len(data.domain.class_var.values) == 1
            ]
            errors = [
                self.Error.class_required, self.Error.too_many_classes,
                self.Error.no_class_values, self.Error.only_one_class_var_value
            ]
            for cond, error in zip(conds, errors):
                if cond:
                    error()
                    data = None
                    break

        # SQL-backed data is materialised as an in-memory Table; above
        # AUTO_DL_LIMIT rows only a sample is downloaded.
        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        # The warning is shared with the test data input, so both flags
        # decide whether it is shown.
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                # Per the warning text, instances with an unknown target
                # are removed.
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_scorers()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            # Restore the "by feature" choice stored in the context, if the
            # new data has a usable feature.
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestLearners.FeatureFold
        self._invalidate()

    @Inputs.test_data
    def set_test_data(self, data):
        # type: (Orange.data.Table) -> None
        """
        Set the input separate testing dataset.

        Empty data and data without a target variable are rejected (treated
        as `None`, with an error shown). Results are invalidated only when
        the "Test on test data" resampling is selected.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not len(data):
            self.Error.test_data_empty()
            data = None
        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        # SQL-backed data is materialised as an in-memory Table; above
        # AUTO_DL_LIMIT rows only a sample is downloaded.
        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        # The warning is shared with the train data input, so both flags
        # decide whether it is shown.
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                # Per the warning text, instances with an unknown target
                # are removed.
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        return {
            (True, True): " ",  # both, don't specify
            (True, False): " train ",
            (False, True): " test "
        }[(self.train_data_missing_vals, self.test_data_missing_vals)]

    # List of scorers shouldn't be retrieved globally, when the module is
    # loading since add-ons could have registered additional scorers.
    # It could have been cached but
    # - we don't gain much with it
    # - it complicates the unit tests
    def _update_scorers(self):
        """
        Recompute `self.scorers` for the target variable of the current data.

        The list is empty when there is no data or no single target variable.
        Otherwise it holds every registered scalar, non-abstract score that
        accepts the target's type, with the built-in scores first (in
        `BUILTIN_ORDER`) and all others after them.
        """
        if self.data is None or self.data.domain.class_var is None:
            self.scorers = []
            return
        class_var = self.data.domain.class_var
        # Match with isinstance rather than by exact type, so that a subclass
        # of a listed variable type gets its parent's ordering instead of
        # raising KeyError; unknown types simply get no preferred order.
        builtin_names = next(
            (names for var_type, names in self.BUILTIN_ORDER.items()
             if isinstance(class_var, var_type)),
            ())
        order = {name: i for i, name in enumerate(builtin_names)}
        # 'abstract' is retrieved from __dict__ to avoid inheriting
        usable = (cls for cls in scoring.Score.registry.values()
                  if cls.is_scalar and not cls.__dict__.get("abstract")
                  and isinstance(class_var, cls.class_types))
        self.scorers = sorted(usable, key=lambda cls: order.get(cls.name, 99))

    @Inputs.preprocessor
    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.

        All learner results are invalidated, so they are recomputed on the
        next update.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """
        Reimplemented from OWWidget.handleNewSignals.

        Refreshes the class selection, the table header and the table
        contents, and starts a new evaluation if any result was invalidated.
        """
        self._update_class_selection()
        self._update_header()
        self._update_stats_model()
        if self.__needupdate:
            self.__update()

    def kfold_changed(self):
        """Callback of the cross-validation controls: select K-fold and rerun."""
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def fold_feature_changed(self):
        """Callback of the fold-feature combo: select "by feature" and rerun."""
        self.resampling = OWTestLearners.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        """Callback of the random sampling controls: select it and rerun."""
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        """Invalidate all learner results and start a new evaluation at once."""
        self._invalidate()
        self.__update()

    def _update_header(self):
        """
        Give the results model one column per scorer, after the leading
        method-name column, labelled with the score name and carrying its
        long name as the tooltip.
        """
        scorers = self.scorers
        model = self.result_model
        model.setColumnCount(len(scorers) + 1)
        # Column 0 holds the method name, so score columns start at 1.
        for column, scorer in enumerate(scorers, start=1):
            header_item = QStandardItem(scorer.name)
            header_item.setToolTip(scorer.long_name)
            model.setHorizontalHeaderItem(column, header_item)
        self._update_shown_columns()

    def _update_shown_columns(self):
        """Hide every score column whose name is not in `shown_scores`."""
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test
        model = self.result_model
        header = self.view.horizontalHeader()
        # Column 0 (the method name) is never hidden.
        for section in range(1, model.columnCount()):
            title = model.horizontalHeaderItem(section).data(Qt.DisplayRole)
            hidden = title not in self.shown_scores
            header.setSectionHidden(section, hidden)

    def _update_stats_model(self):
        """
        Rebuild the rows of the results table from the current learner slots.

        One row is added per learner. Failed learners are marked in red with
        the exception as tooltip, and their messages are shown as the widget
        error. When a target class is selected for a discrete target, the
        scores are computed here from one-vs-rest results; otherwise the
        stored `slot.stats` are used.
        """
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        # Index of the selected target class; None means average over classes.
        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            # The input key identifies the row later, see _invalidate.
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}".format(
                                  name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(slot.results.value,
                                                      target_index)

                    # Cell variable is used immediately, it's not stored
                    # pylint: disable=cell-var-from-loop
                    stats = [
                        Try(scorer_caller(scorer, ovr_results, target=1))
                        for scorer in self.scorers
                    ]
                else:
                    stats = None
            else:
                stats = slot.stats

            # Without stats the row holds only the method name.
            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        """Callback of the target class combo: rebuild the score table."""
        self._update_stats_model()

    def _invalidate(self, which=None):
        """
        Drop the stored results of the given learners and blank their scores.

        Parameters
        ----------
        which : Optional[Iterable[Any]]
            Input keys of the learners to invalidate; `None` invalidates all.
        """
        self.fold_feature_selected = \
            self.resampling == OWTestLearners.FeatureFold
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        # Input key stored on the first cell of every table row.
        row_keys = [
            model.item(row, 0).data(Qt.UserRole)
            for row in range(model.rowCount())
        ]

        for key in which:
            slot = self.learners[key]
            self.learners[key] = slot._replace(results=None, stats=None)

            if key not in row_keys:
                continue
            row = row_keys.index(key)
            # Blank the score cells, keeping the method name in column 0.
            for column in range(1, model.columnCount()):
                cell = model.item(row, column)
                if cell is not None:
                    cell.setData(None, Qt.DisplayRole)
                    cell.setData(None, Qt.ToolTipRole)

        self.__needupdate = True

    def show_column_chooser(self, pos):
        """
        Pop up a menu of checkable score names at header position `pos`;
        toggling an entry shows or hides the corresponding column.
        """
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test
        def toggle(title, checked):
            if checked:
                self.shown_scores.add(title)
            else:
                self.shown_scores.remove(title)
            self._update_shown_columns()

        header = self.view.horizontalHeader()
        model = self.result_model
        menu = QMenu()
        # Column 0 (the method name) is not offered for hiding.
        for section in range(1, model.columnCount()):
            title = model.horizontalHeaderItem(section).data(Qt.DisplayRole)
            action = menu.addAction(title)
            action.setCheckable(True)
            action.setChecked(title in self.shown_scores)
            action.triggered.connect(partial(toggle, title))
        menu.exec(header.mapToGlobal(pos))

    def commit(self):
        """
        Commit the results to output.
        """
        self.Error.memory_error.clear()
        valid = [
            slot for slot in self.learners.values()
            if slot.results is not None and slot.results.success
        ]
        combined = None
        predictions = None
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [
                learner_name(slot.learner) for slot in valid
            ]

            # Predictions & Probabilities
            try:
                predictions = combined.get_augmented_data(
                    combined.learner_names)
            except MemoryError:
                self.Error.memory_error()

        self.Outputs.evaluations_results.send(combined)
        self.Outputs.predictions.send(predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".format(
                stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [
                ("Sampling type",
                 "{}Shuffle split, {} random samples with {}% data ".format(
                     stratified, self.NRepeats[self.n_repeats],
                     self.SampleSizes[self.sample_size]))
            ]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            if settings_["resampling"] > 0:
                settings_["resampling"] += 1
        if version < 3:
            # Older version used an incompatible context handler
            settings_["context_settings"] = [
                c for c in settings_.get("context_settings", ())
                if not hasattr(c, 'classes')
            ]

    @Slot(float)
    def setProgressValue(self, value):
        """
        Set the progress bar to `value` without processing pending events.

        Invoked by name through a queued connection from the progress
        callback in __submit.
        """
        self.progressBarSet(value, processEvents=False)

    def __update(self):
        """
        Start evaluating the learners that have no results yet.

        Any running evaluation is cancelled first. If a precondition fails
        (no data, no learners, too many folds, missing or inconsistent test
        data), the outputs are committed and nothing is submitted.
        """
        self.__needupdate = False

        assert self.__task is None or self.__state == State.Running
        if self.__state == State.Running:
            self.cancel()

        self.Warning.test_data_unused.clear()
        self.Warning.test_data_missing.clear()
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()

        # check preconditions and return early
        if self.data is None:
            self.__state = State.Waiting
            self.commit()
            return
        if not self.learners:
            self.__state = State.Waiting
            self.commit()
            return
        if self.resampling == OWTestLearners.KFold and \
                len(self.data) < self.NFolds[self.n_folds]:
            self.Error.too_many_folds()
            self.__state = State.Waiting
            self.commit()
            return

        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                # An empty test set already shows its own error.
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                self.__state = State.Waiting
                self.commit()
                return
            elif self.test_data.domain.class_var != self.data.domain.class_var:
                self.Error.class_inconsistent()
                self.__state = State.Waiting
                self.commit()
                return

        elif self.test_data is not None:
            self.Warning.test_data_unused()

        # Fixed seed passed as random_state to the randomised resamplings.
        rstate = 42
        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
        )
        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        # deepcopy all learners as they are not thread safe (by virtue of
        # the base API). These will be the effective learner objects tested
        # but will be replaced with the originals on return (see restore
        # learners below)
        learners_c = [copy.deepcopy(learner) for learner in learners]

        if self.resampling == OWTestLearners.KFold:
            folds = self.NFolds[self.n_folds]
            test_f = partial(Orange.evaluation.CrossValidation,
                             self.data,
                             learners_c,
                             k=folds,
                             random_state=rstate,
                             **common_args)
        elif self.resampling == OWTestLearners.FeatureFold:
            test_f = partial(Orange.evaluation.CrossValidationFeature,
                             self.data, learners_c, self.fold_feature,
                             **common_args)
        elif self.resampling == OWTestLearners.LeaveOneOut:
            test_f = partial(Orange.evaluation.LeaveOneOut, self.data,
                             learners_c, **common_args)
        elif self.resampling == OWTestLearners.ShuffleSplit:
            # SampleSizes holds percentages; convert to a fraction.
            train_size = self.SampleSizes[self.sample_size] / 100
            test_f = partial(Orange.evaluation.ShuffleSplit,
                             self.data,
                             learners_c,
                             n_resamples=self.NRepeats[self.n_repeats],
                             train_size=train_size,
                             test_size=None,
                             stratified=self.shuffle_stratified,
                             random_state=rstate,
                             **common_args)
        elif self.resampling == OWTestLearners.TestOnTrain:
            test_f = partial(Orange.evaluation.TestOnTrainingData, self.data,
                             learners_c, **common_args)
        elif self.resampling == OWTestLearners.TestOnTest:
            test_f = partial(Orange.evaluation.TestOnTestData, self.data,
                             self.test_data, learners_c, **common_args)
        else:
            assert False, "self.resampling %s" % self.resampling

        def replace_learners(evalfunc, *args, **kwargs):
            # Run the evaluation, then swap the copies in the results for
            # the original learner objects (__task_complete looks results up
            # by the original learner).
            res = evalfunc(*args, **kwargs)
            assert all(lc is lo for lc, lo in zip(learners_c, res.learners))
            res.learners[:] = learners
            return res

        test_f = partial(replace_learners, test_f)

        self.__submit(test_f)

    def __submit(self, testfunc):
        # type: (Callable[[Callable[float]], Results]) -> None
        """
        Submit a testing function for evaluation

        MUST not be called if an evaluation is already pending/running.
        Cancel the existing task first.

        The function is run on the widget's executor; meanwhile the widget
        is set blocking and shows a progress bar and a "Running" status.

        Parameters
        ----------
        testfunc : Callable[[Callable[float]], Results]
            Must be a callable taking a single `callback` argument and
            returning a Results instance
        """
        assert self.__state != State.Running
        # Setup the task
        task = Task()

        def progress_callback(finished):
            # Raising here is how a cancelled task stops the evaluation.
            if task.cancelled:
                raise UserInterrupt()
            # `finished` is scaled by 100 for the progress bar; the update is
            # delivered through a queued connection.
            QMetaObject.invokeMethod(self, "setProgressValue",
                                     Qt.QueuedConnection,
                                     Q_ARG(float, 100 * finished))

        def ondone(_):
            # Hand the finished task to __task_complete via a queued call.
            QMetaObject.invokeMethod(self, "__task_complete",
                                     Qt.QueuedConnection, Q_ARG(object, task))

        testfunc = partial(testfunc, callback=progress_callback)
        task.future = self.__executor.submit(testfunc)
        task.future.add_done_callback(ondone)

        self.progressBarInit(processEvents=None)
        self.setBlocking(True)
        self.setStatusMessage("Running")

        self.__state = State.Running
        self.__task = task

    @Slot(object)
    def __task_complete(self, task):
        """
        Handle a finished evaluation task.

        A task that is no longer the current one is ignored. Otherwise the
        running indicators are reset and the results are split per learner,
        scored with the current scorers and stored in the learner slots,
        after which the table is refreshed and the outputs committed. If the
        evaluation raised, the error is shown and nothing else is updated.
        """
        # handle a completed task
        assert self.thread() is QThread.currentThread()
        if self.__task is not task:
            assert task.cancelled
            log.debug("Reaping cancelled task: %r", "<>")
            return

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)
        self.setStatusMessage("")
        result = task.future
        assert result.done()
        self.__task = None
        try:
            results = result.result()  # type: Results
            learners = results.learners  # type: List[Learner]
        except Exception as er:
            log.exception("testing error (in __task_complete):", exc_info=True)
            self.error("\n".join(traceback.format_exception_only(type(er),
                                                                 er)))
            self.__state = State.Done
            return

        self.__state = State.Done

        # Map each learner object back to the input key of its slot.
        learner_key = {
            slot.learner: key
            for key, slot in self.learners.items()
        }
        assert all(learner in learner_key for learner in learners)

        # Update the results for individual learners
        class_var = results.domain.class_var
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                # After the split each result covers a single learner, so
                # only the first `failed` entry is inspected.
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(self.scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [
                        Try(scorer_caller(scorer, result))
                        for scorer in self.scorers
                    ]
                    result = Try.Success(result)
            key = learner_key.get(learner)
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self._update_header()
        self._update_stats_model()

        self.commit()

    def cancel(self):
        """
        Cancel the current/pending evaluation (if any).

        Also resets the blocking state, progress bar and status message set
        up by __submit, so the widget is not left looking busy when no new
        evaluation is submitted afterwards.
        """
        if self.__task is not None:
            assert self.__state == State.Running
            self.__state = State.Cancelled
            task, self.__task = self.__task, None
            task.cancel()
            assert task.future.done()
            # __task_complete ignores a task that is no longer the current
            # one, so it will not reset these for the cancelled task.
            self.setBlocking(False)
            self.progressBarFinished(processEvents=None)
            self.setStatusMessage("")

    def onDeleteWidget(self):
        """Cancel any running evaluation before the widget is deleted."""
        self.cancel()
        super().onDeleteWidget()
class OWItemsets(widget.OWWidget):
    name = 'Frequent Itemsets'
    description = 'Explore sets of items that frequently appear together.'
    icon = 'icons/FrequentItemsets.svg'
    priority = 10

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        matching_data = Output("Matching Data", Table)

    class Error(widget.OWWidget.Error):
        need_discrete_data = widget.Msg(
            "Need some discrete data to work with.")
        no_disc_features = widget.Msg(
            "Discrete features required but data has none.")

    class Warning(widget.OWWidget.Warning):
        cont_attrs = widget.Msg(
            "Data has continuous attributes which will be skipped.")
        err_reg_expression = widget.Msg("Error in regular expression: {}")

    minSupport = settings.Setting(30)
    maxItemsets = settings.Setting(10000)
    filterSearch = settings.Setting(True)
    autoFind = settings.Setting(False)
    autoSend = settings.Setting(True)
    filterKeywords = settings.Setting('')
    filterMinItems = settings.Setting(1)
    filterMaxItems = settings.Setting(10000)

    UserAdviceMessages = [
        widget.Message(
            'Itemset are listed in item-sorted order, i.e. '
            'an itemset containing A and B is only listed once, as '
            'A > B (and not also B > A).', 'itemsets-order',
            widget.Message.Warning),
        widget.Message(
            'To select all the itemsets that are descendants of '
            '(include) some item X (i.e. the whole subtree), you '
            'can fold the subtree at that item and then select it.',
            'itemsets-order', widget.Message.Information)
    ]

    def __init__(self):
        """Set up the widget state and build the tree view and the controls."""
        self.data = None
        # The table sent by commit(); it must exist before the first
        # selection because "Send selection" can call commit() at any time.
        self.output = None
        self._is_running = False
        # Until a keyword filter is compiled, every item name matches.
        self.isRegexMatch = lambda x: True
        self.tree = QTreeWidget(self.mainArea,
                                columnCount=2,
                                allColumnsShowFocus=True,
                                alternatingRowColors=True,
                                selectionMode=QTreeWidget.ExtendedSelection,
                                uniformRowHeights=True)
        self.tree.setHeaderLabels(["Itemsets", "Support", "%"])
        self.tree.header().setStretchLastSection(True)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)
        self.mainArea.layout().addWidget(self.tree)

        # Info box: counters shown through the labels' %-placeholders.
        box = gui.widgetBox(self.controlArea, "Info")
        self.nItemsets = self.nSelectedExamples = self.nSelectedItemsets = ''
        gui.label(box, self, "Number of itemsets: %(nItemsets)s")
        gui.label(box, self, "Selected itemsets: %(nSelectedItemsets)s")
        gui.label(box, self, "Selected examples: %(nSelectedExamples)s")
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.button(hbox, self, "Expand all", callback=self.tree.expandAll)
        gui.button(hbox, self, "Collapse all", callback=self.tree.collapseAll)

        # Search parameters.
        box = gui.widgetBox(self.controlArea, 'Find itemsets')
        gui.valueSlider(box,
                        self,
                        'minSupport',
                        values=[.0001, .0005, .001, .005, .01, .05, .1, .5] +
                        list(range(1, 101)),
                        label='Minimal support:',
                        labelFormat="%g%%",
                        callback=lambda: self.find_itemsets())
        gui.hSlider(box,
                    self,
                    'maxItemsets',
                    minValue=10000,
                    maxValue=100000,
                    step=10000,
                    label='Max. number of itemsets:',
                    labelFormat="%d",
                    callback=lambda: self.find_itemsets())
        self.button = gui.auto_commit(
            box,
            self,
            'autoFind',
            'Find Itemsets',
            commit=self.find_itemsets,
            callback=lambda: self.autoFind and self.find_itemsets())

        # Display/search filters.
        box = gui.widgetBox(self.controlArea, 'Filter itemsets')
        gui.lineEdit(box,
                     self,
                     'filterKeywords',
                     'Contains:',
                     callback=self.filter_change,
                     orientation='horizontal',
                     tooltip='A comma or space-separated list of regular '
                     'expressions.')
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.spin(hbox,
                 self,
                 'filterMinItems',
                 1,
                 998,
                 label='Min. items:',
                 callback=self.filter_change)
        gui.spin(hbox,
                 self,
                 'filterMaxItems',
                 2,
                 999,
                 label='Max. items:',
                 callback=self.filter_change)
        gui.checkBox(box,
                     self,
                     'filterSearch',
                     label='Apply these filters in search',
                     tooltip='If checked, the itemsets are filtered according '
                     'to these filter conditions already in the search '
                     'phase. \nIf unchecked, the only filters applied '
                     'during search are the ones above, '
                     'and the itemsets are \nfiltered afterwards only for '
                     'display, i.e. only the matching itemsets are shown.')

        gui.rubber(hbox)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, 'autoSend', 'Send selection')

        # Compile the initial keyword filter from the stored setting.
        self.filter_change()

    ITEM_DATA_ROLE = Qt.UserRole + 1

    def selectionChanged(self):
        """
        Recompute the output after the selection in the tree has changed.

        A selected node that is not expanded stands for its whole subtree:
        all its descendants are added to the selection and counted. For each
        counted node the data rows matching the itemset on the path from the
        node to the top of the tree are collected, and the union of those
        rows is sent to the output.
        """
        X = self.X
        mapping = self.onehot_mapping
        tree = self.tree
        role = self.ITEM_DATA_ROLE
        matching_rows = set()

        def subtree(root):
            # Pre-order walk over `root` and all of its descendants.
            yield root
            for idx in range(root.childCount()):
                yield from subtree(root.child(idx))

        def items_on_path(leaf):
            # Items stored on `leaf` and on each of its ancestors.
            current = leaf
            while current:
                yield current.data(0, role)
                current = current.parent()

        def child_ranges(root):
            # (first child, last child) index pairs for every level below
            # `root`, suitable for QItemSelection.select().
            count = root.childCount()
            if count:
                yield (tree.indexFromItem(root.child(0)),
                       tree.indexFromItem(root.child(count - 1)))
            for idx in range(count):
                yield from child_ranges(root.child(idx))

        n_selected = 0
        to_select = QItemSelection()
        for chosen in tree.selectedItems():
            if chosen.isExpanded():
                members = (chosen, )
            else:
                for first, last in child_ranges(chosen):
                    to_select.select(first, last)
                members = subtree(chosen)
            for member in members:
                n_selected += 1
                cols, vals = zip(*(mapping[i] for i in items_on_path(member)))
                if issparse(X):
                    # Per row, count the non-zero cells among `cols`; a row
                    # matches when all of the columns are set.
                    hits = np.bincount((X[:, cols] != 0).indices,
                                       minlength=X.shape[0])
                    rows = (len(cols) == hits).nonzero()[0]
                else:
                    rows = np.where((X[:, cols] == vals).all(axis=1))[0]
                matching_rows.update(rows)

        # Extend the selection without re-triggering this handler.
        self.tree.itemSelectionChanged.disconnect(self.selectionChanged)
        self.tree.selectionModel().select(
            to_select,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)

        self.nSelectedExamples = len(matching_rows)
        self.nSelectedItemsets = n_selected
        self.output = self.data[sorted(matching_rows)] or None
        self.commit()

    def commit(self):
        """Send ``self.output`` to the matching-data output."""
        self.Outputs.matching_data.send(self.output)

    def filter_change(self):
        """
        Recompile the keyword filter and re-apply the filters to the tree.

        ``filterKeywords`` is split on runs of commas and whitespace and the
        parts are joined into one case-insensitive regular expression, whose
        ``search`` is stored in ``self.isRegexMatch``. If the expression does
        not compile, a warning is shown and a match-everything predicate is
        used instead. Tree nodes are then hidden or shown according to the
        keyword and item-count filters.
        """
        self.Warning.err_reg_expression.clear()
        # The group must be non-capturing: re.split() also returns the text
        # matched by capturing groups, which used to turn a separating comma
        # into a pattern alternative of its own.
        keywords = [
            kw for kw in re.split(r'(?:,|\s)+', self.filterKeywords.strip())
            if kw
        ]
        try:
            isRegexMatch = re.compile('|'.join(keywords),
                                      re.IGNORECASE).search
        except re.error as e:
            self.Warning.err_reg_expression(e.args[0])
            isRegexMatch = lambda x: True
        self.isRegexMatch = isRegexMatch

        def hide(node, depth, has_kw):
            # `has_kw` is inherited: once a node on the path matches, all
            # nodes below it count as matching.
            if not has_kw:
                has_kw = isRegexMatch(node.text(0))
            # An inner node is hidden when all of its children are hidden.
            # sum() is used (not all()) so that hide() is called on every
            # child, without short-circuiting. A leaf is hidden when it does
            # not match the keywords or its depth is outside the item limits.
            hidden = (
                sum(
                    hide(node.child(i), depth + 1, has_kw)
                    for i in range(node.childCount())) == node.childCount()
                if node.childCount() else
                (not has_kw
                 or not self.filterMinItems <= depth <= self.filterMaxItems))
            node.setHidden(hidden)
            return hidden

        hide(self.tree.invisibleRootItem(), 0, False)

    class TreeWidgetItem(QTreeWidgetItem):
        """Tree item whose tooltip is assembled on demand."""

        def data(self, column, role):
            """
            Return the item data for `column` and `role`.

            For the tooltip role the result lists the first-column texts of
            the item's ancestors and of the item itself, topmost ancestor
            first, one per line.
            """
            if role == Qt.ToolTipRole:
                path = []
                node = self
                while node:
                    path.append(node.text(0))
                    node = node.parent()
                path.reverse()
                return '\n'.join(path)
            return super().data(column, role)

    def find_itemsets(self):
        """
        Mine frequent itemsets from the input data and show them in the tree.

        The search processes pending Qt events between itemsets, so this
        method can be re-entered while it is running; such a call only asks
        the running search to stop and returns. However the search ends
        (completion, cancellation, no discrete data, or an error), the tree
        and the button are put back into their idle state.
        """
        if self.data is None or not len(self.data):
            return
        if self._is_running:
            # Re-entered during a running search: signal it to stop.
            self._is_running = False
            return
        self._is_running = True

        self.button.button.setText('Cancel')

        data = self.data
        self.tree.clear()
        self.tree.setUpdatesEnabled(False)
        self.tree.blockSignals(True)

        class ItemDict(dict):
            """Maps child item names to ItemDicts; `item` is the tree node."""

            def __init__(self, item):
                super().__init__()
                self.item = item

        try:
            top = ItemDict(self.tree.invisibleRootItem())
            X, mapping = OneHot.encode(data)
            self.Error.need_discrete_data.clear()
            if X is None:
                # Nothing to mine; the tree has already been cleared above.
                self.Error.need_discrete_data()
                self.nItemsets = 0
                self.nSelectedItemsets = 0
                self.nSelectedExamples = 0
                return

            self.onehot_mapping = mapping
            ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
            names = {
                item: ITEM_FMT.format(var.name, val)
                for item, var, val in OneHot.decode(mapping.keys(), data,
                                                    mapping)
            }
            nItemsets = 0

            # Snapshot the filter settings for the duration of the search.
            filterSearch = self.filterSearch
            filterMinItems = self.filterMinItems
            filterMaxItems = self.filterMaxItems
            isRegexMatch = self.isRegexMatch

            # Find itemsets and populate the TreeView
            with self.progressBar(self.maxItemsets + 1) as progress:
                for itemset, support in frequent_itemsets(
                        X, self.minSupport / 100):

                    if filterSearch and not filterMinItems <= len(
                            itemset) <= filterMaxItems:
                        continue

                    parent = top
                    first_new_item = None
                    itemset_matches_filter = False

                    for item in sorted(itemset):
                        name = names[item]

                        if filterSearch and not itemset_matches_filter:
                            itemset_matches_filter = isRegexMatch(name)

                        child = parent.get(name)
                        if child is None:
                            try:
                                wi = self.TreeWidgetItem(parent.item, [
                                    name,
                                    str(support), '{:.4g}'.format(
                                        100 * support / len(data))
                                ])
                            except RuntimeError:
                                # FIXME: When autoFind was in effect and the
                                # support slider was moved, this line
                                # excepted with:
                                #     RuntimeError: wrapped C/C++ object of
                                #         type TreeWidgetItem has been deleted
                                return
                            wi.setData(0, self.ITEM_DATA_ROLE, item)
                            child = parent[name] = ItemDict(wi)

                            if first_new_item is None:
                                first_new_item = (parent, name)
                        parent = child

                    if filterSearch and not itemset_matches_filter:
                        # Undo the nodes created for this itemset. When its
                        # whole path already existed (as the prefix of an
                        # itemset added earlier) nothing was created and
                        # there is nothing to remove.
                        if first_new_item is not None:
                            parent, name = first_new_item
                            parent.item.removeChild(parent[name].item)
                            del parent[name].item
                            del parent[name]
                    else:
                        nItemsets += 1
                        progress.advance()

                    if not self._is_running or nItemsets >= self.maxItemsets:
                        break

                    # Keep the GUI responsive; this is also what allows the
                    # re-entrant cancel request handled at the top.
                    qApp.processEvents()

            if not filterSearch:
                self.filter_change()
            self.nItemsets = nItemsets
            self.nSelectedItemsets = 0
            self.nSelectedExamples = 0
            self.tree.expandAll()
            for i in range(self.tree.columnCount()):
                self.tree.resizeColumnToContents(i)
        finally:
            # Always leave the widget usable, including on the early returns
            # and on unexpected errors.
            self.tree.setUpdatesEnabled(True)
            self.tree.blockSignals(False)
            self._is_running = False
            self.button.button.setText('Find Itemsets')

    @Inputs.data
    def set_data(self, data):
        """
        Handle new input data (or its removal when `data` is None).

        Dense data must contain at least one discrete attribute, otherwise
        an error is shown and the search button is disabled. Continuous
        attributes in dense data only trigger a warning. When the input is
        removed, the tree and the output are cleared. If automatic search is
        on and the data is usable, the search is started.
        """
        self.data = data
        # Messages describe the current input only, so drop the old ones
        # even when the input has been removed.
        self.Warning.cont_attrs.clear()
        self.Error.no_disc_features.clear()
        is_error = False
        if data is not None:
            self.button.setDisabled(False)
            self.X = data.X
            if issparse(data.X):
                self.X = data.X.tocsc()
            else:
                if not data.domain.has_discrete_attributes():
                    self.Error.no_disc_features()
                    is_error = True
                    self.button.setDisabled(True)
                elif data.domain.has_continuous_attributes():
                    self.Warning.cont_attrs()
        else:
            # Remove the itemsets of the previous data: selecting them would
            # index into data that is no longer there. Signals are blocked
            # so that clearing does not invoke selectionChanged().
            self.tree.blockSignals(True)
            self.tree.clear()
            self.tree.blockSignals(False)
            self.nItemsets = self.nSelectedItemsets = ''
            self.nSelectedExamples = ''
            self.output = None
            self.commit()
        if self.autoFind and not is_error:
            self.find_itemsets()
class OWImageViewer(widget.OWWidget):
    name = "Image Viewer"
    description = "View images referred to in the data."
    icon = "icons/ImageViewer.svg"
    priority = 130
    replaces = ["Orange.widgets.data.owimageviewer.OWImageViewer", ]

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.DomainContextHandler()

    imageAttr = settings.ContextSetting(0)
    titleAttr = settings.ContextSetting(0)

    imageSize = settings.Setting(100)
    autoCommit = settings.Setting(True)

    buttons_area_orientation = Qt.Vertical
    graph_name = "scene"

    UserAdviceMessages = [
        widget.Message(
            "Pressing the 'Space' key while the thumbnail view has focus and "
            "a selected item will open a window with a full image",
            persistent_id="preview-introduction")
    ]

    def __init__(self):
        """Initialise the widget state and build the controls and the view."""
        super().__init__()
        self.data = None
        # Variables offered in the title combo / in the image-filename combo.
        self.allAttrs = []
        self.stringAttrs = []

        # Row indices (into self.data) of the currently selected thumbnails.
        self.selectedIndices = []

        #: List of _ImageItems
        self.items = []

        # Counters of failed / successfully loaded images for the status text.
        self._errcount = 0
        self._successcount = 0

        self.info = gui.widgetLabel(
            gui.vBox(self.controlArea, "Info"),
            "Waiting for input.\n"
        )

        # Changing the image attribute rebuilds the whole scene.
        self.imageAttrCB = gui.comboBox(
            self.controlArea, self, "imageAttr",
            box="Image Filename Attribute",
            tooltip="Attribute with image filenames",
            callback=[self.clearScene, self.setupScene],
            contentsLength=12,
            addSpace=True,
        )

        # Changing the title attribute only relabels existing thumbnails.
        self.titleAttrCB = gui.comboBox(
            self.controlArea, self, "titleAttr",
            box="Title Attribute",
            tooltip="Attribute with image title",
            callback=self.updateTitles,
            contentsLength=12,
            addSpace=True
        )

        gui.hSlider(
            self.controlArea, self, "imageSize",
            box="Image Size", minValue=32, maxValue=1024, step=16,
            callback=self.updateSize,
            createLabel=False
        )
        gui.rubber(self.controlArea)

        gui.auto_commit(self.buttonsArea, self, "autoCommit", "Send", box=False)

        self.thumbnailView = ThumbnailView(
            alignment=Qt.AlignTop | Qt.AlignLeft,  # scene alignment,
            focusPolicy=Qt.StrongFocus,
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn
        )
        self.mainArea.layout().addWidget(self.thumbnailView)
        self.scene = self.thumbnailView.scene()
        self.scene.selectionChanged.connect(self.onSelectionChanged)
        # Used by setupScene() to fetch images that are not local files.
        self.loader = ImageLoader(self)

    def sizeHint(self):
        """Return the preferred size of the widget, 800 x 600."""
        return QSize(800, 600)

    @Inputs.data
    def setData(self, data):
        """
        Set the input data table (None when the input is removed).

        The previous state is cleared first. For new data the attribute
        combos are filled, the first string meta tagged ``type=image`` is
        preselected as the image attribute, the stored context is restored
        and, if there is any string meta attribute, the scene is built.
        """
        self.closeContext()
        self.clear()
        self.data = data

        if data is None:
            self.info.setText("Waiting for input.\n")
            return

        domain = data.domain
        self.allAttrs = (domain.class_vars + domain.metas +
                         domain.attributes)
        # String metas, those carrying a "type" attribute first (the sort is
        # stable, so the domain order is otherwise kept).
        self.stringAttrs = sorted(
            (var for var in domain.metas if var.is_string),
            key=lambda var: "type" not in var.attributes
        )

        tagged = [pos for pos, var in enumerate(self.stringAttrs)
                  if var.attributes.get("type") == "image"]
        if tagged:
            self.imageAttr = tagged[0]

        self.imageAttrCB.setModel(VariableListModel(self.stringAttrs))
        self.titleAttrCB.setModel(VariableListModel(self.allAttrs))

        self.openContext(data)

        # Clamp the (possibly restored) indices to the valid range.
        last_image = len(self.stringAttrs) - 1
        last_title = len(self.allAttrs) - 1
        self.imageAttr = max(min(self.imageAttr, last_image), 0)
        self.titleAttr = max(min(self.titleAttr, last_title), 0)

        if self.stringAttrs:
            self.setupScene()

    def clear(self):
        """Drop the data, the error message, the combo contents and the scene."""
        self.data = None
        self.error()
        for combo in (self.imageAttrCB, self.titleAttrCB):
            combo.clear()
        self.clearScene()

    def setupScene(self):
        """
        Create a thumbnail for every data row with a known image value.

        Local files are read immediately; other valid URLs are requested
        from ``self.loader`` and shown once the returned future completes.
        Every created thumbnail is recorded in ``self.items`` together with
        its row index, URL and future (None for invalid URLs).
        """
        self.error()
        if not self.data:
            return

        image_var = self.stringAttrs[self.imageAttr]
        title_var = self.allAttrs[self.titleAttr]
        assert self.thumbnailView.count() == 0
        thumb_size = QSizeF(self.imageSize, self.imageSize)

        for row, inst in enumerate(self.data):
            if not numpy.isfinite(inst[image_var]):  # skip missing
                continue
            url = self.urlFromValue(inst[image_var])
            caption = str(inst[title_var])

            thumbnail = GraphicsThumbnailWidget(QPixmap(), title=caption)
            thumbnail.setThumbnailSize(thumb_size)
            thumbnail.setToolTip(url.toString())
            thumbnail.instance = inst
            self.thumbnailView.addThumbnail(thumbnail)

            if not url.isValid():
                future = None
            elif url.isLocalFile():
                reader = QImageReader(url.toLocalFile())
                image = reader.read()
                if image.isNull():
                    # Reading failed: append the reason to the tooltip.
                    thumbnail.setToolTip(
                        thumbnail.toolTip() + "\n" + reader.errorString())
                    self._errcount += 1
                else:
                    thumbnail.setPixmap(QPixmap.fromImage(image))
                    self._successcount += 1

                # An already completed future, so that local and remote
                # images are handled uniformly elsewhere.
                future = Future()
                future.set_result(image)
                future._reply = None
            else:
                future = self.loader.get(url)

                def show_result(done, thumb=thumbnail):
                    # `thumb` is bound as a default so that each callback
                    # keeps the thumbnail of its own loop iteration.
                    if done.cancelled():
                        return

                    assert done.done()

                    failure = done.exception()
                    if failure:
                        # Should be some generic error image.
                        pixmap = QPixmap()
                        thumb.setToolTip(thumb.toolTip() + "\n" +
                                         str(failure))
                    else:
                        pixmap = QPixmap.fromImage(done.result())

                    thumb.setPixmap(pixmap)

                    self._noteCompleted(done)

                future.add_done_callback(show_result)

            self.items.append(_ImageItem(row, thumbnail, url, future))

        still_loading = any(
            entry.future is not None and not entry.future.done()
            for entry in self.items)
        if still_loading:
            self.info.setText("Retrieving...\n")
        else:
            self._updateStatus()

    def urlFromValue(self, value):
        """
        Return the URL of the image that `value` refers to.

        The value is used as a local file path if such a path exists;
        otherwise it is resolved against the variable's "origin" attribute
        (a directory or a URL). URLs without a scheme get the "file" scheme.
        """
        origin_attr = value.variable.attributes.get("origin", "")
        if not origin_attr:
            base_url = QUrl("")
        elif QDir(origin_attr).exists():
            base_url = QUrl.fromLocalFile(origin_attr)
        else:
            base_url = QUrl(origin_attr)
            if not base_url.scheme():
                base_url.setScheme("file")

        # Make a non-empty base path end with a slash before resolving
        # the value against it.
        base_path = base_url.path()
        if base_path.strip() and not base_path.endswith("/"):
            base_url.setPath(base_path + "/")

        text = str(value)
        if os.path.exists(text):
            url = QUrl.fromLocalFile(text)
        else:
            url = base_url.resolved(QUrl(text))
        if not url.scheme():
            url.setScheme("file")
        return url

    def _cancelAllFutures(self):
        """Cancel the futures of all items and dispose of their replies."""
        for entry in self.items:
            future = entry.future
            if future is None:
                continue
            future.cancel()
            reply = future._reply
            if reply is not None:
                reply.close()
                reply.deleteLater()
                future._reply = None

    def clearScene(self):
        """Cancel pending loads, remove all thumbnails and reset counters."""
        self._cancelAllFutures()

        self.items = []
        self.thumbnailView.clear()
        self._errcount = self._successcount = 0

    def thumbnailItems(self):
        """Return the list of thumbnail widgets of all items."""
        return [item.widget for item in self.items]

    def updateSize(self):
        """Resize all thumbnails to the square size set by ``imageSize``."""
        edge = self.imageSize
        new_size = QSizeF(edge, edge)
        for thumb in self.thumbnailItems():
            thumb.setThumbnailSize(new_size)

    def updateTitles(self):
        """Relabel every thumbnail with the value of the title attribute."""
        title_var = self.allAttrs[self.titleAttr]
        for entry in self.items:
            thumb = entry.widget
            thumb.setTitle(str(thumb.instance[title_var]))

    def onSelectionChanged(self):
        """Record the row indices of the selected thumbnails and commit."""
        self.selectedIndices = [
            entry.index for entry in self.items if entry.widget.isSelected()
        ]
        self.commit()

    def commit(self):
        """Send the selected rows, or None without data or selection."""
        selected = None
        if self.data and self.selectedIndices:
            selected = self.data[self.selectedIndices]
        self.Outputs.data.send(selected)

    def _noteCompleted(self, future):
        """Count a finished future as success or error and update the status.

        Cancelled futures are ignored.
        """
        if future.cancelled():
            return

        failure = future.exception()
        if failure:
            self._errcount += 1
            _log.debug("Error: %r", failure)
        else:
            self._successcount += 1

        self._updateStatus()

    def _updateStatus(self):
        """
        Show the loading progress in the info label.

        Once every item with a future has either succeeded or failed, a
        summary is shown instead. If all of them failed and the image
        attribute has no "type" attribute, an error hints at tagging it.
        """
        total = sum(1 for entry in self.items if entry.future is not None)
        self.info.setText(
            "Retrieving:\n" +
            "{} of {} images".format(self._successcount, total))

        if self._errcount + self._successcount != total:
            return

        if self._errcount:
            summary = "{} images, {} errors".format(total, self._errcount)
        else:
            summary = "{} images".format(total)
        self.info.setText("Done:\n" + summary)

        attr = self.stringAttrs[self.imageAttr]
        if self._errcount == total and "type" not in attr.attributes:
            self.error("No images found! Make sure the '%s' attribute "
                       "is tagged with 'type=image'" % attr.name)

    def onDeleteWidget(self):
        """Cancel pending image loads and release the scene on deletion."""
        self._cancelAllFutures()
        self.clear()
        # Chain to the base class so its clean-up hook also runs.
        super().onDeleteWidget()
Example #10
0
class OWFile(widget.OWWidget, RecentPathsWComboMixin):
    name = "文件(File)"
    id = "orange.widgets.data.file"
    description = "从输入文件或网络读取数据并将数据表发送到输出。"

    icon = "icons/File.svg"
    priority = 10
    category = "Data"
    keywords = ["file", "load", "read", "open"]

    class Outputs:
        data = Output("数据(Data)",
                      Table,
                      doc="Attribute-valued dataset read from the input file.",
                      replaces=['Data'])

    want_main_area = False

    SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())]
    SIZE_LIMIT = 1e7
    LOCAL_FILE, URL = range(2)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    # pylint seems to want declarations separated from definitions
    recent_paths: List[RecentPath]
    recent_urls: List[str]
    variables: list

    # Overload RecentPathsWidgetMixin.recent_paths to set defaults
    recent_paths = Setting([
        RecentPath("", "sample-datasets", "iris.tab"),
        RecentPath("", "sample-datasets", "titanic.tab"),
        RecentPath("", "sample-datasets", "housing.tab"),
        RecentPath("", "sample-datasets", "heart_disease.tab"),
        RecentPath("", "sample-datasets", "brown-selected.tab"),
        RecentPath("", "sample-datasets", "zoo.tab"),
    ])
    recent_urls = Setting([])
    source = Setting(LOCAL_FILE)
    xls_sheet = ContextSetting("")
    sheet_names = Setting({})
    url = Setting("")

    variables = ContextSetting([])

    domain_editor = SettingProvider(DomainEditor)

    class Warning(widget.OWWidget.Warning):
        file_too_big = widget.Msg(
            "The file is too large to load automatically."
            " Press Reload to load.")
        load_warning = widget.Msg("Read warning:\n{}")
        performance_warning = widget.Msg(
            "Categorical variables with >100 values may decrease performance.")

    class Error(widget.OWWidget.Error):
        file_not_found = widget.Msg("File not found.")
        missing_reader = widget.Msg("Missing reader.")
        sheet_error = widget.Msg("Error listing available sheets.")
        unknown = widget.Msg("Read error:\n{}")

    class NoFileSelected:
        pass

    UserAdviceMessages = [
        widget.Message(
            "Use CSV File Import widget for advanced options "
            "for comma-separated files", "use-csv-file-import"),
        widget.Message(
            "This widget loads only tabular data. Use other widgets to load "
            "other data types like models, distance matrices and networks.",
            "other-data-types")
    ]

    def __init__(self):
        """
        Build the widget GUI and schedule the initial load of the data.

        The controls are laid out in a grid: a row for choosing a local file
        (combo, browse and reload buttons), a hidden row for choosing a
        sheet, and a row for entering a URL. Below are the info box, the
        domain editor and the browse/reset/apply buttons.
        """
        super().__init__()
        RecentPathsWComboMixin.__init__(self)
        self.domain = None
        self.data = None
        self.loaded_file = ""
        self.reader = None

        layout = QGridLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)
        # Radio group bound to the `source` setting; its buttons are placed
        # into the grid manually below.
        vbox = gui.radioButtons(None,
                                self,
                                "source",
                                box=True,
                                addSpace=True,
                                callback=self.load_data,
                                addToLayout=False)

        # Row 0: local file.
        rb_button = gui.appendRadioButton(vbox, "文件:", addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.activated[int].connect(self.select_file)
        box.layout().addWidget(self.file_combo)
        layout.addWidget(box, 0, 1)

        file_button = gui.button(None,
                                 self,
                                 '...',
                                 callback=self.browse_file,
                                 autoDefault=False)
        file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon))
        file_button.setSizePolicy(Policy.Maximum, Policy.Fixed)
        layout.addWidget(file_button, 0, 2)

        reload_button = gui.button(None,
                                   self,
                                   "重新加载",
                                   callback=self.load_data,
                                   autoDefault=False)
        reload_button.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed)
        layout.addWidget(reload_button, 0, 3)

        # Row 2: sheet selection; hidden initially.
        self.sheet_box = gui.hBox(None, addToLayout=False, margin=0)
        self.sheet_combo = gui.comboBox(
            None,
            self,
            "xls_sheet",
            callback=self.select_sheet,
            sendSelectedValue=True,
        )
        self.sheet_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_label = QLabel()
        self.sheet_label.setText('Sheet')
        self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft)
        self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter)
        layout.addWidget(self.sheet_box, 2, 1)
        self.sheet_box.hide()

        # Row 3: URL.
        rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        self.url_combo = url_combo = QComboBox()
        url_model = NamedURLModel(self.sheet_names)
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(Policy.Ignored, Policy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        # Widen the left text margin of the line edit by 5.
        l, t, r, b = url_edit.getTextMargins()
        url_edit.setTextMargins(l + 5, t, r, b)
        layout.addWidget(url_combo, 3, 1, 3, 3)
        url_combo.activated.connect(self._url_set)
        # With the completer we make the combo box case sensitive when
        # matching the history
        completer = QCompleter()
        completer.setCaseSensitivity(Qt.CaseSensitive)
        url_combo.setCompleter(completer)

        box = gui.vBox(self.controlArea, "信息")
        self.infolabel = gui.widgetLabel(box, '未加载数据。')
        self.warnings = gui.widgetLabel(box, '')

        box = gui.widgetBox(self.controlArea, "列(双击编辑)")
        self.domain_editor = DomainEditor(self)
        self.editor_model = self.domain_editor.model()
        box.layout().addWidget(self.domain_editor)

        box = gui.hBox(self.controlArea)
        gui.button(box,
                   self,
                   "浏览文档数据集",
                   callback=lambda: self.browse_file(True),
                   autoDefault=False)
        gui.rubber(box)

        gui.button(box, self, "重置", callback=self.reset_domain_edit)
        self.apply_button = gui.button(box,
                                       self,
                                       "应用",
                                       callback=self.apply_domain_edit)
        # The apply button is enabled only after the editor model changes.
        self.apply_button.setEnabled(False)
        self.apply_button.setFixedWidth(170)
        self.editor_model.dataChanged.connect(
            lambda: self.apply_button.setEnabled(True))

        self.set_file_list()
        # Must not call open_file from within __init__. open_file
        # explicitly re-enters the event loop (by a progress bar)

        self.setAcceptDrops(True)

        # Do not load a local file larger than SIZE_LIMIT automatically;
        # show a warning and leave loading to the user.
        if self.source == self.LOCAL_FILE:
            last_path = self.last_path()
            if last_path and os.path.exists(last_path) and \
                    os.path.getsize(last_path) > self.SIZE_LIMIT:
                self.Warning.file_too_big()
                return

        # Defer loading until control returns to the event loop.
        QTimer.singleShot(0, self.load_data)

    @staticmethod
    def sizeHint():
        """Return the preferred initial size of the widget window."""
        preferred_width, preferred_height = 600, 550
        return QSize(preferred_width, preferred_height)

    def select_file(self, n):
        """Select the n-th recent file and load it.

        The parent implementation handles the selection itself; afterwards
        the source is switched to "local file", the data is (re)loaded and
        the file list is refreshed.
        """
        assert n < len(self.recent_paths)
        super().select_file(n)
        if not self.recent_paths:
            return
        self.source = self.LOCAL_FILE
        self.load_data()
        self.set_file_list()

    def select_sheet(self):
        """Store the sheet chosen in the sheet combo and reload the data."""
        chosen_sheet = self.sheet_combo.currentText()
        current_path = self.recent_paths[0]
        current_path.sheet = chosen_sheet
        self.load_data()

    def _url_set(self):
        url = self.url_combo.currentText()
        pos = self.recent_urls.index(url)
        url = url.strip()

        if not urlparse(url).scheme:
            url = 'http://' + url
            self.url_combo.setItemText(pos, url)
            self.recent_urls[pos] = url

        self.source = self.URL
        self.load_data()

    def browse_file(self, in_demos=False):
        """Let the user pick a file in a dialog and load it.

        With ``in_demos`` set, the dialog starts in the sample datasets
        directory; otherwise it starts at the last used path or, failing
        that, the user's home directory.
        """
        if not in_demos:
            initial_dir = self.last_path() or os.path.expanduser("~/")
        else:
            initial_dir = get_sample_datasets_dir()
            if not os.path.exists(initial_dir):
                QMessageBox.information(None, "文件", "无法找到文件")
                return

        # Offer only formats that can be read and that declare extensions.
        offered_formats = []
        for fmt in FileFormat.formats:
            if getattr(fmt, 'read', None) and getattr(fmt, "EXTENSIONS", None):
                offered_formats.append(fmt)

        chosen_path, chosen_reader, _ = open_filename_dialog(
            initial_dir, None, offered_formats)
        if not chosen_path:
            return

        self.add_path(chosen_path)
        if chosen_reader is not None:
            qualified = chosen_reader.qualified_name()
            self.recent_paths[0].file_format = qualified

        self.source = self.LOCAL_FILE
        self.load_data()

    # Open a file, create data from it and send it over the data channel
    def load_data(self):
        """Reset the widget state and (re)load data from the current source.

        If ``_try_load`` reports a failure, the returned error is shown, the
        data is dropped and ``None`` is sent to the output.
        """
        self.closeContext()
        self.domain_editor.set_domain(None)
        self.apply_button.setEnabled(False)
        self.clear_messages()
        self.set_file_list()

        show_error = self._try_load()
        if not show_error:
            return

        show_error()
        self.data = None
        self.sheet_box.hide()
        self.Outputs.data.send(None)
        self.infolabel.setText("无数据。")

    def _try_load(self):
        # pylint: disable=broad-except
        if self.last_path() and not os.path.exists(self.last_path()):
            return self.Error.file_not_found

        try:
            self.reader = self._get_reader()
            assert self.reader is not None
        except Exception:
            return self.Error.missing_reader

        if self.reader is self.NoFileSelected:
            self.Outputs.data.send(None)
            return None

        try:
            self._update_sheet_combo()
        except Exception:
            return self.Error.sheet_error

        with catch_warnings(record=True) as warnings:
            try:
                data = self.reader.read()
            except Exception as ex:
                log.exception(ex)
                return lambda x=ex: self.Error.unknown(str(x))
            if warnings:
                self.Warning.load_warning(warnings[-1].message.args[0])

        self.infolabel.setText(self._describe(data))

        self.loaded_file = self.last_path()
        add_origin(data, self.loaded_file)
        self.data = data
        self.openContext(data.domain)
        self.apply_domain_edit()  # sends data
        return None

    def _get_reader(self) -> FileFormat:
        """Create a reader for the currently selected file or URL.

        Returns ``self.NoFileSelected`` when there is no local path or the
        URL field is empty.
        """
        if self.source != self.LOCAL_FILE:
            address = self.url_combo.currentText().strip()
            return UrlReader(address) if address else self.NoFileSelected

        path = self.last_path()
        if path is None:
            return self.NoFileSelected

        recent = self.recent_paths
        if recent and recent[0].file_format:
            # A format was stored for this path; use that reader class.
            reader_class = class_from_qualified_name(recent[0].file_format)
            reader = reader_class(path)
        else:
            reader = FileFormat.get_reader(path)
        if recent and recent[0].sheet:
            reader.select_sheet(recent[0].sheet)
        return reader

    def _update_sheet_combo(self):
        if len(self.reader.sheets) < 2:
            self.sheet_box.hide()
            self.reader.select_sheet(None)
            return

        self.sheet_combo.clear()
        self.sheet_combo.addItems(self.reader.sheets)
        self._select_active_sheet()
        self.sheet_box.show()

    def _select_active_sheet(self):
        if self.reader.sheet:
            try:
                idx = self.reader.sheets.index(self.reader.sheet)
                self.sheet_combo.setCurrentIndex(idx)
            except ValueError:
                # Requested sheet does not exist in this file
                self.reader.select_sheet(None)
        else:
            self.sheet_combo.setCurrentIndex(0)

    @staticmethod
    def _describe(table):
        def missing_prop(prop):
            if prop:
                return f"({prop * 100:.1f}% 个缺失值)"
            else:
                return "(无缺失值)"

        domain = table.domain
        text = ""

        attrs = getattr(table, "attributes", {})
        descs = [
            attrs[desc] for desc in ("Name", "Description") if desc in attrs
        ]
        if len(descs) == 2:
            descs[0] = f"<b>{descs[0]}</b>"
        if descs:
            text += f"<p>{'<br/>'.join(descs)}</p>"

        text += f"<p>{len(table)} 条数据"

        missing_in_attr = missing_prop(table.has_missing_attribute()
                                       and table.get_nan_frequency_attribute())
        missing_in_class = missing_prop(table.has_missing_class()
                                        and table.get_nan_frequency_class())
        text += f"<br/>特征数目: {len(domain.attributes)} {missing_in_attr}"
        if domain.has_continuous_class:
            text += f"<br/>回归; 数值类 {missing_in_class}"
        elif domain.has_discrete_class:
            text += "<br/>分类: 分类种类共 " \
                f"{len(domain.class_var.values)} 个 {missing_in_class}"
        elif table.domain.class_vars:
            text += "<br/>Multi-target; " \
                f"{len(table.domain.class_vars)} target variables " \
                f"{missing_in_class}"
        else:
            text += "<br/>Data has no target variable."
        text += f"<br/>元属性: { len(domain.metas)}"
        text += "</p>"

        if 'Timestamp' in table.domain:
            # Google Forms uses this header to timestamp responses
            text += f"<p>First entry: {table[0, 'Timestamp']}<br/>" \
                f"Last entry: {table[-1, 'Timestamp']}</p>"
        return text

    def storeSpecificSettings(self):
        """Save a copy of ``self.variables`` in the current context."""
        snapshot = self.variables[:]
        self.current_context.modified_variables = snapshot

    def retrieveSpecificSettings(self):
        """Restore ``self.variables`` in place from the context, if stored."""
        context = self.current_context
        if not hasattr(context, "modified_variables"):
            return
        self.variables[:] = context.modified_variables

    def reset_domain_edit(self):
        """Reset the domain editor, then apply the resulting domain."""
        editor = self.domain_editor
        editor.reset_domain()
        self.apply_domain_edit()

    def _inspect_discrete_variables(self, domain):
        for var in chain(domain.variables, domain.metas):
            if var.is_discrete and len(var.values) > 100:
                self.Warning.performance_warning()

    def apply_domain_edit(self):
        """Build the output table from the edited domain and send it.

        ``None`` is sent when there is no data or the edited domain has
        neither variables nor metas; the loaded table itself is sent when
        the editor returns the very same domain object.
        """
        self.Warning.performance_warning.clear()
        table = None
        if self.data is not None:
            domain, cols = self.domain_editor.get_domain(
                self.data.domain, self.data)
            if domain.variables or domain.metas:
                if domain is self.data.domain:
                    table = self.data
                else:
                    X, y, m = cols
                    table = Table.from_numpy(domain, X, y, m, self.data.W)
                    table.name = self.data.name
                    table.ids = np.array(self.data.ids)
                    table.attributes = getattr(self.data, 'attributes', {})
                    self._inspect_discrete_variables(domain)

        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)

    def get_widget_name_extension(self):
        """Return the loaded file's base name without its extension."""
        base_name = os.path.basename(self.loaded_file)
        stem, _ = os.path.splitext(base_name)
        return stem

    def send_report(self):
        """Add the data source and the data itself to the report.

        For a local file the reported name has the home directory
        abbreviated to ``~`` and, when the sheet selector is visible, the
        sheet name appended in parentheses. The format is resolved from the
        actual file path, not from that decorated display name.
        """
        def get_ext_name(filename):
            # Map the file extension to a format name; "unknown" if absent.
            try:
                return FileFormat.names[os.path.splitext(filename)[1]]
            except KeyError:
                return "unknown"

        if self.data is None:
            self.report_paragraph("File", "No file.")
            return

        if self.source == self.LOCAL_FILE:
            home = os.path.expanduser("~")
            if self.loaded_file.startswith(home):
                # os.path.join does not like ~
                name = "~" + os.path.sep + \
                       self.loaded_file[len(home):].lstrip("/").lstrip("\\")
            else:
                name = self.loaded_file
            # Look the format up before the sheet suffix is appended:
            # "file.xlsx (Sheet1)" has no recognizable extension.
            file_format = get_ext_name(self.loaded_file)
            if self.sheet_combo.isVisible():
                name += f" ({self.sheet_combo.currentText()})"
            self.report_items("File", [("File name", name),
                                       ("Format", file_format)])
        else:
            self.report_items("Data", [("Resource", self.url),
                                       ("Format", get_ext_name(self.url))])

        self.report_data("Data", self.data)

    @staticmethod
    def dragEnterEvent(event):
        """Accept drops of valid file urls"""
        urls = event.mimeData().urls()
        if urls:
            try:
                FileFormat.get_reader(urls[0].toLocalFile())
                event.acceptProposedAction()
            except IOError:
                pass

    def dropEvent(self, event):
        """Load the first dropped file as a local file."""
        dropped = event.mimeData().urls()
        if not dropped:
            return
        first_file = dropped[0].toLocalFile()  # only the first file is used
        self.add_path(first_file)
        self.source = self.LOCAL_FILE
        self.load_data()

    def workflowEnvChanged(self, key, value, oldvalue):
        """
        React to a change of the workflow environment (e.g. while saving
        the scheme) by forwarding it to ``update_file_list``, so that
        environment-dependent values such as relative file paths follow.
        """
        self.update_file_list(key, value, oldvalue)
Example #11
0
class OWTilefile(widget.OWWidget, RecentPathsWComboMixin):
    """Widget that reads a file tile-by-tile through a preprocessor.

    Much of the class is a code-copy of the File widget (see the
    "code-copy" markers in the body); the tile-specific parts are the
    preprocessor input and the restriction to readers with ``read_tile``.
    """
    # Widget metadata.
    name = "Tile File"
    id = "orangecontrib.spectroscopy.widgets.tilefile"
    icon = "icons/tilefile.svg"
    description = "Read data tile-by-tile from input files, " \
                  "preprocess, and send a data table to the output."
    priority = 10000
    replaces = ["orangecontrib.protospec.widgets.owtilefile.OWTilefile"]

    class Inputs:
        # Handled by ``update_preprocessor``.
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        data = Output("Data",
                      Table,
                      doc="Preprocessed dataset read from the input files.")

    want_main_area = False

    SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())]
    # Always trigger size limit warning (never auto-load)
    SIZE_LIMIT = 0
    # Values of the ``source`` setting.
    LOCAL_FILE, URL = range(2)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    # pylint seems to want declarations separated from definitions
    recent_paths: List[RecentPath]
    recent_urls: List[str]
    variables: list

    # Overload RecentPathsWidgetMixin.recent_paths to set defaults
    recent_paths = Setting([
        RecentPath("", "sample-datasets", "agilent/5_mosaic_agg1024.dmt"),
    ])
    recent_urls = Setting([])
    source = Setting(LOCAL_FILE)
    xls_sheet = ContextSetting("")
    sheet_names = Setting({})
    url = Setting("")

    # Stored to / restored from the context by storeSpecificSettings and
    # retrieveSpecificSettings.
    variables = ContextSetting([])

    domain_editor = SettingProvider(DomainEditor)

    class Warning(widget.OWWidget.Warning):
        no_preprocessor = Msg("No preprocessor on input."
                              " Press Reload to load anyway.")
        file_too_big = widget.Msg(
            "The file is too large to load automatically."
            " Press Reload to load.")
        load_warning = widget.Msg("Read warning:\n{}")
        performance_warning = widget.Msg(
            "Categorical variables with >100 values may decrease performance.")

    class Error(widget.OWWidget.Error):
        missing_reader = Msg("No tile-by-tile reader for this file.")
        file_not_found = widget.Msg("File not found.")
        sheet_error = widget.Msg("Error listing available sheets.")
        unknown = widget.Msg("Read error:\n{}")

    class NoFileSelected:
        # Sentinel compared by identity in ``_try_load``.
        pass

    # NOTE(review): the comma after the second literal makes the third
    # string a separate positional argument rather than part of the message
    # text -- confirm whether that is intended.
    UserAdviceMessages = [
        widget.Message(
            "Connect a Preprocessor "
            "which results in data-reduction ",
            "to best make use of this widget."),
    ]

    def __init__(self):
        """Build the widget GUI; no data is loaded here.

        The constructor ends by raising the ``file_too_big`` warning
        unconditionally, so loading only happens on an explicit user action.
        """
        # Set before super().__init__().
        self.preprocessor = None
        super().__init__()
        ### owfile init code-copy ###
        RecentPathsWComboMixin.__init__(self)
        self.domain = None
        self.data = None
        self.loaded_file = ""
        self.reader = None

        # Grid holding the source selection: row 0 is the file row, row 2
        # the (initially hidden) sheet selector, row 3 the URL row.
        layout = QGridLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)
        vbox = gui.radioButtons(None,
                                self,
                                "source",
                                box=True,
                                addSpace=True,
                                callback=self.load_data,
                                addToLayout=False)

        rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        # Recent-files combo; choosing an entry calls select_file.
        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.activated[int].connect(self.select_file)
        box.layout().addWidget(self.file_combo)
        layout.addWidget(box, 0, 1)

        # "..." button opening the file dialog.
        file_button = gui.button(None,
                                 self,
                                 '...',
                                 callback=self.browse_file,
                                 autoDefault=False)
        file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon))
        file_button.setSizePolicy(Policy.Maximum, Policy.Fixed)
        layout.addWidget(file_button, 0, 2)

        reload_button = gui.button(None,
                                   self,
                                   "Reload",
                                   callback=self.load_data,
                                   autoDefault=False)
        reload_button.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed)
        layout.addWidget(reload_button, 0, 3)

        # Sheet selector; hidden until _update_sheet_combo shows it.
        self.sheet_box = gui.hBox(None, addToLayout=False, margin=0)
        self.sheet_combo = gui.comboBox(
            None,
            self,
            "xls_sheet",
            callback=self.select_sheet,
            sendSelectedValue=True,
        )
        self.sheet_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_label = QLabel()
        self.sheet_label.setText('Sheet')
        self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        # NOTE(review): the alignment flags are passed as the second
        # positional argument of addWidget here -- confirm that this slot
        # is the alignment and not the stretch factor for this layout type.
        self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft)
        self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter)
        layout.addWidget(self.sheet_box, 2, 1)
        self.sheet_box.hide()

        rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        # Editable URL combo whose model wraps the recent_urls setting.
        self.url_combo = url_combo = QComboBox()
        url_model = NamedURLModel(self.sheet_names)
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(Policy.Ignored, Policy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        # Widen the left text margin of the URL editor by 5.
        l, t, r, b = url_edit.getTextMargins()
        url_edit.setTextMargins(l + 5, t, r, b)
        layout.addWidget(url_combo, 3, 1, 3, 3)
        url_combo.activated.connect(self._url_set)
        # The completer makes the combo box match the URL history
        # case-sensitively.
        completer = QCompleter()
        completer.setCaseSensitivity(Qt.CaseSensitive)
        url_combo.setCompleter(completer)

        box = gui.vBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(box, 'No data loaded.')
        self.warnings = gui.widgetLabel(box, '')

        box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)")
        self.domain_editor = DomainEditor(self)
        self.editor_model = self.domain_editor.model()
        box.layout().addWidget(self.domain_editor)

        box = gui.hBox(self.controlArea)
        gui.button(box,
                   self,
                   "Browse documentation datasets",
                   callback=lambda: self.browse_file(True),
                   autoDefault=False)
        gui.rubber(box)

        gui.button(box, self, "Reset", callback=self.reset_domain_edit)
        self.apply_button = gui.button(box,
                                       self,
                                       "Apply",
                                       callback=self.apply_domain_edit)
        # Apply is enabled only after the domain editor's data changes.
        self.apply_button.setEnabled(False)
        self.apply_button.setFixedWidth(170)
        self.editor_model.dataChanged.connect(
            lambda: self.apply_button.setEnabled(True))

        self.set_file_list()
        # Must not call open_file from within __init__. open_file
        # explicitly re-enters the event loop (by a progress bar)

        self.setAcceptDrops(True)
        ### End code copy ###

        box = gui.vBox(self.controlArea, "Preprocessor")
        self.info_preproc = gui.widgetLabel(box, 'No preprocessor on input.')

        # Shown unconditionally: this widget never loads automatically.
        self.Warning.file_too_big()

    ### owfile methods code-copy ###
    @staticmethod
    def sizeHint():
        """Return the preferred initial size of the widget window."""
        preferred_width, preferred_height = 600, 550
        return QSize(preferred_width, preferred_height)

    def select_file(self, n):
        """Select the n-th recent file and load it.

        The parent implementation handles the selection itself; afterwards
        the source is switched to "local file", the data is (re)loaded and
        the file list is refreshed.
        """
        assert n < len(self.recent_paths)
        super().select_file(n)
        if not self.recent_paths:
            return
        self.source = self.LOCAL_FILE
        self.load_data()
        self.set_file_list()

    def select_sheet(self):
        """Store the sheet chosen in the sheet combo and reload the data."""
        chosen_sheet = self.sheet_combo.currentText()
        current_path = self.recent_paths[0]
        current_path.sheet = chosen_sheet
        self.load_data()

    def _url_set(self):
        url = self.url_combo.currentText()
        pos = self.recent_urls.index(url)
        url = url.strip()

        if not urlparse(url).scheme:
            url = 'http://' + url
            self.url_combo.setItemText(pos, url)
            self.recent_urls[pos] = url

        self.source = self.URL
        self.load_data()

    # Open a file, create data from it and send it over the data channel
    def load_data(self):
        """Reset the widget state and (re)load data from the current source.

        If ``_try_load`` reports a failure, the returned error is shown, the
        data is dropped and ``None`` is sent to the output.
        """
        self.closeContext()
        self.domain_editor.set_domain(None)
        self.apply_button.setEnabled(False)
        self.clear_messages()
        self.set_file_list()

        show_error = self._try_load()
        if not show_error:
            return

        show_error()
        self.data = None
        self.sheet_box.hide()
        self.Outputs.data.send(None)
        self.infolabel.setText("No data.")

    def _try_load(self):
        # pylint: disable=broad-except
        if self.last_path() and not os.path.exists(self.last_path()):
            return self.Error.file_not_found

        try:
            self.reader = self._get_reader()
            assert self.reader is not None
        except Exception:
            return self.Error.missing_reader

        if self.reader is self.NoFileSelected:
            self.Outputs.data.send(None)
            return None

        try:
            self._update_sheet_combo()
        except Exception:
            return self.Error.sheet_error

        with catch_warnings(record=True) as warnings:
            try:
                data = self.reader.read()
            except Exception as ex:
                log.exception(ex)
                return lambda x=ex: self.Error.unknown(str(x))
            if warnings:
                self.Warning.load_warning(warnings[-1].message.args[0])

        self.infolabel.setText(self._describe(data))

        self.loaded_file = self.last_path()
        add_origin(data, self.loaded_file)
        self.data = data
        self.openContext(data.domain)
        self.apply_domain_edit()  # sends data
        return None

    def _update_sheet_combo(self):
        if len(self.reader.sheets) < 2:
            self.sheet_box.hide()
            self.reader.select_sheet(None)
            return

        self.sheet_combo.clear()
        self.sheet_combo.addItems(self.reader.sheets)
        self._select_active_sheet()
        self.sheet_box.show()

    def _select_active_sheet(self):
        if self.reader.sheet:
            try:
                idx = self.reader.sheets.index(self.reader.sheet)
                self.sheet_combo.setCurrentIndex(idx)
            except ValueError:
                # Requested sheet does not exist in this file
                self.reader.select_sheet(None)
        else:
            self.sheet_combo.setCurrentIndex(0)

    @staticmethod
    def _describe(table):
        def missing_prop(prop):
            if prop:
                return f"({prop * 100:.1f}% missing values)"
            else:
                return "(no missing values)"

        domain = table.domain
        text = ""

        attrs = getattr(table, "attributes", {})
        descs = [
            attrs[desc] for desc in ("Name", "Description") if desc in attrs
        ]
        if len(descs) == 2:
            descs[0] = f"<b>{descs[0]}</b>"
        if descs:
            text += f"<p>{'<br/>'.join(descs)}</p>"

        text += f"<p>{len(table)} instance(s)"

        missing_in_attr = missing_prop(table.has_missing_attribute()
                                       and table.get_nan_frequency_attribute())
        missing_in_class = missing_prop(table.has_missing_class()
                                        and table.get_nan_frequency_class())
        text += f"<br/>{len(domain.attributes)} feature(s) {missing_in_attr}"
        if domain.has_continuous_class:
            text += f"<br/>Regression; numerical class {missing_in_class}"
        elif domain.has_discrete_class:
            text += "<br/>Classification; categorical class " \
                f"with {len(domain.class_var.values)} values {missing_in_class}"
        elif table.domain.class_vars:
            text += "<br/>Multi-target; " \
                f"{len(table.domain.class_vars)} target variables " \
                f"{missing_in_class}"
        else:
            text += "<br/>Data has no target variable."
        text += f"<br/>{len(domain.metas)} meta attribute(s)"
        text += "</p>"

        if 'Timestamp' in table.domain:
            # Google Forms uses this header to timestamp responses
            text += f"<p>First entry: {table[0, 'Timestamp']}<br/>" \
                f"Last entry: {table[-1, 'Timestamp']}</p>"
        return text

    def storeSpecificSettings(self):
        """Save a copy of ``self.variables`` in the current context."""
        snapshot = self.variables[:]
        self.current_context.modified_variables = snapshot

    def retrieveSpecificSettings(self):
        """Restore ``self.variables`` in place from the context, if stored."""
        context = self.current_context
        if not hasattr(context, "modified_variables"):
            return
        self.variables[:] = context.modified_variables

    def reset_domain_edit(self):
        """Reset the domain editor, then apply the resulting domain."""
        editor = self.domain_editor
        editor.reset_domain()
        self.apply_domain_edit()

    def _inspect_discrete_variables(self, domain):
        for var in chain(domain.variables, domain.metas):
            if var.is_discrete and len(var.values) > 100:
                self.Warning.performance_warning()

    def apply_domain_edit(self):
        """Build the output table from the edited domain and send it.

        ``None`` is sent when there is no data or the edited domain has
        neither variables nor metas; the loaded table itself is sent when
        the editor returns the very same domain object.
        """
        self.Warning.performance_warning.clear()
        table = None
        if self.data is not None:
            domain, cols = self.domain_editor.get_domain(
                self.data.domain, self.data)
            if domain.variables or domain.metas:
                if domain is self.data.domain:
                    table = self.data
                else:
                    X, y, m = cols
                    table = Table.from_numpy(domain, X, y, m, self.data.W)
                    table.name = self.data.name
                    table.ids = np.array(self.data.ids)
                    table.attributes = getattr(self.data, 'attributes', {})
                    self._inspect_discrete_variables(domain)

        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)

    def get_widget_name_extension(self):
        """Return the loaded file's base name without its extension."""
        base_name = os.path.basename(self.loaded_file)
        stem, _ = os.path.splitext(base_name)
        return stem

    def send_report(self):
        """Add the data source and the data itself to the report.

        For a local file the reported name has the home directory
        abbreviated to ``~`` and, when the sheet selector is visible, the
        sheet name appended in parentheses. The format is resolved from the
        actual file path, not from that decorated display name.
        """
        def get_ext_name(filename):
            # Map the file extension to a format name; "unknown" if absent.
            try:
                return FileFormat.names[os.path.splitext(filename)[1]]
            except KeyError:
                return "unknown"

        if self.data is None:
            self.report_paragraph("File", "No file.")
            return

        if self.source == self.LOCAL_FILE:
            home = os.path.expanduser("~")
            if self.loaded_file.startswith(home):
                # os.path.join does not like ~
                name = "~" + os.path.sep + \
                       self.loaded_file[len(home):].lstrip("/").lstrip("\\")
            else:
                name = self.loaded_file
            # Look the format up before the sheet suffix is appended:
            # "file.xlsx (Sheet1)" has no recognizable extension.
            file_format = get_ext_name(self.loaded_file)
            if self.sheet_combo.isVisible():
                name += f" ({self.sheet_combo.currentText()})"
            self.report_items("File", [("File name", name),
                                       ("Format", file_format)])
        else:
            self.report_items("Data", [("Resource", self.url),
                                       ("Format", get_ext_name(self.url))])

        self.report_data("Data", self.data)

    @staticmethod
    def dragEnterEvent(event):
        """Accept drops of valid file urls"""
        urls = event.mimeData().urls()
        if urls:
            try:
                FileFormat.get_reader(urls[0].toLocalFile())
                event.acceptProposedAction()
            except IOError:
                pass

    def dropEvent(self, event):
        """Load the first dropped file as a local file."""
        dropped = event.mimeData().urls()
        if not dropped:
            return
        first_file = dropped[0].toLocalFile()  # only the first file is used
        self.add_path(first_file)
        self.source = self.LOCAL_FILE
        self.load_data()

    def workflowEnvChanged(self, key, value, oldvalue):
        """
        React to a change of the workflow environment (e.g. while saving
        the scheme) by forwarding it to ``update_file_list``, so that
        environment-dependent values such as relative file paths follow.
        """
        self.update_file_list(key, value, oldvalue)

    #### End code copy ####

    @staticmethod
    def _is_preproc(p):
        """
        Tests that a preprocessor is not None or empty PreprocessorList
        """
        return not (p is None or (isinstance(p, PreprocessorList)
                                  and len(p.preprocessors) == 0))

    @staticmethod
    def _format_preproc_str(p):
        """Render a preprocessor as text for the info label.

        Each member of a ``PreprocessorList`` is put on its own line, each
        preceded by a newline (so a non-empty result starts with one);
        anything else is simply passed to ``str``.
        """
        if not isinstance(p, PreprocessorList):
            return str(p)
        return "".join("\n{0}".format(member) for member in p.preprocessors)

    @Inputs.preprocessor
    def update_preprocessor(self, preproc):
        """Store the preprocessor from the input and update the info label.

        A missing or empty preprocessor raises the ``no_preprocessor``
        warning; a preprocessor object different from the stored one is
        announced, since it is only used once the file is reloaded.
        """
        self.Warning.no_preprocessor.clear()
        if self._is_preproc(preproc):
            if preproc is not self.preprocessor:
                description = self._format_preproc_str(preproc)
                self.info_preproc.setText(
                    "New preprocessor, reload file to use." + description)
        else:
            self.info_preproc.setText("No preprocessor on input.")
            self.Warning.no_preprocessor()
        self.preprocessor = preproc

    def browse_file(self, in_demos=False):
        """Let the user pick a file in a dialog; load it if possible.

        With ``in_demos`` set, the dialog starts in the sample datasets
        directory; otherwise at the last used path or the home directory.
        The file is loaded only when a usable preprocessor is connected;
        otherwise the ``no_preprocessor`` warning is raised instead.
        """
        if not in_demos:
            initial_dir = self.last_path() or os.path.expanduser("~/")
        else:
            initial_dir = get_sample_datasets_dir()
            if not os.path.exists(initial_dir):
                QMessageBox.information(
                    None, "File",
                    "Cannot find the directory with documentation datasets")
                return

        # Offer only formats with ``read_tile`` that declare extensions.
        offered_formats = []
        for fmt in FileFormat.formats:
            if getattr(fmt, 'read_tile', None) \
                    and getattr(fmt, "EXTENSIONS", None):
                offered_formats.append(fmt)

        chosen_path, chosen_reader, _ = open_filename_dialog(
            initial_dir, None, offered_formats)
        if not chosen_path:
            return

        self.add_path(chosen_path)
        if chosen_reader is not None:
            qualified = chosen_reader.qualified_name()
            self.recent_paths[0].file_format = qualified

        self.source = self.LOCAL_FILE

        if self._is_preproc(self.preprocessor):
            self.load_data()
            return None
        return self.Warning.no_preprocessor()

    @classmethod
    def get_tile_reader(cls, filename):
        """Return reader instance that can be used to read a file tile-wise

        Only formats that have ``read_tile`` and declare ``EXTENSIONS`` are
        considered; the first one listing the file's extension is used.

        Parameters
        ----------
        filename : str

        Returns
        -------
        FileFormat

        Raises
        ------
        IOError
            If no such format lists the file's extension.
        """
        tile_formats = []
        for fmt in FileFormat.formats:
            if getattr(fmt, 'read_tile', None) \
                    and getattr(fmt, "EXTENSIONS", None):
                tile_formats.append(fmt)

        for candidate in tile_formats:
            extension = os.path.splitext(filename)[1]
            if extension in candidate.EXTENSIONS:
                return candidate(filename)

        raise IOError('No readers for file "{}"'.format(filename))

    def _get_reader(self):
        """
        Returns
        -------
        FileFormat
        """
        if self.source == self.LOCAL_FILE:
            path = self.last_path()
            if self.recent_paths and self.recent_paths[0].file_format:
                qname = self.recent_paths[0].file_format
                reader_class = class_from_qualified_name(qname)
                reader = reader_class(path)
            else:
                reader = self.get_tile_reader(path)
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)
            # set preprocessor here
            if hasattr(reader, "read_tile"):
                reader.set_preprocessor(self.preprocessor)
                if self.preprocessor is not None:
                    self.info_preproc.setText(
                        self._format_preproc_str(
                            self.preprocessor).lstrip("\n"))
            else:
                # only allow readers with tile-by-tile support to run.
                reader = None
            return reader
        elif self.source == self.URL:
            url = self.url_combo.currentText().strip()
            if url:
                return UrlReader(url)
class OWMovingTransform(widget.OWWidget):
    """Widget that applies rolling-window aggregation functions to a series.

    The control area holds an editable table whose rows are
    ``[series variable, window width, aggregation function]``. On commit the
    table model is passed to ``moving_transform`` together with the input
    data and the result is sent with ``self.send(Output.TIMESERIES, ...)``.
    """
    name = 'Moving Transform'
    description = 'Apply rolling window functions to the time series.'
    icon = 'icons/MovingTransform.svg'
    priority = 20

    inputs = [("Time series", Table, 'set_data')]
    outputs = [("Time series", Timeseries)]

    want_main_area = False

    # Persisted widget settings
    non_overlapping = settings.Setting(False)
    fixed_wlen = settings.Setting(5)
    transformations = settings.Setting([])
    autocommit = settings.Setting(False)
    last_win_width = settings.Setting(5)

    _NON_OVERLAPPING_WINDOWS = 'Non-overlapping windows'

    UserAdviceMessages = [
        widget.Message(
            'Get the simple moving average (SMA) of a series '
            'by setting the aggregation function to "{}".'.format(Mean),
            'sma-is-mean'),
        widget.Message(
            # The apostrophe has to be escaped as \' -- a bare backslash
            # followed by "t" would be rendered as a TAB character.
            'If "{}" is checked, the rolling windows don\'t '
            'overlap. Instead, they run through the series '
            'side-to-side, so the resulting transformed series is '
            'fixed-window-length-times shorter.'.format(
                _NON_OVERLAPPING_WINDOWS), 'non-overlapping')
    ]

    def __init__(self):
        """Build the control area: options, transformations table, buttons."""
        self.data = None
        box = gui.vBox(self.controlArea, 'Moving Transform')

        def _disable_fixed_wlen():
            # The fixed width spin box is only meaningful for
            # non-overlapping windows; repaint so that the per-row window
            # widths are hidden/shown accordingly (see SpinDelegate.paint).
            fixed_wlen.setDisabled(not self.non_overlapping)
            self.view.repaint()
            self.on_changed()

        gui.checkBox(box,
                     self,
                     'non_overlapping',
                     label=self._NON_OVERLAPPING_WINDOWS,
                     callback=_disable_fixed_wlen,
                     tooltip='If this is checked, instead of rolling windows '
                     'through the series, they are applied side-to-side, '
                     'so the resulting output series will be some '
                     'length-of-fixed-window-times shorter.')
        fixed_wlen = gui.spin(box,
                              self,
                              'fixed_wlen',
                              2,
                              1000,
                              label='Fixed window width:',
                              callback=self.on_changed)
        fixed_wlen.setDisabled(not self.non_overlapping)

        # TODO: allow the user to choose left-aligned, right-aligned, or center-aligned window

        class TableView(gui.TableView):
            """Table view with one editing delegate per column."""

            def __init__(self, parent):
                super().__init__(
                    parent,
                    editTriggers=(self.SelectedClicked | self.CurrentChanged
                                  | self.DoubleClicked | self.EditKeyPressed),
                )
                self.horizontalHeader().setStretchLastSection(False)
                agg_functions = ListModel(AGG_FUNCTIONS +
                                          [Cumulative_sum, Cumulative_product],
                                          parent=self)
                # Column 0: series variable, 1: window width,
                # 2: aggregation function
                self.setItemDelegateForColumn(0, self.VariableDelegate(parent))
                self.setItemDelegateForColumn(1, self.SpinDelegate(parent))
                self.setItemDelegateForColumn(
                    2, self.ComboDelegate(self, agg_functions))

            class _ItemDelegate(QStyledItemDelegate):
                """Base delegate whose editor fills the whole cell."""

                def updateEditorGeometry(self, widget, option, _index):
                    widget.setGeometry(option.rect)

            class ComboDelegate(_ItemDelegate):
                """Delegate that edits a cell with a combo box over a model."""

                def __init__(self, parent=None, combo_model=None):
                    super().__init__(parent)
                    self._parent = parent
                    # Subclasses may provide `_combo_model` as a property
                    # instead of passing a model here.
                    if combo_model is not None:
                        self._combo_model = combo_model

                def createEditor(self, parent, _QStyleOptionViewItem, index):
                    combo = QComboBox(parent)
                    combo.setModel(self._combo_model)
                    return combo

                def setEditorData(self, combo, index):
                    var = index.model().data(index, Qt.EditRole)
                    combo.setCurrentIndex(self._combo_model.indexOf(var))

                def setModelData(self, combo, model, index):
                    var = self._combo_model[combo.currentIndex()]
                    model.setData(index, var, Qt.EditRole)

            class VariableDelegate(ComboDelegate):
                """Combo delegate backed by the parent's `var_model`."""

                @property
                def _combo_model(self):
                    return self._parent.var_model

            class SpinDelegate(_ItemDelegate):
                """Spin box delegate for the per-row window width."""

                def paint(self, painter, option, index):
                    # Don't paint window length if non-overlapping windows set
                    if not self.parent().non_overlapping:
                        super().paint(painter, option, index)

                def createEditor(self, parent, _QStyleOptionViewItem, _index):
                    # Don't edit window length if non-overlapping windows set
                    if self.parent().non_overlapping:
                        return None
                    spin = QSpinBox(parent, minimum=1, maximum=1000)
                    return spin

                def setEditorData(self, spin, index):
                    spin.setValue(index.model().data(index, Qt.EditRole))

                def setModelData(self, spin, model, index):
                    spin.interpretText()
                    model.setData(index, spin.value(), Qt.EditRole)

        self.var_model = VariableListModel(parent=self)

        self.table_model = model = PyTableModel(self.transformations,
                                                parent=self,
                                                editable=True)
        model.setHorizontalHeaderLabels(
            ['Series', 'Window width', 'Aggregation function'])
        model.dataChanged.connect(self.on_changed)

        self.view = view = TableView(self)
        view.setModel(model)
        box.layout().addWidget(view)

        hbox = gui.hBox(box)
        from os.path import dirname, join
        self.add_button = button = gui.button(hbox,
                                              self,
                                              'Add &Transform',
                                              callback=self.on_add_transform)
        button.setIcon(
            QIcon(join(dirname(__file__), 'icons', 'LineChart-plus.png')))

        self.del_button = button = gui.button(hbox,
                                              self,
                                              '&Delete Selected',
                                              callback=self.on_del_transform)
        QIcon.setThemeName('gnome')  # Works for me
        button.setIcon(QIcon.fromTheme('edit-delete'))

        gui.auto_commit(box, self, 'autocommit', '&Apply')

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(450, 600)

    def on_add_transform(self):
        """Append a default transformation row and commit.

        The new row uses the first variable of `var_model`, the stored
        `last_win_width` and the first entry of ``AGG_FUNCTIONS``. Nothing is
        appended when there is no data or no selectable variable.
        """
        # `var_model` is empty when the data has no continuous non-time
        # variable; indexing it would raise IndexError.
        if self.data is not None and len(self.var_model):
            self.table_model.append(
                [self.var_model[0], self.last_win_width, AGG_FUNCTIONS[0]])
        self.commit()

    def on_del_transform(self):
        """Delete the selected rows, select the last remaining row, commit."""
        # Delete from the bottom up so that the remaining indices stay valid
        for row in sorted(
            [mi.row() for mi in self.view.selectionModel().selectedRows(0)],
                reverse=True):
            del self.table_model[row]
        if len(self.table_model):
            selection_model = self.view.selectionModel()
            selection_model.select(
                self.table_model.index(len(self.table_model) - 1, 0),
                selection_model.Select | selection_model.Rows)
        self.commit()

    def set_data(self, data):
        """Store the input (converted to `Timeseries`) and reset the table.

        Parameters
        ----------
        data : Table or None
        """
        self.data = data = None if data is None else Timeseries.from_data_table(
            data)
        # With no data (or an empty domain) there is nothing to add
        self.add_button.setDisabled(not len(getattr(data, 'domain', ())))
        self.table_model.clear()
        if data is not None:
            # Offer only continuous variables other than the time variable
            self.var_model.wrap([
                var for var in data.domain
                if var.is_continuous and var is not data.time_variable
            ])
        self.on_changed()

    def on_changed(self):
        """React to any change of the settings or of the table."""
        self.commit()

    def commit(self):
        """Compute the transformed series and send it; send None if no data."""
        data = self.data
        if not data:
            self.send(Output.TIMESERIES, None)
            return

        # The third argument is falsy for rolling windows and equal to the
        # fixed window width for non-overlapping windows.
        ts = moving_transform(data, self.table_model, self.non_overlapping
                              and self.fixed_wlen)
        self.send(Output.TIMESERIES, ts)
Example #13
0
class OWGrangerCausality(widget.OWWidget):
    """Widget that lists Granger-causality relations between series.

    The input table is converted to a `Timeseries`, passed to
    ``granger_causality`` with the chosen maximal lag and confidence, and the
    resulting ``(lag, series 1, series 2)`` triples are shown in a table.
    """
    name = 'Granger Causality'
    description = 'Test if one time series Granger-causes (i.e. can be an ' \
                  'indicator of) another.'
    icon = 'icons/GrangerCausality.svg'
    priority = 190

    class Inputs:
        time_series = Input("Time series", Table, replaces=["Timeseries"])

    # Persisted widget settings
    max_lag = settings.Setting(20)
    confidence = settings.Setting(95)
    autocommit = settings.Setting(False)

    UserAdviceMessages = [
        widget.Message('We say <i>X</i> Granger-causes <i>Y</i> if '
                       'predictions of values of <i>Y</i> based on its own '
                       'past values and on the past values of <i>X</i> are '
                       'better than predictions of <i>Y</i> based on its '
                       'past values alone.<br><br>'
                       'It does NOT mean <i>X</i> causes <i>Y</i>!',
                       'explanation',
                       widget.Message.Warning)
    ]

    class Error(widget.OWWidget.Error):
        unexpected_error = widget.Msg('Unexpected error: {}')

    def __init__(self):
        """Build the test controls and the results table."""
        self.data = None
        box = gui.vBox(self.controlArea, 'Granger Test')
        gui.hSlider(box, self, 'confidence',
                    minValue=90, maxValue=99,
                    label='Confidence:',
                    labelFormat=" %d%%",
                    callback=self.on_changed)
        gui.spin(box, self, 'max_lag', 1, 50,
                 label='Max lag:',
                 callback=self.on_changed)
        gui.auto_commit(box, self, 'autocommit', '&Test')
        gui.rubber(self.controlArea)

        self.model = model = PyTableModel(parent=self)
        model.setHorizontalHeaderLabels(['Min. lag', 'Series 1', '', 'Series 2'])
        view = gui.TableView(self)
        view.setModel(model)
        # The two series-name columns are shown in bold
        bold = view.BoldFontDelegate(self)
        view.setItemDelegateForColumn(1, bold)
        view.setItemDelegateForColumn(3, bold)
        view.horizontalHeader().setStretchLastSection(False)
        self.mainArea.layout().addWidget(view)
        # TODO: output the series with subset columns of selected model rows
        # TODO: allow setting filters or choosing what variables to include in test

    def on_changed(self):
        """React to a change of the settings or of the input."""
        self.commit()

    @Inputs.time_series
    def set_data(self, data):
        """Store the input (converted to `Timeseries`) and rerun the test.

        Parameters
        ----------
        data : Table or None
        """
        self.data = data = None if data is None else \
                           Timeseries.from_data_table(data)
        self.on_changed()

    def commit(self):
        """Run the Granger test and show the results in the table.

        ``ValueError`` and ``LinAlgError`` raised by the test are reported
        through ``Error.unexpected_error`` instead of propagating.
        """
        data = self.data
        self.model.clear()
        self.Error.unexpected_error.clear()
        if data is None:
            return

        try:
            with self.progressBar() as progress:
                res = granger_causality(data,
                                        self.max_lag,
                                        1 - self.confidence / 100,
                                        callback=progress.advance)
                res = [[lag, row, '→', col]
                       for lag, row, col in res]
        except (ValueError, LinAlgError) as ex:
            # An exception raised without arguments has empty `args`;
            # indexing it would raise IndexError from within the handler
            # and hide the original problem.
            detail = ex.args[0] if ex.args else type(ex).__name__
            self.Error.unexpected_error(detail)
        else:
            self.model.wrap(res)
            self.model.sort(0, Qt.DescendingOrder)
Example #14
0
class OWTestAndScore(OWWidget):
    """Widget for cross-validation accuracy estimation of learners.

    It accepts train data, optional separate test data, any number of
    learners and an optional preprocessor, and outputs predictions and
    evaluation results.
    """
    name = "Test and Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100
    keywords = ['Cross Validation', 'CV']
    replaces = ["Orange.widgets.evaluate.owtestlearners.OWTestLearners"]

    # Input signals; "Learner" is declared with multiple=True
    class Inputs:
        train_data = Input("Data", Table, default=True)
        test_data = Input("Test Data", Table)
        learner = Input("Learner", Learner, multiple=True)
        preprocessor = Input("Preprocessor", Preprocess)

    # Output signals
    class Outputs:
        predictions = Output("Predictions", Table)
        evaluations_results = Output("Evaluation Results", Results)

    settings_version = 3
    buttons_area_orientation = None
    UserAdviceMessages = [
        widget.Message("Click on the table header to select shown columns",
                       "click_header")
    ]

    settingsHandler = settings.PerfectDomainContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: Resampling/testing types
    KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \
        = 0, 1, 2, 3, 4, 5
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    # NOTE(review): bound to a combo filled from NFolds, so this is
    # presumably an index into NFolds rather than the count itself -- confirm
    n_folds = settings.Setting(2)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    # NOTE(review): presumably an index into NRepeats -- confirm
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size
    # NOTE(review): presumably an index into SampleSizes -- confirm
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)
    # CV where nr. of feature values determines nr. of folds
    fold_feature = settings.ContextSetting(None)
    fold_feature_selected = settings.ContextSetting(False)

    # Model comparison options
    use_rope = settings.Setting(False)
    rope = settings.Setting(0.1)
    comparison_criterion = settings.Setting(0, schema_only=True)

    # Entry of the target class selection that stands for "no specific class"
    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    class Error(OWWidget.Error):
        test_data_empty = Msg("Test dataset is empty.")
        class_required_test = Msg(
            "Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train datasets "
                                 "have different target variables.")
        memory_error = Msg("Not enough memory.")
        test_data_incompatible = Msg(
            "Test data may be incompatible with train data.")
        # The whole text is supplied by the caller (see set_train_data)
        train_data_error = Msg("{}")

    class Warning(OWWidget.Warning):
        # The placeholder is filled with the result of _which_missing_data,
        # i.e. " ", " train " or " test " (surrounding spaces included)
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")
        cant_stratify = \
            Msg("Can't run stratified {}-fold cross validation; "
                "the least common class has only {} instances.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")
        test_data_transformed = Msg(
            "Test data has been transformed to match the train data.")
        cant_stratify_numeric = Msg("Stratification is ignored for regression")

    def __init__(self):
        """Initialize the widget state and build its GUI.

        The control area gets the "Sampling", "Target Class" and
        "Model Comparison" boxes; the main area gets the score table and the
        model comparison table.
        """
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []
        # Stored criterion is applied later, in _update_scorers, once the
        # list of scorers is known
        self.__pending_comparison_criterion = self.comparison_criterion

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, Input]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[TaskState]
        self.__executor = ThreadExecutor()

        # --- Control area: sampling method ---
        # NOTE(review): the radio buttons are appended in the same order as
        # the resampling constants (KFold ... TestOnTest); presumably the
        # button index is what gets stored in `resampling` -- confirm
        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_folds",
                     label="Number of folds: ",
                     items=[str(x) for x in self.NFolds],
                     orientation=Qt.Horizontal,
                     callback=self.kfold_changed)
        gui.checkBox(ibox,
                     self,
                     "cv_stratified",
                     "Stratified",
                     callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        # Only discrete meta attributes are offered as fold features
        self.feature_model = DomainModel(order=DomainModel.METAS,
                                         valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(ibox,
                                           self,
                                           "fold_feature",
                                           model=self.feature_model,
                                           orientation=Qt.Horizontal,
                                           searchable=True,
                                           callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_repeats",
                     label="Repeat train/test: ",
                     items=[str(x) for x in self.NRepeats],
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.comboBox(ibox,
                     self,
                     "sample_size",
                     label="Training set size: ",
                     items=["{} %".format(x) for x in self.SampleSizes],
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.checkBox(ibox,
                     self,
                     "shuffle_stratified",
                     "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        # --- Control area: target class ---
        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            contentsLength=8,
            searchable=True,
            callback=self._on_target_class_changed)

        # --- Control area: model comparison options ---
        self.modcompbox = box = gui.vBox(self.controlArea, "Model Comparison")
        gui.comboBox(box,
                     self,
                     "comparison_criterion",
                     callback=self.update_comparison_table)

        hbox = gui.hBox(box)
        gui.checkBox(hbox,
                     self,
                     "use_rope",
                     "Negligible difference: ",
                     callback=self._on_use_rope_changed)
        gui.lineEdit(hbox,
                     self,
                     "rope",
                     validator=QDoubleValidator(),
                     controlWidth=70,
                     callback=self.update_comparison_table,
                     alignment=Qt.AlignRight)
        # The rope value can only be edited while the check box is ticked
        self.controls.rope.setEnabled(self.use_rope)

        gui.rubber(self.controlArea)

        # --- Main area: evaluation results ---
        self.score_table = ScoreTable(self)
        self.score_table.shownScoresChanged.connect(self.update_stats_model)
        view = self.score_table.view
        view.setSizeAdjustPolicy(view.AdjustToContents)

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.score_table.view)

        # --- Main area: pairwise model comparison (read-only table) ---
        self.compbox = box = gui.vBox(self.mainArea, box="Model comparison")
        table = self.comparison_table = QTableWidget(
            wordWrap=False,
            editTriggers=QTableWidget.NoEditTriggers,
            selectionMode=QTableWidget.NoSelection)
        table.setSizeAdjustPolicy(table.AdjustToContents)
        header = table.verticalHeader()
        header.setSectionResizeMode(QHeaderView.Fixed)
        header.setSectionsClickable(False)

        header = table.horizontalHeader()
        header.setTextElideMode(Qt.ElideRight)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setSectionsClickable(False)
        header.setStretchLastSection(False)
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        # Column widths are bounded relative to the average character width
        avg_width = self.fontMetrics().averageCharWidth()
        header.setMinimumSectionSize(8 * avg_width)
        header.setMaximumSectionSize(15 * avg_width)
        header.setDefaultSectionSize(15 * avg_width)
        box.layout().addWidget(table)
        box.layout().addWidget(
            QLabel(
                "<small>Table shows probabilities that the score for the model in "
                "the row is higher than that of the model in the column. "
                "Small numbers show the probability that the difference is "
                "negligible.</small>",
                wordWrap=True))

    def sizeHint(self):
        """Return the inherited size hint with the width replaced by 780."""
        inherited_height = super().sizeHint().height()
        return QSize(780, inherited_height)

    def _update_controls(self):
        """Refresh the fold-feature controls for the current data.

        The feature model is reset and, when there is data, refilled from its
        domain; the first feature becomes the fold feature. If the model ends
        up empty, the fold-feature controls are disabled and a selected
        "by feature" resampling falls back to k-fold.
        """
        model = self.feature_model
        self.fold_feature = None
        model.set_domain(None)
        if self.data:
            model.set_domain(self.data.domain)
            if self.fold_feature is None and model:
                self.fold_feature = model[0]

        has_fold_features = bool(model)
        fold_button = \
            self.controls.resampling.buttons[OWTestAndScore.FeatureFold]
        for control in (fold_button, self.features_combo):
            control.setEnabled(has_fold_features)

        if not has_fold_features \
                and self.resampling == OWTestAndScore.FeatureFold:
            self.resampling = OWTestAndScore.KFold

    @Inputs.learner
    def set_learner(self, learner, key):
        """
        Add, replace or remove the learner connected under `key`.

        A learner that is not None is stored (replacing any previous one for
        the same key); None removes the stored learner, if there is one. In
        both cases the key is invalidated.

        Parameters
        ----------
        learner : Optional[Orange.base.Learner]
        key : Any
        """
        if learner is not None:
            self.learners[key] = InputLearner(learner, None, None)
            self._invalidate([key])
        elif key in self.learners:
            # Removed
            self._invalidate([key])
            del self.learners[key]

    @Inputs.train_data
    def set_train_data(self, data):
        """
        Validate and store the training dataset, then refresh the controls.

        Data that fails any of the sanity checks is replaced by None and the
        first failed check is reported. An `SqlTable` is downloaded, or
        sampled when its approximate length reaches `AUTO_DL_LIMIT`.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.cancel()
        self.Information.data_sampled.clear()
        self.Error.train_data_error.clear()

        if data is not None:
            # All conditions are computed up front; only the first one that
            # holds is reported.
            checks = [
                ("Train dataset is empty.", len(data) == 0),
                ("Train data input requires a target variable.",
                 not data.domain.class_vars),
                ("Too many target variables.",
                 len(data.domain.class_vars) > 1),
                ("Target variable has no values.", np.isnan(data.Y).all()),
                ("Target variable has only one value.",
                 data.domain.has_discrete_class and len(unique(data.Y)) < 2),
                ("Data has no features to learn from.", data.X.shape[1] == 0),
            ]
            problem = next((text for text, failed in checks if failed), None)
            if problem is not None:
                self.Error.train_data_error(problem)
                data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                sample = data.sample_time(1, no_cache=True)
                sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(sample)

        has_unknown_targets = data is not None and np.isnan(data.Y).any()
        self.train_data_missing_vals = has_unknown_targets
        if has_unknown_targets or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_scorers()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestAndScore.FeatureFold
        self._invalidate()

    @Inputs.test_data
    def set_test_data(self, data):
        # type: (Orange.data.Table) -> None
        """
        Validate and store the separate testing dataset.

        Empty data and data without a class variable are reported and
        replaced by None. An `SqlTable` is downloaded, or sampled when its
        approximate length reaches `AUTO_DL_LIMIT`. Results are invalidated
        only when "test on test data" is the selected resampling.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()

        is_empty = data is not None and not data
        if is_empty:
            self.Error.test_data_empty()
            data = None

        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                sample = data.sample_time(1, no_cache=True)
                sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(sample)

        has_unknown_targets = data is not None and np.isnan(data.Y).any()
        self.test_data_missing_vals = has_unknown_targets
        if self.train_data_missing_vals or has_unknown_targets:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestAndScore.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        """Return the text naming the dataset(s) with unknown target values.

        The result is " train ", " test " or, when both datasets are
        affected, a single space. There is no entry for the case when
        neither flag is set; the lookup raises KeyError then.
        """
        flags = (self.train_data_missing_vals, self.test_data_missing_vals)
        fragments = {
            (True, True): " ",  # both, don't specify
            (True, False): " train ",
            (False, True): " test ",
        }
        return fragments[flags]

    # The list of scorers must not be collected globally at module load time
    # because add-ons may register additional scorers later.
    # Caching it would be possible, but
    # - there is little to gain from it
    # - it would complicate the unit tests
    def _update_scorers(self):
        """Recompute the usable scorers and sync the comparison criterion.

        The criterion combo is refilled only when the list of scorers has
        changed. A criterion pending from stored settings is applied once,
        provided that it is a valid index, and then dropped.
        """
        class_var = self.data.domain.class_var if self.data else None
        new_scorers = usable_scorers(class_var) if class_var else []

        # Refilling the combo resets comparison_criterion, so it is done
        # only on change; the criterion is still set explicitly for clarity
        if new_scorers != self.scorers:
            self.scorers = new_scorers
            combo = self.controls.comparison_criterion
            combo.clear()
            labels = [scorer.long_name or scorer.name
                      for scorer in self.scorers]
            combo.addItems(labels)
            if self.scorers:
                self.comparison_criterion = 0

        pending = self.__pending_comparison_criterion
        if pending is not None:
            # Guards against the unlikely case that some scorers have been
            # removed from modules
            if pending < len(self.scorers):
                self.comparison_criterion = pending
            self.__pending_comparison_criterion = None

        self._update_compbox_title()

    def _update_compbox_title(self):
        """Set the comparison box title, naming the scorer when one is valid."""
        title = "Model Comparison"
        index = self.comparison_criterion
        if index < len(self.scorers):
            scorer = self.scorers[index]()
            title = f"Model Comparison by {scorer.name}"
        self.compbox.setTitle(title)

    @Inputs.preprocessor
    def set_preprocessor(self, preproc):
        """
        Store the input preprocessor and invalidate the current results.

        Parameters
        ----------
        preproc : the preprocessor received on the "Preprocessor" input
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Refresh the views after new inputs; rerun tests if needed.

        Reimplemented from OWWidget.handleNewSignals.
        """
        self._update_class_selection()
        self.score_table.update_header(self.scorers)
        self._update_view_enabled()
        self.update_stats_model()
        if not self.__needupdate:
            return
        self.__update()

    def kfold_changed(self):
        """Switch to k-fold resampling after one of its options changed."""
        self.resampling = OWTestAndScore.KFold
        self._param_changed()

    def fold_feature_changed(self):
        """Switch to resampling by feature after the fold feature changed."""
        self.resampling = OWTestAndScore.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        """Switch to random sampling after one of its options changed."""
        self.resampling = OWTestAndScore.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        """Apply a change of the sampling parameters and rerun the tests.

        The model comparison options are enabled only for k-fold resampling.
        """
        is_kfold = self.resampling == OWTestAndScore.KFold
        self.modcompbox.setEnabled(is_kfold)
        self._update_view_enabled()
        self._invalidate()
        self.__update()

    def _update_view_enabled(self):
        """Enable the result views according to the current state.

        The score table needs data; the comparison table additionally needs
        k-fold resampling and more than one learner.
        """
        has_data = self.data is not None
        can_compare = (has_data
                       and len(self.learners) > 1
                       and self.resampling == OWTestAndScore.KFold)
        self.comparison_table.setEnabled(can_compare)
        self.score_table.view.setEnabled(has_data)

    def update_stats_model(self):
        """Rebuild the rows of the score table from the stored results.

        One row is added per learner: its name, train and test times (only
        for successful results) and one item per scorer. Failures are shown
        in the row header and collected into the widget error; a warning is
        shown when a shown score could not be computed.
        """
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.score_table.model
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        # Index of the selected target class; stays None for "average over
        # classes" and for data without a discrete class
        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        names = []
        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            names.append(name)
            head = QStandardItem(name)
            # Every item of the row carries the learner's key as user data
            head.setData(key, Qt.UserRole)
            results = slot.results
            if results is not None and results.success:
                train = QStandardItem("{:.3f}".format(
                    results.value.train_time))
                train.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                train.setData(key, Qt.UserRole)
                test = QStandardItem("{:.3f}".format(results.value.test_time))
                test.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                test.setData(key, Qt.UserRole)
                row = [head, train, test]
            else:
                row = [head]
            if isinstance(results, Try.Fail):
                # Mark the failed learner in red with the exception as tooltip
                head.setToolTip(str(results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                if isinstance(results.exception, DomainTransformationError) \
                        and self.resampling == self.TestOnTest:
                    # Reported through a dedicated error instead of the
                    # per-learner error list
                    self.Error.test_data_incompatible()
                    self.Information.test_data_transformed.clear()
                else:
                    errors.append("{name} failed with error:\n"
                                  "{exc.__class__.__name__}: {exc!s}".format(
                                      name=name, exc=slot.results.exception))

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                # A specific target class is selected: scores are recomputed
                # here on one-vs-rest results
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(slot.results.value,
                                                      target_index)

                    # Cell variable is used immediately, it's not stored
                    # pylint: disable=cell-var-from-loop
                    stats = [
                        Try(scorer_caller(scorer, ovr_results, target=1))
                        for scorer in self.scorers
                    ]
                else:
                    stats = None
            else:
                # Otherwise the scores stored with the learner are used
                stats = slot.stats

            if stats is not None:
                for stat, scorer in zip(stats, self.scorers):
                    item = QStandardItem()
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    if stat.success:
                        item.setData(float(stat.value[0]), Qt.DisplayRole)
                    else:
                        # Failed score: empty cell with the exception as
                        # tooltip; warn only if the score is currently shown
                        item.setToolTip(str(stat.exception))
                        if scorer.name in self.score_table.shown_scores:
                            has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        # Resort rows based on current sorting
        header = self.score_table.view.horizontalHeader()
        model.sort(header.sortIndicatorSection(), header.sortIndicatorOrder())
        self._set_comparison_headers(names)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _on_use_rope_changed(self):
        """Enable the rope control according to `use_rope` and refresh the
        comparison table."""
        rope_control = self.controls.rope
        rope_control.setEnabled(self.use_rope)
        self.update_comparison_table()

    def update_comparison_table(self):
        """
        Rebuild the pairwise comparison table.

        The table contents are always cleared first. When there is at least
        one successful learner slot and at least one scorer, the headers are
        set from the learner names; the cells are filled only when the
        resampling method is `OWTestAndScore.KFold`.
        """
        self.comparison_table.clearContents()
        successful = self._successful_slots()
        if not successful or not self.scorers:
            return
        learner_names = [learner_name(slot.learner) for slot in successful]
        self._set_comparison_headers(learner_names)
        if self.resampling != OWTestAndScore.KFold:
            return
        fold_scores = self._scores_by_folds(successful)
        self._fill_table(learner_names, fold_scores)

    def _successful_slots(self):
        """
        Return the learner slots whose results are present and successful.

        Slots are listed in the row order of the score table's sorted model;
        the learner key of each row is read from column 0 of the source model
        under `Qt.UserRole`.
        """
        source_model = self.score_table.model
        sorted_model = self.score_table.sorted_model

        successful = []
        for row in range(sorted_model.rowCount()):
            source_index = sorted_model.mapToSource(sorted_model.index(row, 0))
            key = source_model.data(source_index, Qt.UserRole)
            slot = self.learners[key]
            if slot.results is not None and slot.results.success:
                successful.append(slot)
        return successful

    def _set_comparison_headers(self, names):
        """
        Resize the comparison table to len(names) x len(names) and label it.

        The same `names` are used for the vertical and the horizontal header.
        Sections stretch when there are more than two names and are fixed
        otherwise.
        """
        table = self.comparison_table
        size = len(names)
        try:
            # Suspend repainting so the intermediate states are not shown
            table.setUpdatesEnabled(False)
            resize_mode = \
                QHeaderView.Stretch if size > 2 else QHeaderView.Fixed
            table.horizontalHeader().setSectionResizeMode(resize_mode)
            table.setRowCount(size)
            table.setColumnCount(size)
            table.setVerticalHeaderLabels(names)
            table.setHorizontalHeaderLabels(names)
        finally:
            table.setUpdatesEnabled(True)

    def _scores_by_folds(self, slots):
        """
        Compute per-fold scores of the selected comparison scorer.

        Returns a list parallel to `slots`. Each entry is the flattened
        result of the scorer's `scores_by_folds` for that slot, or None when
        the wrapped call did not succeed. The `scores_not_computed` warning
        is raised if any entry is None.
        """
        scorer = self.scorers[self.comparison_criterion]()
        self._update_compbox_title()

        # Extra keyword arguments are only passed to binary scorers
        if not scorer.is_binary:
            kw = {}
        elif self.class_selection == self.TARGET_AVERAGE:
            kw = {"average": 'weighted'}
        else:
            class_var = self.data.domain.class_var
            kw = {"target": class_var.values.index(self.class_selection)}

        def deferred(results):
            # The call is wrapped so that `Try` runs it and records whether
            # it succeeded
            return lambda: \
                scorer.scores_by_folds(results.value, **kw).flatten()

        attempts = [Try(deferred(slot.results)) for slot in slots]
        scores = [
            attempt.value if attempt.success else None
            for attempt in attempts
        ]
        # `None in scores` doesn't work -- these are np.arrays
        if any(score is None for score in scores):
            self.Warning.scores_not_computed()
        return scores

    def _fill_table(self, names, scores):
        """
        Fill the comparison table with pairwise probabilities.

        For each pair (row, col) with col < row whose scores are both
        available, `baycomp.two_on_single` is called on the two score arrays;
        `self.rope` is passed as well when `use_rope` and `rope` are both
        truthy. Cell (row, col) shows the first returned probability and the
        mirrored cell (col, row) the last one. If any returned value is NaN,
        both cells are marked "NA". Pairs with missing scores are left
        untouched.
        """
        table = self.comparison_table
        for row, (row_name, row_scores) in enumerate(zip(names, scores)):
            for col, col_name, col_scores in zip(range(row), names, scores):
                if row_scores is None or col_scores is None:
                    continue

                if self.use_rope and self.rope:
                    p0, rope, p1 = baycomp.two_on_single(
                        row_scores, col_scores, self.rope)
                    if any(np.isnan(p) for p in (p0, rope, p1)):
                        self._set_cells_na(table, row, col)
                        continue
                    cells = (
                        (row, col,
                         f"{p0:.3f}<br/><small>{rope:.3f}</small>",
                         f"p({row_name} > {col_name}) = {p0:.3f}\n"
                         f"p({row_name} = {col_name}) = {rope:.3f}"),
                        (col, row,
                         f"{p1:.3f}<br/><small>{rope:.3f}</small>",
                         f"p({col_name} > {row_name}) = {p1:.3f}\n"
                         f"p({col_name} = {row_name}) = {rope:.3f}"),
                    )
                else:
                    p0, p1 = baycomp.two_on_single(row_scores, col_scores)
                    if any(np.isnan(p) for p in (p0, p1)):
                        self._set_cells_na(table, row, col)
                        continue
                    cells = (
                        (row, col, f"{p0:.3f}",
                         f"p({row_name} > {col_name}) = {p0:.3f}"),
                        (col, row, f"{p1:.3f}",
                         f"p({col_name} > {row_name}) = {p1:.3f}"),
                    )

                for cell_row, cell_col, label, tooltip in cells:
                    self._set_cell(table, cell_row, cell_col, label, tooltip)

    @classmethod
    def _set_cells_na(cls, table, row, col):
        """Mark cell (row, col) and its mirror (col, row) as "NA"."""
        for cell_row, cell_col in ((row, col), (col, row)):
            cls._set_cell(table, cell_row, cell_col,
                          "NA", "comparison cannot be computed")

    @staticmethod
    def _set_cell(table, row, col, label, tooltip):
        """Place a centered QLabel showing `label`, with `tooltip`, as the
        cell widget at (row, col) of `table`."""
        cell = QLabel(label)
        cell.setAlignment(Qt.AlignCenter)
        cell.setToolTip(tooltip)
        table.setCellWidget(row, col, cell)

    def _update_class_selection(self):
        """
        Repopulate the target class combo box from the current data.

        The combo is emptied first; nothing else happens when `self.data` is
        falsy. For data with a discrete class the box is shown and filled
        with `TARGET_AVERAGE` followed by the class values, and the previous
        `class_selection` is kept if it is one of the values (otherwise the
        first item is chosen). For other data the box is hidden.
        """
        combo = self.class_selection_combo
        combo.setCurrentIndex(-1)
        combo.clear()
        if not self.data:
            return

        if not self.data.domain.has_discrete_class:
            self.cbox.setVisible(False)
            return

        self.cbox.setVisible(True)
        class_var = self.data.domain.class_var
        items = (self.TARGET_AVERAGE, ) + class_var.values
        combo.addItems(items)

        if self.class_selection in class_var.values:
            # +1 skips the leading TARGET_AVERAGE item
            class_index = class_var.values.index(self.class_selection) + 1
        else:
            class_index = 0

        combo.setCurrentIndex(class_index)
        self.class_selection = items[class_index]

    def _on_target_class_changed(self):
        """Refresh the score model and then the comparison table."""
        self.update_stats_model()
        self.update_comparison_table()

    def _invalidate(self, which=None):
        """
        Drop stored results for the given learner keys and request an update.

        Any running evaluation is cancelled first. For every key in `which`
        (all learners when `which` is None) the slot's `results` and `stats`
        are reset to None and the display/tooltip data of the matching score
        table row (columns 1 and up) is blanked. The comparison table is
        cleared and the widget is flagged as needing an update.
        """
        self.cancel()
        self.fold_feature_selected = \
            self.resampling == OWTestAndScore.FeatureFold
        keys = self.learners.keys() if which is None else which

        model = self.score_table.model
        # Learner key stored in the first column of each score table row
        row_keys = [
            model.item(row, 0).data(Qt.UserRole)
            for row in range(model.rowCount())
        ]

        for key in keys:
            slot = self.learners[key]
            self.learners[key] = slot._replace(results=None, stats=None)

            if key not in row_keys:
                continue
            row = row_keys.index(key)
            for column in range(1, model.columnCount()):
                item = model.item(row, column)
                if item is None:
                    continue
                for role in (Qt.DisplayRole, Qt.ToolTipRole):
                    item.setData(None, role)

        self.comparison_table.clearContents()

        self.__needupdate = True

    def commit(self):
        """
        Commit the results to output.

        Results of all successful learner slots are merged and sent on the
        evaluation results output; the augmented data built from them is sent
        on the predictions output. None is sent on both when no slot has
        successful results. A MemoryError while building the predictions is
        reported through `Error.memory_error` and None is sent instead.
        """
        self.Error.memory_error.clear()
        successful = [
            slot for slot in self.learners.values()
            if slot.results is not None and slot.results.success
        ]
        combined = predictions = None
        if successful:
            # Evaluation results
            combined = results_merge(
                [slot.results.value for slot in successful])
            combined.learner_names = [
                learner_name(slot.learner) for slot in successful
            ]

            # Predictions & Probabilities
            try:
                predictions = combined.get_augmented_data(
                    combined.learner_names)
            except MemoryError:
                self.Error.memory_error()

        self.Outputs.evaluations_results.send(combined)
        self.Outputs.predictions.send(predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not (self.data and self.learners):
            return

        resampling = self.resampling
        if resampling == self.KFold:
            prefix = 'Stratified ' if self.cv_stratified else ''
            sampling = "{}{}-fold Cross validation".format(
                prefix, self.NFolds[self.n_folds])
        elif resampling == self.LeaveOneOut:
            sampling = "Leave one out"
        elif resampling == self.ShuffleSplit:
            prefix = 'Stratified ' if self.shuffle_stratified else ''
            sampling = \
                "{}Shuffle split, {} random samples with {}% data ".format(
                    prefix, self.NRepeats[self.n_repeats],
                    self.SampleSizes[self.sample_size])
        elif resampling == self.TestOnTrain:
            sampling = "No sampling, test on training data"
        elif resampling == self.TestOnTest:
            sampling = "No sampling, test on testing data"
        else:
            # Other resampling methods get no "Sampling type" entry
            sampling = None

        items = [] if sampling is None else [("Sampling type", sampling)]
        if self.data.domain.has_discrete_class:
            items.append(("Target class", self.class_selection.strip("()")))
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.score_table.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        """
        Upgrade stored settings from older versions, in place.

        Parameters
        ----------
        settings_ : dict
            Stored widget settings; modified in place.
        version : int
            Version the settings were stored with.
        """
        if version < 2:
            # Every stored resampling index except 0 is shifted up by one.
            # Settings without the key are left alone instead of raising
            # KeyError, so the later migration steps still run.
            if settings_.get("resampling", 0) > 0:
                settings_["resampling"] += 1
        if version < 3:
            # Older version used an incompatible context handler; contexts
            # carrying a `classes` attribute are dropped
            settings_["context_settings"] = [
                c for c in settings_.get("context_settings", ())
                if not hasattr(c, 'classes')
            ]

    @Slot(float)
    def setProgressValue(self, value):
        """Slot forwarding a progress `value` to the widget's progress bar."""
        self.progressBarSet(value)

    def __update(self):
        """
        Start a new evaluation for all learners that have no results.

        Cancels a running task, resets the warning/error/information
        messages, checks the preconditions for the selected resampling
        method (committing the current state and returning early when they
        are not met), builds the evaluation callable and submits it.
        """
        self.__needupdate = False

        assert self.__task is None or self.__state == State.Running
        if self.__state == State.Running:
            self.cancel()

        # Reset all messages this method may raise
        self.Warning.test_data_unused.clear()
        self.Error.test_data_incompatible.clear()
        self.Warning.test_data_missing.clear()
        self.Warning.cant_stratify.clear()
        self.Information.cant_stratify_numeric.clear()
        # Shown when testing on separate test data whose attributes differ
        # from those of the training data
        self.Information.test_data_transformed(
            shown=self.resampling == self.TestOnTest and self.data is not None
            and self.test_data is not None and
            self.data.domain.attributes != self.test_data.domain.attributes)
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()

        # check preconditions and return early or show warnings
        if self.data is None:
            self.__state = State.Waiting
            self.commit()
            return
        if not self.learners:
            self.__state = State.Waiting
            self.commit()
            return
        if self.resampling == OWTestAndScore.KFold:
            k = self.NFolds[self.n_folds]
            if len(self.data) < k:
                self.Error.too_many_folds()
                self.__state = State.Waiting
                self.commit()
                return
            # `do_stratify` is only defined (and only used) for KFold
            do_stratify = self.cv_stratified
            if do_stratify:
                if self.data.domain.class_var.is_discrete:
                    # Size of the smallest class; `filter(None, ...)` drops
                    # the zero counts
                    least = min(
                        filter(None, np.bincount(self.data.Y.astype(int))))
                    if least < k:
                        self.Warning.cant_stratify(k, least)
                        do_stratify = False
                else:
                    self.Information.cant_stratify_numeric()
                    do_stratify = False

        elif self.resampling == OWTestAndScore.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                self.__state = State.Waiting
                self.commit()
                return
            elif self.test_data.domain.class_var != self.data.domain.class_var:
                self.Error.class_inconsistent()
                self.__state = State.Waiting
                self.commit()
                return

        elif self.test_data is not None:
            self.Warning.test_data_unused()

        # Fixed random state passed to the CrossValidation and ShuffleSplit
        # samplers below
        rstate = 42
        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        # deepcopy all learners as they are not thread safe (by virtue of
        # the base API). These will be the effective learner objects tested
        # but will be replaced with the originals on return (see restore
        # learners below)
        learners_c = [copy.deepcopy(learner) for learner in learners]

        if self.resampling == OWTestAndScore.TestOnTest:
            test_f = partial(
                Orange.evaluation.TestOnTestData(store_data=True,
                                                 store_models=True), self.data,
                self.test_data, learners_c, self.preprocessor)
        else:
            if self.resampling == OWTestAndScore.KFold:
                sampler = Orange.evaluation.CrossValidation(
                    k=self.NFolds[self.n_folds],
                    random_state=rstate,
                    stratified=do_stratify)
            elif self.resampling == OWTestAndScore.FeatureFold:
                sampler = Orange.evaluation.CrossValidationFeature(
                    feature=self.fold_feature)
            elif self.resampling == OWTestAndScore.LeaveOneOut:
                sampler = Orange.evaluation.LeaveOneOut()
            elif self.resampling == OWTestAndScore.ShuffleSplit:
                sampler = Orange.evaluation.ShuffleSplit(
                    n_resamples=self.NRepeats[self.n_repeats],
                    train_size=self.SampleSizes[self.sample_size] / 100,
                    test_size=None,
                    stratified=self.shuffle_stratified,
                    random_state=rstate)
            elif self.resampling == OWTestAndScore.TestOnTrain:
                sampler = Orange.evaluation.TestOnTrainingData(
                    store_models=True)
            else:
                # Unknown resampling method
                assert False, "self.resampling %s" % self.resampling

            sampler.store_data = True
            test_f = partial(sampler, self.data, learners_c, self.preprocessor)

        def replace_learners(evalfunc, *args, **kwargs):
            # Run the evaluation, then put the original learners back in
            # place of the copies in the returned results
            res = evalfunc(*args, **kwargs)
            assert all(lc is lo for lc, lo in zip(learners_c, res.learners))
            res.learners[:] = learners
            return res

        test_f = partial(replace_learners, test_f)

        self.__submit(test_f)

    def __submit(self, testfunc):
        # type: (Callable[[Callable[[float], None]], Results]) -> None
        """
        Submit a testing function for evaluation on the executor.

        MUST NOT be called while an evaluation is already pending/running;
        cancel the existing task first.

        Parameters
        ----------
        testfunc : Callable[[Callable[[float], None]], Results]
            A callable accepting a `callback` keyword argument and
            returning a Results instance.
        """
        assert self.__state != State.Running
        # Setup the task
        new_task = TaskState()

        def report_progress(finished):
            # Abort the evaluation when an interruption was requested;
            # otherwise forward the progress scaled by 100
            if new_task.is_interruption_requested():
                raise UserInterrupt()
            new_task.set_progress_value(100 * finished)

        new_task.start(self.__executor,
                       partial(testfunc, callback=report_progress))

        new_task.progress_changed.connect(self.setProgressValue)
        new_task.watcher.finished.connect(self.__task_complete)

        for output in (self.Outputs.evaluations_results,
                       self.Outputs.predictions):
            output.invalidate()
        self.progressBarInit()
        self.setStatusMessage("Running")

        self.__state = State.Running
        self.__task = new_task

    @Slot(object)
    def __task_complete(self, f: 'Future[Results]'):
        """
        Handle a completed evaluation task.

        Resets the progress bar and state, stores the per-learner results
        and scores, refreshes the score and comparison tables and commits
        the output. If retrieving the results raises, the exception is
        logged and shown as a widget error and nothing else is updated.
        """
        assert self.thread() is QThread.currentThread()
        assert self.__task is not None and self.__task.future is f
        self.progressBarFinished()
        self.setStatusMessage("")
        assert f.done()
        self.__task = None
        self.__state = State.Done
        try:
            results = f.result()  # type: Results
            learners = results.learners  # type: List[Learner]
        except Exception as er:  # pylint: disable=broad-except
            log.exception("testing error (in __task_complete):", exc_info=True)
            message = "\n".join(
                traceback.format_exception_only(type(er), er))
            self.error(message)
            return

        # Map each learner back to the key of its slot
        key_by_learner = {}
        for slot_key, slot in self.learners.items():
            key_by_learner[slot.learner] = slot_key
        assert all(learner in key_by_learner for learner in learners)

        # Update the results for individual learners
        class_var = results.domain.class_var
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                failure = result.failed[0]
                if failure:
                    stats = [Try.Fail(failure)] * len(self.scorers)
                    result = Try.Fail(failure)
                else:
                    stats = [
                        Try(scorer_caller(scorer, result))
                        for scorer in self.scorers
                    ]
                    result = Try.Success(result)
            key = key_by_learner.get(learner)
            updated = self.learners[key]._replace(results=result, stats=stats)
            self.learners[key] = updated

        self.score_table.update_header(self.scorers)
        self.update_stats_model()
        self.update_comparison_table()

        self.commit()

    def cancel(self):
        """
        Cancel the current/pending evaluation (if any).

        The task is cancelled and disconnected from the widget's slots, and
        the progress bar and status message are reset.
        """
        task = self.__task
        if task is None:
            return

        assert self.__state == State.Running
        self.__state = State.Cancelled
        self.__task = None
        task.cancel()
        task.progress_changed.disconnect(self.setProgressValue)
        task.watcher.finished.disconnect(self.__task_complete)

        self.progressBarFinished()
        self.setStatusMessage("")

    def onDeleteWidget(self):
        """Cancel any evaluation and shut the executor down (without
        waiting) before the base class clean-up."""
        self.cancel()
        self.__executor.shutdown(wait=False)
        super().onDeleteWidget()

    def copy_to_clipboard(self):
        """Copy the score table's current selection to the clipboard."""
        score_table = self.score_table
        score_table.copy_selection_to_clipboard()
Example #15
0
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget"""

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001

    # Input: (name, type, name of the handler method)
    inputs = [("Evaluation Results", Orange.evaluation.Results, "set_results")]
    # Output: (name, type); sent from `commit`
    outputs = [("Selected Data", Orange.data.Table)]

    # Items of the "Show" combo box; `selected_quantity` indexes this list
    quantities = [
        "Number of instances", "Proportion of predicted",
        "Proportion of actual"
    ]

    settingsHandler = settings.ClassValuesContextHandler()

    # Index of the learner selected in the learners list box
    selected_learner = settings.Setting(0)
    # Selected cells, stored as (row - 2, column - 2) pairs of table indices
    selection = settings.ContextSetting(set())
    selected_quantity = settings.Setting(0)
    # Whether predictions / probabilities are appended to the output metas
    append_predictions = settings.Setting(True)
    append_probabilities = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances", "click_cell")
    ]

    def __init__(self):
        """Set up the widget state, the control area and the table view."""
        super().__init__()
        # A list-valued setting is reduced to its first element
        # (0 for an empty list)
        if isinstance(self.selected_learner, list):
            stored = self.selected_learner
            self.selected_learner = stored[0] if stored else 0

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        learners_group = gui.vBox(self.controlArea, "Learners")
        self.learners_box = gui.listBox(
            learners_group, self, "selected_learner", "learners",
            callback=self._learner_changed)

        show_group = gui.vBox(self.controlArea, "Show")
        gui.comboBox(
            show_group, self, "selected_quantity",
            items=self.quantities, callback=self._update)

        select_group = gui.vBox(self.controlArea, "Select")
        selection_buttons = (
            ("Select Correct", self.select_correct),
            ("Select Misclassified", self.select_wrong),
            ("Clear Selection", self.select_none),
        )
        for caption, handler in selection_buttons:
            gui.button(select_group, self, caption,
                       callback=handler, autoDefault=False)

        output_group = gui.vBox(self.controlArea, "Output")
        self.outputbox = output_group
        output_options = (
            ("append_predictions", "Predictions"),
            ("append_probabilities", "Probabilities"),
        )
        for setting_name, caption in output_options:
            gui.checkBox(output_group, self, setting_name, caption,
                         callback=self._invalidate)

        gui.auto_commit(self.controlArea, self, "autocommit", "Send Selected",
                        "Send Automatically")

        grid = QGridLayout()

        self.tablemodel = QStandardItemModel(self)
        table_view = QTableView(editTriggers=QTableView.NoEditTriggers)
        self.tableview = table_view
        table_view.setModel(self.tablemodel)
        table_view.horizontalHeader().hide()
        table_view.verticalHeader().hide()
        table_view.horizontalHeader().setMinimumSectionSize(60)
        table_view.selectionModel().selectionChanged.connect(self._invalidate)
        table_view.setShowGrid(False)
        table_view.setItemDelegate(BorderedItemDelegate(Qt.white))
        table_view.clicked.connect(self.cell_clicked)
        grid.addWidget(table_view, 0, 0)
        self.mainArea.layout().addLayout(grid)

    def sizeHint(self):
        """Initial size"""
        width, height = 750, 490
        return QSize(width, height)

    def _item(self, i, j):
        """Return the table model's item at (i, j), or a new QStandardItem
        when the model yields a falsy value for that cell."""
        existing = self.tablemodel.item(i, j)
        return existing or QStandardItem()

    def _set_item(self, i, j, item):
        """Store `item` at (i, j) in the table model."""
        model = self.tablemodel
        model.setItem(i, j, item)

    def _init_table(self, nclasses):
        """
        Lay out the static parts of the confusion matrix table.

        Sets the "Predicted" and "Actual" caption cells (spanning `nclasses`
        columns / rows), disables the four top-left corner cells, writes the
        labels from `self.headers` into row 1 and column 1, and sizes the
        model to (nclasses + 3) x (nclasses + 3).
        """
        captions = (
            (0, 2, "Predicted", Qt.AlignCenter),
            (2, 0, "Actual", Qt.AlignHCenter | Qt.AlignBottom),
        )
        for i, j, text, alignment in captions:
            item = self._item(i, j)
            item.setData(text, Qt.DisplayRole)
            item.setTextAlignment(alignment)
            item.setFlags(Qt.NoItemFlags)
            self._set_item(i, j, item)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        bold_font = QFont(self.tablemodel.invisibleRootItem().font())
        bold_font.setBold(True)

        # Top-left corner cells carry no flags
        for i, j in ((0, 0), (0, 1), (1, 0), (1, 1)):
            item = self._item(i, j)
            item.setFlags(Qt.NoItemFlags)
            self._set_item(i, j, item)

        last_header = len(self.headers) - 1
        for p, label in enumerate(self.headers):
            # Each label goes into the header row and the header column
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < last_header:
                    # "r" for cells in column 1, "b" for cells in row 1
                    border = "r" if j == 1 else "b"
                    item.setData(border, BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        if len(' '.join(self.headers)) < 120:
            hor_header.setResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        size = nclasses + 3
        self.tablemodel.setRowCount(size)
        self.tablemodel.setColumnCount(size)

    def set_results(self, results):
        """
        Set the input results.

        The widget is cleared first. Results whose data has no discrete
        class raise a warning and are then treated as missing input.
        Otherwise the table is initialized from the class values, the
        previously selected learner is restored when still in range, and
        the output is committed.

        Raises
        ------
        NotImplementedError
            If `results` is given but carries no data.
        """

        prev_sel_learner = self.selected_learner
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data

        if data is not None and not data.domain.has_discrete_class:
            self.warning("Confusion Matrix cannot show regression results.")
            # There are no class values to tabulate; treat this as missing
            # input instead of failing on `class_var.values` below
            data = results = None

        self.results = results
        self.data = data

        if data is not None:
            # Copy into a list so that the summation header can be appended
            # regardless of the sequence type of `values`
            class_values = list(data.domain.class_var.values)
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
        else:
            self.report_button.setDisabled(False)

            nmodels = results.predicted.shape[0]
            self.headers = class_values + \
                           [unicodedata.lookup("N-ARY SUMMATION")]

            # NOTE: The 'learner_names' is set in 'Test Learners' widget.
            if hasattr(results, "learner_names"):
                self.learners = results.learner_names
            else:
                self.learners = [
                    "Learner #{}".format(i + 1) for i in range(nmodels)
                ]

            self._init_table(len(class_values))
            self.openContext(data.domain.class_var)
            if prev_sel_learner is None or \
                    prev_sel_learner >= len(self.learners):
                self.selected_learner = 0
            else:
                self.selected_learner = prev_sel_learner
            self._update()
            self._set_selection()
            self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        model = self.tablemodel
        diagonal = QItemSelection()
        # Rows/columns 0 and 1 hold captions and headers
        for k in range(2, model.rowCount()):
            cell = model.index(k, k)
            diagonal.select(cell, cell)
        self.tableview.selectionModel().select(
            diagonal, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        model = self.tablemodel
        off_diagonal = QItemSelection()
        size = model.rowCount()
        for i in range(2, size):
            for j in range(i + 1, size):
                # Cell above the diagonal, then its mirror below it
                for row, col in ((i, j), (j, i)):
                    cell = model.index(row, col)
                    off_diagonal.select(cell, cell)
        self.tableview.selectionModel().select(
            off_diagonal, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        selection_model = self.tableview.selectionModel()
        selection_model.clear()

    def cell_clicked(self, model_index):
        """
        Handle cell click event

        Clicks in row/column 0 are ignored. A click in row/column 1 or in
        the last row/column selects the whole corresponding column or row
        (from index 2 to the last); a click where both coordinates are 1 or
        both are the last selects that entire range. Other cells are left
        to the view's default handling.
        """
        row, col = model_index.row(), model_index.column()
        if not row or not col:
            return
        last = self.tablemodel.rowCount() - 1
        make_index = self.tablemodel.index
        edges = (1, last)

        if row == col == 1 or row == col == last:
            top, left, bottom, right = 2, 2, last, last
        elif row in edges:
            top, left, bottom, right = 2, col, last, col
        elif col in edges:
            top, left, bottom, right = row, 2, row, last
        else:
            return

        selection = QItemSelection(make_index(top, left),
                                   make_index(bottom, right))
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def commit(self):
        """
        Output data instances corresponding to selected cells

        An instance is selected when its (actual, predicted) pair for the
        selected learner matches a selected cell. Depending on the settings,
        the predictions and the class probabilities are appended as meta
        columns. None is sent when there are no results, no data or no
        selected learner.
        """
        have_input = self.results is not None and self.data is not None \
            and self.selected_learner is not None
        if not have_input:
            self.send("Selected Data", None)
            return

        # Table cells are offset by the two caption/header rows and columns
        cells = {(ind.row() - 2, ind.column() - 2)
                 for ind in self.tableview.selectedIndexes()}
        actual = self.results.actual
        chosen_name = self.learners[self.selected_learner]
        predicted = self.results.predicted[self.selected_learner]
        selected = [
            i for i, pair in enumerate(zip(actual, predicted))
            if pair in cells
        ]
        row_indices = self.results.row_indices[selected]

        class_var = self.data.domain.class_var
        metas = self.data.domain.metas
        extra_columns = []

        if self.append_predictions:
            predicted = numpy.array(predicted[selected], dtype=object)
            extra_columns.append(predicted.reshape(-1, 1))
            prediction_var = Orange.data.DiscreteVariable(
                "{}({})".format(class_var.name, chosen_name),
                class_var.values)
            metas = metas + (prediction_var, )

        if self.append_probabilities and \
                self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner,
                                               selected]
            extra_columns.append(numpy.array(probs, dtype=object))
            metas = metas + tuple(
                Orange.data.ContinuousVariable("p({})".format(value))
                for value in class_var.values)

        X = self.data.X[row_indices]
        Y = self.data.Y[row_indices]
        M = numpy.hstack(
            (self.data.metas[row_indices], ) + tuple(extra_columns))
        row_ids = self.data.ids[row_indices]

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars, metas)
        data = Orange.data.Table.from_numpy(domain, X, Y, M)
        data.ids = row_ids
        data.name = chosen_name

        self.send("Selected Data", data)

    def _invalidate(self):
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        self.commit()

    def _set_selection(self):
        """Select in the table view the cells listed in ``self.selection``.

        Stored (row, column) pairs are matrix-relative, so each is shifted by
        two in both directions to address the corresponding view cell.
        """
        make_index = self.tableview.model().index
        to_select = QItemSelection()
        for row, col in self.selection:
            cell = make_index(row + 2, col + 2)
            to_select.select(cell, cell)
        selection_model = self.tableview.selectionModel()
        selection_model.select(to_select, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        """Redraw the matrix, re-apply the stored selection and commit."""
        # Order matters: the matrix items are rebuilt first, then the stored
        # selection is applied to the view, and only then is output sent.
        self._update()
        self._set_selection()
        self.commit()

    def _update(self):
        """Refresh the displayed confusion matrix for the selected learner.

        Does nothing unless both results and a selected learner are present.
        Depending on ``self.selected_quantity`` the cells show raw counts (0),
        percentages of the column sums (1) or percentages of the row sums
        (any other value). Cell backgrounds are shaded by a normalized
        intensity, and the last row/column receive the column/row sums.
        """
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        if self.results is None or self.selected_learner is None:
            return

        cmatrix = confusion_matrix(self.results, self.selected_learner)
        colsum = cmatrix.sum(axis=0)
        rowsum = cmatrix.sum(axis=1)
        n = len(cmatrix)
        diag = numpy.diag_indices(n)

        # Off-diagonal intensities are scaled separately from the diagonal,
        # so the diagonal is zeroed while the scaling divisor is computed.
        colors = cmatrix.astype(numpy.double)
        colors[diag] = 0
        if self.selected_quantity == 0:
            # `numpy.int` was removed from NumPy (deprecated in 1.20);
            # the builtin `int` is what it aliased.
            normalized = cmatrix.astype(int)
            formatstr = "{}"
            div = numpy.array([colors.max()])
        elif self.selected_quantity == 1:
            normalized = 100 * cmatrix / colsum
            formatstr = "{:2.1f} %"
            div = colors.max(axis=0)
        else:
            normalized = 100 * cmatrix / rowsum[:, numpy.newaxis]
            formatstr = "{:2.1f} %"
            div = colors.max(axis=1)[:, numpy.newaxis]
        # Avoid dividing by zero where there are no off-diagonal entries.
        div[div == 0] = 1
        colors /= div
        colors[diag] = normalized[diag] / normalized[diag].max()

        for i in range(n):
            for j in range(n):
                val = normalized[i, j]
                col_val = colors[i, j]
                item = self._item(i + 2, j + 2)
                item.setData(
                    "NA" if _isinvalid(val) else formatstr.format(val),
                    Qt.DisplayRole)
                # Diagonal cells use hue 240, all others hue 0; lightness
                # decreases with the normalized intensity of the cell.
                bkcolor = QColor.fromHsl(
                    [0, 240][i == j], 160,
                    255 if _isinvalid(col_val) else int(255 - 30 * col_val))
                item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                item.setData("trbl", BorderRole)
                item.setToolTip("actual: {}\npredicted: {}".format(
                    self.headers[i], self.headers[j]))
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                self._set_item(i + 2, j + 2, item)

        bold_font = self.tablemodel.invisibleRootItem().font()
        bold_font.setBold(True)

        def _sum_item(value, border=""):
            # Build a bold, non-selectable item for a row/column total.
            item = QStandardItem()
            item.setData(value, Qt.DisplayRole)
            item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
            item.setFlags(Qt.ItemIsEnabled)
            item.setFont(bold_font)
            item.setData(border, BorderRole)
            item.setData(QColor(192, 192, 192), BorderColorRole)
            return item

        for i in range(n):
            self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
            self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
        self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Add the displayed confusion matrix to the report.

        Nothing is reported when there are no results or no learner is
        selected.
        """
        if self.results is None or self.selected_learner is None:
            return
        learner_name = self.learners[self.selected_learner]
        quantity = self.quantities[self.selected_quantity].lower()
        caption = "Confusion matrix for {} (showing {})".format(
            learner_name, quantity)
        self.report_table(caption, self.tableview)
class OWDifference(widget.OWWidget):
    """Widget appending differenced columns to the input time series.

    For every variable selected in the list view a new continuous column is
    computed (difference, quotient or percentage change of values a given
    number of steps apart) and appended to the attributes of the output.
    """
    name = 'Difference'
    description = 'Make the time series stationary by replacing it with ' \
                  '1st or 2nd order discrete difference along its values. '
    icon = 'icons/Difference.svg'
    priority = 570
    keywords = ['difference', 'derivative', 'quotient', 'percent change']

    class Inputs:
        time_series = Input("Time series", Table)

    class Outputs:
        time_series = Output("Time series", Timeseries)

    settingsHandler = DomainContextHandler()
    selected = ContextSetting([], schema_only=True)

    class Operation(str, Enum):
        DIFF = 'Difference'
        QUOT = 'Quotient'
        PERC = 'Percentage change'

    want_main_area = False
    resizing_enabled = False

    chosen_operation = settings.Setting(Operation.DIFF)
    diff_order = settings.Setting(1)
    shift_period = settings.Setting(1)
    invert_direction = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            'Series can be differentiated up to the 2nd order. '
            'However, if the series is shifted by other than 1 '
            'step, a differencing order of 1 is always assumed.', 'diff-shift')
    ]

    def __init__(self):
        """Build the control area: operation, order, shift and variables."""
        super().__init__()
        self.data = None

        box = gui.vBox(self.controlArea, 'Differencing')

        gui.comboBox(
            box, self, 'chosen_operation',
            orientation=Qt.Horizontal,
            items=[el.value for el in self.Operation],
            label='Compute:',
            callback=self.on_changed,
            sendSelectedValue=True)

        self.order_spin = gui.spin(
            box, self, 'diff_order', 1, 2,
            label='Differencing order:',
            callback=self.on_changed,
            tooltip='The value corresponds to n-th order numerical '
                    'derivative of the series. \nThe order is fixed to 1 '
                    'if the shift period is other than 1.')
        gui.spin(
            box, self, 'shift_period', 1, 100,
            label='Shift:',
            callback=self.on_changed,
            tooltip='Set this to other than 1 if you don\'t want to '
                    'compute differences for subsequent values but for '
                    'values shifted number of spaces apart. \n'
                    'If this value is different from 1, differencing '
                    'order is fixed to 1.')
        gui.checkBox(
            box, self, 'invert_direction',
            label='Invert differencing direction',
            callback=self.on_changed,
            tooltip='Influences where the series is padded with nan '
                    'values — at the beginning or at the end.')
        self.view = view = QListView(
            self, selectionMode=QListView.ExtendedSelection)
        self.model = model = VariableListModel(parent=self)
        view.setModel(model)
        view.selectionModel().selectionChanged.connect(self.on_changed)
        box.layout().addWidget(view)
        gui.auto_commit(box, self, 'autocommit', '&Apply')

    @Inputs.time_series
    def set_data(self, data):
        """Accept a new input table and refresh the variable list.

        The table is converted with ``Timeseries.from_data_table``; ``None``
        empties the list. ``on_changed`` is invoked in both cases.
        """
        self.closeContext()
        self.data = data = None if data is None else Timeseries.from_data_table(
            data)
        if data is not None:
            self.model[:] = [
                var for var in data.domain.variables
                if var.is_continuous and var is not data.time_variable
            ]
            self.select_default_variable()
            self.openContext(self.data)
            self._restore_selection()
        else:
            self.reset_model()
        self.on_changed()

    def _restore_selection(self):
        """Select in the view the rows of the variables in ``self.selected``.

        The selection model's signals are blocked while the rows are selected.
        """
        var_list = [
            var for var in self.data.domain.variables
            if var.is_continuous and var is not self.data.time_variable
        ]
        # Skip entries that are not listed variables, e.g. the ``[0]``
        # placeholder left by ``select_default_variable`` when the list is
        # empty; ``list.index`` would otherwise raise ValueError.
        indices = [
            var_list.index(var) for var in self.selected if var in var_list
        ]
        with signal_blocking(self.view.selectionModel()):
            select_rows(self.view, indices)

    def select_default_variable(self):
        """Select the first row of the variable list."""
        # NOTE: this stores a row index, while `on_changed` stores variables.
        self.selected = [0]
        select_rows(self.view, self.selected)

    def reset_model(self):
        """Empty the variable list model."""
        self.model.wrap([])

    def on_changed(self):
        """Sync state with the controls and commit.

        The order spin box is enabled only for plain differencing with a
        shift of 1; ``self.selected`` is rebuilt from the selected rows.
        """
        rows = [
            index.row() for index in self.view.selectionModel().selectedRows()
        ]
        self.order_spin.setEnabled(
            self.shift_period == 1
            and self.chosen_operation == self.Operation.DIFF)
        self.selected = [self.model[row] for row in rows]
        self.commit()

    def commit(self):
        """Compute the new columns and send the extended time series.

        Sends ``None`` when there is no data or nothing is selected.
        """
        data = self.data
        if not data or not self.selected:
            self.Outputs.time_series.send(None)
            return

        X = []
        attrs = []
        invert = self.invert_direction
        shift = self.shift_period
        order = self.diff_order
        op = self.chosen_operation
        # The differencing order only applies to differences of adjacent
        # values; any other combination works on values `shift` steps apart.
        use_order = op == self.Operation.DIFF and shift == 1

        for var in self.selected:
            col = np.ravel(data[:, var])

            if invert:
                col = col[::-1]

            out = np.empty(len(col))
            if use_order:
                out[order:] = np.diff(col, order)
                out[:order] = np.nan
            else:
                if op == self.Operation.DIFF:
                    out[shift:] = col[shift:] - col[:-shift]
                else:
                    out[shift:] = np.divide(col[shift:], col[:-shift])
                    if op == self.Operation.PERC:
                        out = (out - 1) * 100
                # The leading entries have no counterpart to compare with.
                out[:shift] = np.nan

            if invert:
                out = out[::-1]

            X.append(out)

            if use_order:
                details = f'order={order}'
            else:
                details = f'shift={shift}'

            template = f'{var} ({op[:4].lower()}; {details})'
            name = available_name(data.domain, template)
            attrs.append(ContinuousVariable(name))

        ts = Timeseries(
            Domain(data.domain.attributes + tuple(attrs),
                   data.domain.class_vars, data.domain.metas),
            np.column_stack((data.X, np.column_stack(X))), data.Y, data.metas)
        ts.time_variable = data.time_variable
        self.Outputs.time_series.send(ts)
class OWNxExplorer(widget.OWWidget):
    name = "Network Explorer"
    description = "Visually explore the network and its properties."
    icon = "icons/NetworkExplorer.svg"
    priority = 6420

    class Inputs:
        # Input signals of the widget; "Network" is declared as the default.
        # Handlers in this class: network -> set_graph, node_data -> set_items,
        # node_subset -> set_marking_items,
        # node_distances -> set_items_distance_matrix.
        network = Input("Network", network.Graph, default=True)
        node_subset = Input("Node Subset", Table)
        node_data = Input("Node Data", Table)
        node_distances = Input("Node Distances", Orange.misc.DistMatrix)

    class Outputs:
        # Output signals; all of them are sent from `send_data`.
        # Graph outputs: sub-network of the selected nodes and of the rest.
        subgraph = Output("Selected sub-network", network.Graph)
        unselected_subgraph = Output("Remaining sub-network", network.Graph)
        # Submatrix of the input distance matrix for the selected nodes.
        distances = Output("Distance matrix", Orange.misc.DistMatrix)
        # Rows of the graph's items table for selected / highlighted nodes
        # and for nodes that are neither.
        selected = Output("Selected items", Table)
        highlighted = Output("Highlighted items", Table)
        remaining = Output("Remaining items", Table)

    UserAdviceMessages = [
        widget.Message(
            'When selecting nodes on the Marking tab, '
            'press <b><tt>Enter</tt></b> key to add '
            '<b><font color="{}">highlighted</font></b> nodes to '
            '<b><font color="{}">selection</font></b>.'.format(
                Node.Pen.HIGHLIGHTED.color().name(),
                Node.Pen.SELECTED.color().name()), 'marking-info',
            widget.Message.Information),
        widget.Message(
            'Left-click to select nodes '
            '(hold <b><tt>Shift</tt></b> to append to selection). '
            'Right-click to pan/move the view. Scroll to zoom.', 'mouse-info',
            widget.Message.Information),
    ]

    settingsHandler = DomainContextHandler()

    do_auto_commit = Setting(True)
    selectionMode = Setting(SelectionMode.FROM_INPUT)
    tabIndex = Setting(0)
    showEdgeWeights = Setting(False)
    relativeEdgeWidths = Setting(False)
    randomizePositions = Setting(True)
    invertNodeSize = Setting(False)
    markDistance = Setting(1)
    markSearchString = Setting("")
    markNBest = Setting(1)
    markNConnections = Setting(2)

    point_width = Setting(10)
    edge_width = Setting(1)
    attr_size = ContextSetting(None)
    attr_color = ContextSetting(None)
    attrs_label = ContextSetting({})
    attrs_tooltip = ContextSetting({})
    graph_name = 'view'

    class Warning(widget.OWWidget.Warning):
        # Non-fatal conditions reported to the user.
        distance_matrix_size = widget.Msg(
            "Distance matrix size doesn't match the number of network nodes. Not using it."
        )
        # Shown by `set_items` when node data arrives without a graph.
        no_graph_found = widget.Msg('No graph found!')
        no_graph_or_items = widget.Msg(
            'No graph provided or no items attached to the graph.')

    class Error(widget.OWWidget.Error):
        # Shown by `set_items` when the table length differs from the number
        # of graph nodes.
        instance_for_each_node = widget.Msg(
            'Items table must have one instance for each network node.')
        # Shown by `set_graph` when nodes + edges exceed its size limit.
        network_too_large = widget.Msg(
            'Network is too large to visualize. Sorry.')

    def __init__(self):
        """Create the graph view and build the "Display" and "Marking" tabs.

        Initializes the attributes bound to GUI controls and labels, wires the
        view's signals, and finishes by resetting the widget to the no-graph
        state and applying the current selection mode.
        """
        super().__init__()
        #self.contextHandlers = {"": DomainContextHandler("", [ContextField("attributes", selected="node_label_attrs"), ContextField("attributes", selected="tooltipAttributes"), "color"])}

        # The graph view fills the main area.
        self.view = GraphView(self)
        self.mainArea.layout().addWidget(self.view)

        self.graph_attrs = []

        # Set by `switchTab`; gates the Enter handling in `keyReleaseEvent`.
        self.acceptingEnterKeypress = False

        self.node_label_attrs = []
        self.tooltipAttributes = []
        self.searchStringTimer = QTimer(self)
        self.markInputItems = None
        self.node_color_attr = 0
        self.node_size_attr = 0

        # Values interpolated into the info labels created below.
        self.nHighlighted = 0
        self.nSelected = 0
        self.verticesPerEdge = 0
        self.edgesPerVertex = 0

        self.items_matrix = None
        self.number_of_nodes_label = 0
        self.number_of_edges_label = 0

        self.graph = None

        self.setMinimumWidth(600)

        self.tabs = gui.tabWidget(self.controlArea)
        self.displayTab = gui.createTabPage(self.tabs, "Display")
        self.markTab = gui.createTabPage(self.tabs, "Marking")

        def on_tab_changed(index):
            # Remember the tab in settings and re-apply the highlighting mode.
            self.tabIndex = index
            self.set_selection_mode()

        self.tabs.currentChanged.connect(on_tab_changed)
        self.tabs.setCurrentIndex(self.tabIndex)

        # --- Display tab: info box ---
        ib = gui.widgetBox(self.displayTab, "Info")
        gui.label(
            ib, self,
            "Nodes: %(number_of_nodes_label)i (%(verticesPerEdge).2f per edge)"
        )
        gui.label(
            ib, self,
            "Edges: %(number_of_edges_label)i (%(edgesPerVertex).2f per node)")

        # --- Display tab: node layout, color and size ---
        box = gui.widgetBox(self.displayTab, "Nodes")

        self.relayout_button = gui.button(box,
                                          self,
                                          'Re-layout',
                                          callback=self.relayout,
                                          autoDefault=False)
        self.randomize_cb = gui.checkBox(box, self, "randomizePositions",
                                         "Randomize positions")
        # NOTE(review): `self.progressbar` is not assigned in this method;
        # presumably `relayout` creates it before these signals fire - confirm.
        self.view.positionsChanged.connect(
            lambda positions, progress: self.progressbar.widget.progressBarSet(
                int(round(100 * progress))))

        def animationFinished():
            self.relayout_button.setEnabled(True)
            self.progressbar.finish()

        self.view.animationFinished.connect(animationFinished)

        self.color_model = VariableListModel(placeholder="(Same color)")
        self.color_combo = gui.comboBox(box,
                                        self,
                                        "attr_color",
                                        label='Color:',
                                        orientation='horizontal',
                                        callback=self.set_node_colors,
                                        model=self.color_model)

        self.size_model = VariableListModel(placeholder="(Same size)")
        self.size_combo = gui.comboBox(box,
                                       self,
                                       "attr_size",
                                       label='Size:',
                                       orientation='horizontal',
                                       callback=self.set_node_sizes,
                                       model=self.size_model)
        gui.hSlider(box,
                    self,
                    'point_width',
                    label="Symbol size:   ",
                    minValue=1,
                    maxValue=10,
                    step=1,
                    createLabel=False,
                    callback=self.set_node_sizes)
        hb = gui.widgetBox(box, orientation="horizontal")
        hb.layout().addStretch(1)
        self.invertNodeSizeCheck = gui.checkBox(hb,
                                                self,
                                                "invertNodeSize",
                                                "Invert",
                                                callback=self.set_node_sizes)

        # --- Display tab: attributes shown as node labels / tooltips ---
        hb = gui.widgetBox(self.displayTab,
                           box="Node labels | tooltips",
                           orientation="horizontal",
                           addSpace=False)
        self.attListBox = gui.listBox(
            hb,
            self,
            "node_label_attrs",
            "graph_attrs",
            selectionMode=QListWidget.MultiSelection,
            sizeHint=QSize(100, 100),
            callback=self._on_node_label_attrs_changed)
        self.tooltipListBox = gui.listBox(
            hb,
            self,
            "tooltipAttributes",
            "graph_attrs",
            selectionMode=QListWidget.MultiSelection,
            sizeHint=QSize(100, 100),
            callback=self._clicked_tooltip_lstbox)

        # --- Display tab: edges ---
        eb = gui.widgetBox(self.displayTab, "Edges", orientation="vertical")
        self.checkbox_relative_edges = gui.checkBox(
            eb,
            self,
            'relativeEdgeWidths',
            'Relative edge widths',
            callback=self.set_edge_sizes)
        gui.hSlider(eb,
                    self,
                    'edge_width',
                    label="Edge width: ",
                    minValue=1,
                    maxValue=10,
                    step=1,
                    createLabel=False,
                    callback=self.set_edge_sizes)
        self.checkbox_show_weights = gui.checkBox(
            eb,
            self,
            'showEdgeWeights',
            'Show edge weights',
            callback=self.set_edge_labels)

        # --- Marking tab: info box ---
        ib = gui.widgetBox(self.markTab, "Info", orientation="vertical")
        gui.label(ib, self, "Nodes: %(number_of_nodes_label)i")
        gui.label(ib, self, "Selected: %(nSelected)i")
        gui.label(ib, self, "Highlighted: %(nHighlighted)i")

        def on_selection_change():
            # Refresh the counters, re-evaluate highlighting and send outputs.
            self.nSelected = len(self.view.getSelected())
            self.nHighlighted = len(self.view.getHighlighted())
            self.set_selection_mode()
            self.commit()

        self.view.selectionChanged.connect(on_selection_change)

        # --- Marking tab: highlighting modes ---
        # The radio buttons are appended in the order that `selectionMode`
        # is bound to, so their order must not change.
        ib = gui.widgetBox(self.markTab, "Highlight nodes ...")
        ribg = gui.radioButtonsInBox(ib,
                                     self,
                                     "selectionMode",
                                     callback=self.set_selection_mode)
        gui.appendRadioButton(ribg, "None")
        gui.appendRadioButton(ribg, "... whose attributes contain:")
        self.ctrlMarkSearchString = gui.lineEdit(
            gui.indentedBox(ribg),
            self,
            "markSearchString",
            callback=self._set_search_string_timer,
            callbackOnType=True)
        # Typing (re)starts the timer; the search runs when it times out.
        self.searchStringTimer.timeout.connect(self.set_selection_mode)

        gui.appendRadioButton(ribg,
                              "... neighbours of selected, ≤ N hops away")
        ib = gui.indentedBox(ribg, orientation=0)
        self.ctrlMarkDistance = gui.spin(
            ib,
            self,
            "markDistance",
            1,
            100,
            1,
            label="Hops:",
            callback=lambda: self.set_selection_mode(SelectionMode.NEIGHBORS))
        ib.layout().addStretch(1)
        gui.appendRadioButton(ribg, "... with at least N connections")
        gui.appendRadioButton(ribg, "... with at most N connections")
        ib = gui.indentedBox(ribg, orientation=0)
        # The spin box is shared by the two modes above: it keeps AT_MOST_N
        # if that is the current mode and switches to AT_LEAST_N otherwise.
        self.ctrlMarkNConnections = gui.spin(
            ib,
            self,
            "markNConnections",
            0,
            1000000,
            1,
            label="Connections:",
            callback=lambda: self.set_selection_mode(
                SelectionMode.AT_MOST_N if self.selectionMode == SelectionMode.
                AT_MOST_N else SelectionMode.AT_LEAST_N))
        ib.layout().addStretch(1)
        gui.appendRadioButton(ribg,
                              "... with more connections than any neighbor")
        gui.appendRadioButton(
            ribg, "... with more connections than average neighbor")
        gui.appendRadioButton(ribg, "... with most connections")
        ib = gui.indentedBox(ribg, orientation=0)
        self.ctrlMarkNumber = gui.spin(
            ib,
            self,
            "markNBest",
            1,
            1000000,
            1,
            label="Number of nodes:",
            callback=lambda: self.set_selection_mode(SelectionMode.MOST_CONN))
        ib.layout().addStretch(1)
        self.markInputRadioButton = gui.appendRadioButton(
            ribg, "... from Node Subset input signal")
        self.markInputRadioButton.setEnabled(True)

        gui.auto_commit(ribg, self, 'do_auto_commit', 'Output changes')
        self.markTab.layout().addStretch(1)

        # Start in the no-graph state (this also initializes `graph_base`
        # and `_items` via `set_graph_none`).
        self.set_graph(None)
        self.set_selection_mode()

    def sizeHint(self):
        """Return the preferred widget size of 800 x 600."""
        return QSize(800, 600)

    def commit(self):
        """Send all outputs; delegates to `send_data`."""
        self.send_data()

    @Inputs.node_distances
    def set_items_distance_matrix(self, matrix):
        """Store the node distance matrix (or None) and re-run the layout."""
        # NOTE(review): the matrix size is not compared with the number of
        # nodes here; presumably `relayout` performs that check - confirm.
        assert matrix is None or isinstance(matrix, Orange.misc.DistMatrix)
        self.items_matrix = matrix
        self.relayout()

    def _set_search_string_timer(self):
        """Switch to search mode and (re)start the search timer.

        Restarting the timer on every call postpones the search until the
        timer, started with an argument of 300, times out.
        """
        self.selectionMode = SelectionMode.SEARCH
        self.searchStringTimer.stop()
        self.searchStringTimer.start(300)

    def switchTab(self, index=None):
        """Update whether Enter key presses should be accepted.

        Enter is accepted only when the tab at `index` is the Marking tab and
        a highlighting mode other than NONE is active.

        Parameters
        ----------
        index : int, optional
            Index of the tab to consider; the current tab when omitted.
        """
        # Compare with None explicitly: `index or ...` discarded an explicit
        # index of 0 and silently used the current tab instead.
        if index is None:
            index = self.tabs.currentIndex()
        curTab = self.tabs.widget(index)
        self.acceptingEnterKeypress = bool(
            curTab == self.markTab
            and self.selectionMode != SelectionMode.NONE)

    @non_reentrant
    def set_selection_mode(self, selectionMode=None):
        """Set the highlighting mode and highlight the matching nodes.

        Parameters
        ----------
        selectionMode : optional
            One of the `SelectionMode` constants; when omitted, the current
            `self.selectionMode` is re-applied.

        Highlighting is recomputed only when a graph is present and either
        the Marking tab is shown or the mode is FROM_INPUT.
        """
        self.searchStringTimer.stop()
        # Compare with None explicitly: `selectionMode or ...` would ignore an
        # explicitly passed mode constant that happens to be falsy.
        if selectionMode is None:
            selectionMode = self.selectionMode
        self.selectionMode = selectionMode
        self.switchTab()

        off_mark_tab = \
            self.tabs.widget(self.tabs.currentIndex()) != self.markTab
        if self.graph is None or (
                off_mark_tab and selectionMode != SelectionMode.FROM_INPUT):
            return

        graph = self.graph

        def neighbor_degrees(node):
            # Degrees of the nodes adjacent to `node`.
            return list(dict(graph.degree(graph[node])).values())

        if selectionMode == SelectionMode.NONE:
            self.view.setHighlighted([])
        elif selectionMode == SelectionMode.SEARCH:
            table, txt = graph.items(), self.markSearchString.lower()
            if not table or not txt:
                return
            toMark = {i for i, instance in enumerate(table)
                      if txt in " ".join(map(str, instance.list)).lower()}
            self.view.setHighlighted(toMark)
        elif selectionMode == SelectionMode.NEIGHBORS:
            selected = set(self.view.getSelected())
            neighbors = selected.copy()
            # Grow the neighbourhood one hop per iteration.
            for _ in range(self.markDistance):
                for neigh in list(neighbors):
                    neighbors |= set(graph[neigh].keys())
            neighbors -= selected
            self.view.setHighlighted(neighbors)
        elif selectionMode == SelectionMode.AT_LEAST_N:
            self.view.setHighlighted(
                {node for node, degree in graph.degree()
                 if degree >= self.markNConnections})
        elif selectionMode == SelectionMode.AT_MOST_N:
            self.view.setHighlighted(
                {node for node, degree in graph.degree()
                 if degree <= self.markNConnections})
        elif selectionMode == SelectionMode.ANY_NEIGH:
            self.view.setHighlighted(
                {node for node, degree in graph.degree()
                 if degree > max(neighbor_degrees(node), default=0)})
        elif selectionMode == SelectionMode.AVG_NEIGH:
            # The mean over no neighbours is nan, which counts as 0 here.
            self.view.setHighlighted(
                {node for node, degree in graph.degree()
                 if degree > np.nan_to_num(np.mean(neighbor_degrees(node)))})
        elif selectionMode == SelectionMode.MOST_CONN:
            degrees = np.array(
                sorted(graph.degree(), key=lambda i: i[1], reverse=True))
            cut_ind = max(1, min(self.markNBest, graph.number_of_nodes()))
            # Include every node tied with the last one inside the cut.
            cut_degree = degrees[cut_ind - 1, 1]
            toMark = set(degrees[degrees[:, 1] >= cut_degree, 0])
            self.view.setHighlighted(toMark)
        elif selectionMode == SelectionMode.FROM_INPUT:
            tomark = set()
            # Fetch the items table once instead of once per node, and skip
            # the lookup when the graph has no items attached.
            items = graph.items()
            if self.markInputItems and items:
                ids = set(self.markInputItems.ids)
                tomark = {x for x in graph if items[x].id in ids}
            self.view.setHighlighted(tomark)

    def keyReleaseEvent(self, ev):
        """Add the highlighted nodes to the selection when Enter is released.

        Any other key, or Enter while it is not being accepted, is passed on
        to the base class handler.
        """
        if (self.acceptingEnterKeypress
                and ev.key() in (Qt.Key_Return, Qt.Key_Enter)):
            self.view.setSelected(self.view.getHighlighted(), extend=True)
            self.view.setHighlighted([])
            self.set_selection_mode()
        else:
            super().keyReleaseEvent(ev)

    def save_network(self):
        """Ask for a file name and write the graph with node positions.

        The x/y position of each node is stored in the graph's node data
        (and in the items table when it has 'x' / 'y' columns) before the
        graph is written. A file name without extension gets ".net".
        """
        # TODO: this was never reviewed since Orange2
        if self.view is None or self.graph is None:
            return

        filename = QFileDialog.getSaveFileName(
            self, 'Save Network', '',
            'NetworkX graph as Python pickle (*.gpickle)\n'
            'NetworkX edge list (*.edgelist)\n'
            'Pajek network (*.net *.pajek)\n'
            'GML network (*.gml)')
        # Qt5-style bindings return (filename, selected_filter); a tuple is
        # always truthy and cannot be passed to os.path.splitext.
        if isinstance(filename, tuple):
            filename = filename[0]
        if not filename:
            return

        _, ext = os.path.splitext(filename)
        if not ext:
            filename += ".net"
        items = self.graph.items()
        for i in range(self.graph.number_of_nodes()):
            # NOTE(review): `self.networkCanvas` is not assigned anywhere in
            # the visible part of this class, and `graph.node` may not exist
            # on the graph class in use - verify before relying on this.
            graph_node = self.graph.node[i]
            plot_node = self.networkCanvas.networkCurve.nodes()[i]

            if items is not None:
                ex = items[i]
                if 'x' in ex.domain:
                    ex['x'] = plot_node.x()
                if 'y' in ex.domain:
                    ex['y'] = plot_node.y()

            graph_node['x'] = plot_node.x()
            graph_node['y'] = plot_node.y()

        network.readwrite.write(self.graph, filename)

    def send_data(self):
        """Send the sub-networks, distances and item tables to the outputs.

        Without a graph every output receives None. Otherwise the outputs
        are derived from the view's selected and highlighted nodes; the
        item outputs are None when the graph has no items.
        """
        outputs = self.Outputs
        if not self.graph:
            for attr in dir(outputs):
                if attr.startswith('__'):
                    continue
                getattr(outputs, attr).send(None)
            return

        selected = self.view.getSelected()
        if selected:
            outputs.subgraph.send(self.graph.subgraph(selected))
            outputs.unselected_subgraph.send(
                self.graph.subgraph(self.view.getUnselected()))
        else:
            # Nothing selected: the whole graph counts as "remaining".
            outputs.subgraph.send(None)
            outputs.unselected_subgraph.send(self.graph)

        if self.items_matrix is not None and selected:
            distances = self.items_matrix.submatrix(sorted(selected))
        else:
            distances = None
        outputs.distances.send(distances)

        items = self.graph.items()
        if not items:
            outputs.selected.send(None)
            outputs.highlighted.send(None)
            outputs.remaining.send(None)
            return

        highlighted = self.view.getHighlighted()

        def rows_or_none(nodes, presorted=False):
            # Rows of the items table for `nodes`, or None for no nodes.
            if not nodes:
                return None
            return items[nodes if presorted else sorted(nodes), :]

        outputs.selected.send(rows_or_none(selected))
        outputs.highlighted.send(rows_or_none(highlighted))
        remaining = sorted(set(self.graph) - set(selected) - set(highlighted))
        outputs.remaining.send(rows_or_none(remaining, presorted=True))

    def _set_combos(self):
        """Repopulate the color/size combos and the label/tooltip list boxes.

        The combo models are rebuilt from the variables of the graph's items,
        node and edge appearance is refreshed, and the list box selections
        are restored from the stored `attrs_label` / `attrs_tooltip`.
        """
        self._clear_combos()
        self.graph_attrs = self.graph.items_vars()

        primitive_vars = [v for v in self.graph_attrs if v.is_primitive()]
        self.color_model[:] = [None] + primitive_vars
        continuous_vars = [v for v in self.graph_attrs if v.is_continuous]
        self.size_model[:] = [None] + continuous_vars

        no_attrs = not self.graph_attrs
        self.size_combo.setDisabled(no_attrs)
        self.color_combo.setDisabled(no_attrs)
        self.set_node_sizes()
        self.set_node_colors()
        self.set_edge_sizes()

        stored_and_boxes = ((self.attrs_label, self.attListBox),
                            (self.attrs_tooltip, self.tooltipListBox))
        for stored_vars, listbox in stored_and_boxes:
            wanted = [var.name for var in stored_vars]
            if not wanted:
                listbox.selectionModel().clearSelection()
                continue
            # Select every row whose text matches a stored variable name.
            to_select = QItemSelection()
            box_model = listbox.model()
            for row in range(listbox.count()):
                if str(listbox.item(row).text()) in wanted:
                    to_select.append(
                        QItemSelectionRange(box_model.index(row, 0)))
            selmodel = listbox.selectionModel()
            selmodel.select(to_select, selmodel.Select | selmodel.Clear)
        self._on_node_label_attrs_changed()
        self._clicked_tooltip_lstbox()

    def _clear_combos(self):
        """Forget the graph attributes and empty the color and size combos."""
        self.graph_attrs = []
        self.color_combo.clear()
        self.size_combo.clear()

    def set_graph_none(self):
        """Reset the widget to the state without a graph.

        Clears the stored graph, its base copy and the items table, empties
        the combos, zeroes the info label values and removes the graph from
        the view.
        """
        self.graph = None
        self.graph_base = None
        self._clear_combos()
        # Values shown in the info labels.
        self.number_of_nodes_label = 0
        self.number_of_edges_label = 0
        self.verticesPerEdge = 0
        self.edgesPerVertex = 0
        self._items = None
        self.view.set_graph(None)

    @Inputs.network
    def set_graph(self, graph):
        """Set the network to visualize.

        An empty or missing graph, a graph with fewer than two nodes, and a
        graph whose node and edge counts sum to more than 30000 all reset
        the widget to the no-graph state (the latter two with a message).
        Otherwise a copy of the graph is shown, previously received node
        data is attached, the controls are updated and the layout restarted.
        """
        # Drop messages left over from the previous network up front.
        # Clearing errors only at the end left stale messages on the early
        # exits below and wiped an error just raised by set_items().
        self.Error.clear()
        self.information()

        if not graph:
            return self.set_graph_none()
        if graph.number_of_nodes() < 2:
            self.set_graph_none()
            self.information(
                'I\'m not really in a mood to visualize just one node. Try again tomorrow.'
            )
            return
        if graph.number_of_nodes() + graph.number_of_edges() > 30000:
            self.set_graph_none()
            self.Error.network_too_large()
            return
        self.closeContext()

        # Weight-related options make no sense when all weights are the same.
        weights = {w for u, v, w in graph.edges(data='weight')}
        all_edges_equal = len(weights) == 1
        self.checkbox_show_weights.setEnabled(not all_edges_equal)
        self.checkbox_relative_edges.setEnabled(not all_edges_equal)

        self.graph_base = graph
        self.graph = graph.copy()
        # Set items table from the separate signal
        if self._items:
            self.set_items(self._items)

        self.view.set_graph(self.graph, relayout=False)

        # Set labels
        n_nodes = self.graph.number_of_nodes()
        n_edges = self.graph.number_of_edges()
        self.number_of_nodes_label = n_nodes
        self.number_of_edges_label = n_edges
        self.verticesPerEdge = n_nodes / max(1, n_edges)
        self.edgesPerVertex = n_edges / max(1, n_nodes)

        self._set_combos()
        if self.graph.items():
            self.openContext(self.graph.items().domain)

        self.set_selection_mode()
        self.randomizePositions = True
        self.relayout()

    @Inputs.node_data
    def set_items(self, items=None):
        """Receive the node data table and attach it to the current graph.

        ``None`` removes the node data by re-running ``set_graph()`` on the
        graph kept from the network input. A table is attached only when a
        graph is present and the table has one row per node; otherwise a
        warning or an error is shown and the graph is left untouched.

        :param items: table with one row per node, or ``None``
        """
        self._items = items
        # Both messages describe the previously received table. Clearing them
        # before any early return keeps them from lingering after the node
        # data is removed while no network is present.
        self.Warning.no_graph_found.clear()
        self.Error.instance_for_each_node.clear()

        if items is None:
            return self.set_graph(self.graph_base)
        if not self.graph:
            self.Warning.no_graph_found()
            return
        self.Warning.clear()
        if len(items) != self.graph.number_of_nodes():
            self.Error.instance_for_each_node()
            return
        self.graph.set_items(items)
        self._set_combos()

    @Inputs.node_subset
    def set_marking_items(self, items):
        """Receive the node subset used for marking nodes from the input.

        The "mark from input" radio button is enabled only when a non-empty
        subset arrives while the graph has a node table. If the widget was
        selecting from the input and the input can no longer be used, the
        selection mode falls back to ``SelectionMode.NONE``.

        :param items: the subset table, or ``None`` when the input is removed
        """
        def lacks_node_table():
            return self.graph is None or self.graph.items() is None

        self.markInputRadioButton.setEnabled(False)
        self.markInputItems = items
        self.Warning.clear()

        if self.selectionMode == SelectionMode.FROM_INPUT:
            if items is None or lacks_node_table():
                self.selectionMode = SelectionMode.NONE

        if items is None:
            self.view.selectionChanged.emit()
            return

        if lacks_node_table():
            self.Warning.no_graph_or_items()
            return

        if len(items) > 0:
            self.markInputRadioButton.setEnabled(True)
        self.view.selectionChanged.emit()

    def relayout(self):
        """Start a new layout of the current graph in the view.

        Does nothing without a graph or with a graph of at most one node.
        ``items_matrix`` is passed to the view as layout weights only when its
        first dimension equals the number of nodes; otherwise a warning is
        shown and the layout runs without weights.
        """
        if self.graph is None or self.graph.number_of_nodes() <= 1:
            return

        distmatrix = self.items_matrix
        # Clear first, then raise: the warning used to be cleared right after
        # it was set, so it could never be seen.
        self.Warning.distance_matrix_size.clear()
        if distmatrix is not None \
                and distmatrix.shape[0] != self.graph.number_of_nodes():
            self.Warning.distance_matrix_size()
            distmatrix = None

        self.progressbar = gui.ProgressBar(self, FR_ITERATIONS)
        self.relayout_button.setDisabled(True)
        self.view.relayout(randomize=self.randomizePositions,
                           weight=distmatrix)

    def _on_node_label_attrs_changed(self):
        """Update the text of every node from the chosen label attributes.

        The indices in ``node_label_attrs`` are resolved against
        ``graph_attrs`` and stored in ``attrs_label``. With no attribute
        chosen all node texts are blanked; otherwise each node shows the
        comma-separated values of its row. Nothing is changed when the graph
        has no (or an empty) node table.
        """
        if not self.graph:
            return
        label_vars = [self.graph_attrs[idx] for idx in self.node_label_attrs]
        self.attrs_label = label_vars

        if not label_vars:
            for node in self.view.nodes:
                node.setText('')
            return

        table = self.graph.items()
        if not table:
            return
        for row, node in enumerate(self.view.nodes):
            values = table[row, label_vars][0].list
            node.setText(', '.join(str(value) for value in values))

    def _clicked_tooltip_lstbox(self):
        """Update node tooltips from the chosen tooltip attributes.

        The indices in ``tooltipAttributes`` are resolved against
        ``graph_attrs`` and stored in ``attrs_tooltip``. With no attribute
        chosen every node gets ``None`` as its tooltip. Otherwise every node
        gets a zero-argument callable that builds an HTML snippet with one
        ``<b>name:</b> value`` line per attribute for the node's row; the
        table is therefore read only when the callable is invoked. Nothing is
        changed when the graph has no (or an empty) node table.
        """
        import html  # local import keeps the block self-contained

        if not self.graph:
            return
        attributes = self.attrs_tooltip = [
            self.graph_attrs[i] for i in self.tooltipAttributes
        ]
        if not attributes:
            for node in self.view.nodes:
                node.setTooltip(None)
            return

        table = self.graph.items()
        if not table:
            return
        assert self.view.nodes

        def tooltip_for(row):
            """Return a callable that renders the tooltip of table row `row`."""
            def build():
                values = table[row, attributes][0].list
                # Names and values come from user data: escape &, < and > so
                # they are shown literally instead of being parsed as markup.
                return '<br>'.join(
                    '<b>{}:</b> {}'.format(
                        html.escape(var.name, quote=False),
                        html.escape(str(value), quote=False))
                    for var, value in zip(attributes, values))
            return build

        for row, node in enumerate(self.view.nodes):
            node.setTooltip(tooltip_for(row))

    def set_edge_labels(self):
        """Show or hide the weights written next to the edges.

        With ``showEdgeWeights`` set, each edge gets its ``'weight'`` value as
        text, where a false weight (missing, ``None``, 0) becomes an empty
        string; otherwise every edge text is blanked.
        """
        if not self.graph:
            return
        if self.showEdgeWeights:
            labels = [str(weight or '')
                      for _, _, weight in self.graph.edges(data='weight')]
        else:
            labels = [''] * self.graph.number_of_edges()
        for edge, label in zip(self.view.edges, labels):
            edge.setText(label)

    def set_node_colors(self):
        """Color the nodes by ``attr_color`` and rebuild the legend.

        The legend is first taken out of the scene and emptied (or created if
        the view has none yet). With no attribute chosen, or with one that is
        neither continuous nor discrete, all nodes are reset with
        ``setColor(None)`` and the legend stays out of the scene. Continuous
        attributes use ``CONTINUOUS_PALETTE`` with a single gradient legend
        item; discrete attributes get a palette with one color and one legend
        item per value.

        :raises RuntimeError: if the attribute is not in the table's domain
        """
        if not self.graph:
            return
        attribute = self.attr_color
        assert not attribute or isinstance(attribute, Orange.data.Variable)
        view = self.view

        if view.legend is not None:
            view.scene().removeItem(view.legend)
            view.legend.clear()
        else:
            view.legend = LegendItem()
            view.legend.set_parent(view)

        if not attribute:
            for node in view.nodes:
                node.setColor(None)
            return

        table = self.graph.items()
        if not table:
            return

        # Pull the attribute's column from whichever part of the table holds it.
        if attribute in table.domain.class_vars:
            values = table[:, attribute].Y
            if values.ndim > 1:
                values = values.T
        elif attribute in table.domain.metas:
            values = table[:, attribute].metas[:, 0]
        elif attribute in table.domain.attributes:
            values = table[:, attribute].X[:, 0]
        else:
            raise RuntimeError("Shouldn't be able to select this column")

        if attribute.is_continuous:
            colors = CONTINUOUS_PALETTE[scale(values)]
            label = PaletteItemSample(
                CONTINUOUS_PALETTE,
                DiscretizedScale(np.nanmin(values), np.nanmax(values)))
            view.legend.addItem(label, "")
            view.legend.setGeometry(label.boundingRect())
        elif attribute.is_discrete:
            palette = ColorPaletteGenerator(len(attribute.values))
            colors = palette[values]
            for value, color in zip(attribute.values, palette):
                view.legend.addItem(
                    ScatterPlotItem(pen=Node.Pen.DEFAULT,
                                    brush=QBrush(QColor(color)),
                                    size=10,
                                    symbol="o"), escape(value))
        else:
            # No palette applies to other variable kinds. Previously `colors`
            # stayed unbound here and the loop below crashed; treat it like
            # "no color attribute" instead.
            for node in view.nodes:
                node.setColor(None)
            return

        for node, color in zip(view.nodes, colors):
            node.setColor(color)
        view.scene().addItem(view.legend)
        view.legend.geometry_changed()

    def set_node_sizes(self):
        """Size the nodes by the values of ``attr_size``.

        Values are mapped linearly so that the smallest becomes
        ``MIN_NODE_SIZE`` and the largest ``MAX_NODE_SIZE``; missing values
        get the mean size, and every size is multiplied by ``point_width``.
        With ``invertNodeSize`` set, the reciprocal of the (shifted) values is
        used instead. All nodes get the minimal size when the column cannot be
        read, when all its values are equal, or when it has no finite range.
        """
        self.invertNodeSizeCheck.setDisabled(not self.attr_size)

        if not self.graph:
            return
        table = self.graph.items()
        if table is None:
            return

        nodes = self.view.nodes

        def use_minimal_size():
            for node in nodes:
                node.setSize(MIN_NODE_SIZE * self.point_width)

        try:
            a = table.get_column_view(self.attr_size)[0]
            values = a.copy()
        except Exception:
            # Deliberate best effort: any failure to fetch the column
            # (including no size attribute being chosen) means default sizes.
            use_minimal_size()
            return

        if self.invertNodeSize:
            # Shift by |min| + 1 so all values are >= 1 before taking the
            # reciprocal. Adding the signed minimum (as before) left columns
            # with a negative minimum non-positive, giving division by zero.
            # For non-negative columns the result is unchanged.
            values = 1 / (values + (abs(np.nanmin(values)) + 1))

        nodemin, nodemax = np.nanmin(values), np.nanmax(values)
        has_range = np.isfinite(nodemin) and np.isfinite(nodemax)
        if not has_range or nodemin == nodemax:
            # np.polyfit can't fit a line through a degenerate or NaN range
            use_minimal_size()
            return

        k, n = np.polyfit([nodemin, nodemax],
                          [MIN_NODE_SIZE, MAX_NODE_SIZE], 1)
        sizes = values * k + n
        sizes[np.isnan(sizes)] = np.nanmean(sizes)
        for node, size in zip(nodes, sizes):
            node.setSize(size * self.point_width)

    def set_edge_sizes(self):
        """Set the width of every edge in the view.

        With ``relativeEdgeWidths`` set, widths follow the edge weights (a
        missing weight counts as 1), scaled with ``scale(..., .7, 8)`` and
        multiplied by a logarithmic function of ``edge_width``. Otherwise all
        edges share the width ``.7 * edge_width``.
        """
        if not self.graph:
            return
        graph = self.graph
        if self.relativeEdgeWidths:
            weights = [graph.adj[u][v].get('weight', 1)
                       for u, v in graph.edges()]
            widths = scale(weights, .7, 8) * np.log2(self.edge_width / 4 + 1)
        else:
            widths = [.7 * self.edge_width] * graph.number_of_edges()
        for edge, width in zip(self.view.edges, widths):
            edge.setSize(width)

    def send_report(self):
        """Add the node data, graph statistics, visual settings and the plot
        to the report. Nothing is reported when there is no graph.
        """
        # NOTE(review): this method reads node_color_attr / node_size_attr /
        # colorCombo / nodeSizeCombo, while sibling methods use attr_color /
        # attr_size / color_combo / size_combo - confirm that both sets of
        # names exist on the widget.
        if not self.graph:
            # Without a graph there is nothing to report, and the calls below
            # would fail on ``None``.
            return
        self.report_data("Data", self.graph.items())
        self.report_items('Graph info', [
            ("Number of vertices", self.graph.number_of_nodes()),
            ("Number of edges", self.graph.number_of_edges()),
            ("Vertices per edge", "%.3f" % self.verticesPerEdge),
            ("Edges per vertex", "%.3f" % self.edgesPerVertex),
        ])
        if self.node_color_attr or self.node_size_attr or self.node_label_attrs:
            # The conditional must apply to the suffix only; without the
            # explicit handling the whole entry was "" unless sizes were
            # inverted.
            size_text = str(self.nodeSizeCombo.currentText())
            if self.invertNodeSize:
                size_text += " (inverted)"
            self.report_items("Visual settings", [
                ("Vertex color", self.colorCombo.currentText()),
                ("Vertex size", size_text),
                ("Labels", ", ".join(self.graph_attrs[i].name
                                     for i in self.node_label_attrs)),
            ])
        self.report_plot("Graph", self.view)